File 1143-AArch64-Eliminate-branch-before-a-stub-section.patch of Package erlang

From 119b160401f8759eddad5382cc1b64a4c72ff6c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 19 Sep 2023 08:46:40 +0200
Subject: [PATCH 23/25] AArch64: Eliminate branch before a stub section

Because of the limited reach of branch instructions, stubs that
forward branches need to be emitted at regular intervals. Each stub
section is always preceded by a `b` instruction skipping the section.

Frequently, this `b` instruction is unreachable (for example, if the
preceding instruction is a `ret` instruction).

By keeping track of whether the current position is unreachable,
unnecessary `b` instructions can be omitted. We can also emit stub
sections somewhat earlier than strictly necessary if the current
position is unreachable. That will reduce the number of `b`
instructions that will need to be executed.
---
 erts/emulator/beam/jit/arm/beam_asm.hpp       | 21 +++++++++++++++++++
 .../emulator/beam/jit/arm/beam_asm_global.cpp |  2 ++
 .../emulator/beam/jit/arm/beam_asm_module.cpp | 16 +++++++++++---
 erts/emulator/beam/jit/arm/instr_call.cpp     |  8 +++++++
 erts/emulator/beam/jit/arm/instr_common.cpp   |  8 +++++++
 erts/emulator/beam/jit/arm/instr_fun.cpp      |  3 +++
 erts/emulator/beam/jit/arm/instr_msg.cpp      |  6 ++++++
 erts/emulator/beam/jit/arm/instr_select.cpp   |  3 +++
 erts/emulator/beam/jit/arm/ops.tab            |  6 ++++--
 9 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp
index 9503f0bb4b..9f7f5361ae 100644
--- a/erts/emulator/beam/jit/arm/beam_asm.hpp
+++ b/erts/emulator/beam/jit/arm/beam_asm.hpp
@@ -835,8 +835,29 @@ class BeamModuleAssembler : public BeamAssembler,
     size_t last_error_offset = 0;
 
     static constexpr ptrdiff_t STUB_CHECK_INTERVAL = 4 << 10;
+    static constexpr ptrdiff_t STUB_CHECK_INTERVAL_UNREACHABLE =
+            (4 << 10) - 128;
     size_t last_stub_check_offset = 0;
 
+    /* Save the last known unreachable position. */
+    size_t last_unreachable_offset = 0;
+
+    /* Mark this point unreachable. Use at the end of a BEAM
+     * instruction. */
+    void mark_unreachable() {
+        last_unreachable_offset = a.offset();
+    }
+
+    /* Use within BEAM instructions. */
+    void mark_unreachable_check_pending_stubs() {
+        mark_unreachable();
+        check_pending_stubs();
+    }
+
+    bool is_unreachable() {
+        return a.offset() == last_unreachable_offset;
+    }
+
     enum Displacement : size_t {
         /* Pessimistic estimate for helper functions, where we don't know the
          * branch displacement or whether it will be used near label
diff --git a/erts/emulator/beam/jit/arm/beam_asm_global.cpp b/erts/emulator/beam/jit/arm/beam_asm_global.cpp
index 84cbf0a515..2e93cf9ee6 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_global.cpp
+++ b/erts/emulator/beam/jit/arm/beam_asm_global.cpp
@@ -247,6 +247,8 @@ void BeamModuleAssembler::emit_raise_exception(const ErtsCodeMFA *exp) {
         fragment_call(ga->get_raise_exception_null_exp());
     }
 
+    mark_unreachable();
+
     /* `line` instructions need to know the latest offset that may throw an
      * exception. See the `line` instruction for details. */
     last_error_offset = a.offset();
diff --git a/erts/emulator/beam/jit/arm/beam_asm_module.cpp b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
index 276a560ca9..211f60dd77 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_module.cpp
+++ b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
@@ -417,6 +417,7 @@ void BeamModuleAssembler::emit_int_code_end() {
      *
      * Since the table is potentially very large, we'll emit all stubs that are
      * due within it so we won't have to check on every iteration. */
+    mark_unreachable();
     flush_pending_stubs(_dispatchTable.size() * sizeof(Uint32[8]) +
                         dispUnknown);
 
@@ -427,6 +428,8 @@ void BeamModuleAssembler::emit_int_code_end() {
         a.br(SUPER_TMP);
     }
 
+    mark_unreachable();
+
     /* Emit all remaining stubs. */
     flush_pending_stubs(dispMax);
 }
@@ -615,7 +618,10 @@ void BeamModuleAssembler::check_pending_stubs() {
     /* We shouldn't let too much space pass between checks. */
     ASSERT((last_stub_check_offset + dispMin) >= currOffset);
 
-    if ((last_stub_check_offset + STUB_CHECK_INTERVAL) < currOffset) {
+    if (last_stub_check_offset + STUB_CHECK_INTERVAL < currOffset ||
+        (is_unreachable() &&
+         last_stub_check_offset + STUB_CHECK_INTERVAL_UNREACHABLE <
+                 currOffset)) {
         last_stub_check_offset = currOffset;
 
         flush_pending_stubs(STUB_CHECK_INTERVAL * 2);
@@ -638,7 +644,9 @@ void BeamModuleAssembler::flush_pending_stubs(size_t range) {
                 next = a.newLabel();
 
                 comment("Begin stub section");
-                a.b(next);
+                if (!is_unreachable()) {
+                    a.b(next);
+                }
             }
 
             emit_veneer(veneer);
@@ -663,7 +671,9 @@ void BeamModuleAssembler::flush_pending_stubs(size_t range) {
             next = a.newLabel();
 
             comment("Begin stub section");
-            a.b(next);
+            if (!is_unreachable()) {
+                a.b(next);
+            }
         }
 
         emit_constant(constant);
diff --git a/erts/emulator/beam/jit/arm/instr_call.cpp b/erts/emulator/beam/jit/arm/instr_call.cpp
index 8f6f587698..c7a150b6bf 100644
--- a/erts/emulator/beam/jit/arm/instr_call.cpp
+++ b/erts/emulator/beam/jit/arm/instr_call.cpp
@@ -50,6 +50,8 @@ void BeamModuleAssembler::emit_dispatch_return() {
     a.b_mi(resolve_fragment(ga->get_dispatch_return(), disp1MB));
 
     a.ret(a64::x30);
+
+    mark_unreachable();
 }
 
 void BeamModuleAssembler::emit_return() {
@@ -85,6 +87,7 @@ void BeamModuleAssembler::emit_move_call_last(const ArgYRegister &Src,
         a.ldp(dst.reg, a64::x30, src_ref);
         flush_var(dst);
         a.b(resolve_beam_label(CallTarget, disp128MB));
+        mark_unreachable();
     } else if (src_index == 0 && Support::isInt9(deallocate)) {
         auto dst = init_destination(Dst, TMP1);
         const arm::Mem src_ref = arm::Mem(E).post(deallocate);
@@ -101,6 +104,7 @@ void BeamModuleAssembler::emit_move_call_last(const ArgYRegister &Src,
 void BeamModuleAssembler::emit_i_call_only(const ArgLabel &CallTarget) {
     emit_leave_erlang_frame();
     a.b(resolve_beam_label(CallTarget, disp128MB));
+    mark_unreachable();
 }
 
 /* Handles save_calls for remote calls. When the active code index is
@@ -143,6 +147,7 @@ void BeamModuleAssembler::emit_i_call_ext_only(const ArgExport &Exp) {
     arm::Mem target = emit_setup_dispatchable_call(ARG1);
     emit_leave_erlang_frame();
     branch(target);
+    mark_unreachable();
 }
 
 void BeamModuleAssembler::emit_i_call_ext_last(const ArgExport &Exp,
@@ -166,6 +171,7 @@ void BeamModuleAssembler::emit_move_call_ext_last(const ArgYRegister &Src,
         a.ldp(dst.reg, a64::x30, src_ref);
         flush_var(dst);
         branch(target);
+        mark_unreachable();
     } else if (src_index == 0 && Support::isInt9(deallocate)) {
         auto dst = init_destination(Dst, TMP1);
         const arm::Mem src_ref = arm::Mem(E).post(deallocate);
@@ -229,6 +235,7 @@ void BeamModuleAssembler::emit_i_apply_only() {
 
     emit_leave_erlang_frame();
     branch(target);
+    mark_unreachable();
 }
 
 arm::Mem BeamModuleAssembler::emit_fixed_apply(const ArgWord &Arity,
@@ -281,4 +288,5 @@ void BeamModuleAssembler::emit_apply_last(const ArgWord &Arity,
 
     emit_leave_erlang_frame();
     branch(target);
+    mark_unreachable();
 }
diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp
index 808f098dac..050d7e5361 100644
--- a/erts/emulator/beam/jit/arm/instr_common.cpp
+++ b/erts/emulator/beam/jit/arm/instr_common.cpp
@@ -915,6 +915,7 @@ void BeamModuleAssembler::emit_is_nonempty_list(const ArgLabel &Fail,
 
 void BeamModuleAssembler::emit_jump(const ArgLabel &Fail) {
     a.b(resolve_beam_label(Fail, disp128MB));
+    mark_unreachable();
 }
 
 void BeamModuleAssembler::emit_is_atom(const ArgLabel &Fail,
@@ -1066,6 +1067,7 @@ void BeamModuleAssembler::emit_is_function2(const ArgLabel &Fail,
     if (arity > MAX_ARG) {
         /* Arity is negative or too large. */
         a.b(resolve_beam_label(Fail, disp128MB));
+        mark_unreachable();
 
         return;
     }
@@ -2554,6 +2556,8 @@ void BeamModuleAssembler::emit_raise(const ArgSource &Trace,
     mov_var(ARG2, trace);
     fragment_call(ga->get_raise_shared());
 
+    mark_unreachable();
+
     /* `line` instructions need to know the latest offset that may throw an
      * exception. See the `line` instruction for details. */
     last_error_offset = a.offset();
@@ -2672,3 +2676,7 @@ void BeamModuleAssembler::emit_i_perf_counter() {
 
     a.bind(next);
 }
+
+void BeamModuleAssembler::emit_mark_unreachable() {
+    mark_unreachable();
+}
diff --git a/erts/emulator/beam/jit/arm/instr_fun.cpp b/erts/emulator/beam/jit/arm/instr_fun.cpp
index 72b00e0392..61cded38e7 100644
--- a/erts/emulator/beam/jit/arm/instr_fun.cpp
+++ b/erts/emulator/beam/jit/arm/instr_fun.cpp
@@ -197,6 +197,7 @@ void BeamModuleAssembler::emit_i_lambda_trampoline(const ArgLambda &Lambda,
     }
 
     a.b(resolve_beam_label(Lbl, disp128MB));
+    mark_unreachable();
 }
 
 void BeamModuleAssembler::emit_i_make_fun3(const ArgLambda &Lambda,
@@ -437,12 +438,14 @@ void BeamModuleAssembler::emit_i_call_fun2_last(const ArgVal &Tag,
         emit_leave_erlang_frame();
 
         a.br(target);
+        mark_unreachable();
     } else {
         emit_deallocate(Deallocate);
         emit_leave_erlang_frame();
 
         const auto &trampoline = lambdas[Tag.as<ArgLambda>().get()].trampoline;
         a.b(resolve_label(trampoline, disp128MB));
+        mark_unreachable();
     }
 }
 
diff --git a/erts/emulator/beam/jit/arm/instr_msg.cpp b/erts/emulator/beam/jit/arm/instr_msg.cpp
index b927aca686..a853d91d52 100644
--- a/erts/emulator/beam/jit/arm/instr_msg.cpp
+++ b/erts/emulator/beam/jit/arm/instr_msg.cpp
@@ -256,6 +256,7 @@ void BeamModuleAssembler::emit_loop_rec_end(const ArgLabel &Dest) {
 
     a.sub(FCALLS, FCALLS, imm(1));
     a.b(resolve_beam_label(Dest, disp128MB));
+    mark_unreachable();
 }
 
 void BeamModuleAssembler::emit_wait_unlocked(const ArgLabel &Dest) {
@@ -268,6 +269,7 @@ void BeamModuleAssembler::emit_wait_unlocked(const ArgLabel &Dest) {
     emit_leave_runtime(0);
 
     a.b(resolve_fragment(ga->get_do_schedule(), disp128MB));
+    mark_unreachable();
 }
 
 void BeamModuleAssembler::emit_wait_locked(const ArgLabel &Dest) {
@@ -280,6 +282,10 @@ void BeamModuleAssembler::emit_wait_locked(const ArgLabel &Dest) {
     emit_leave_runtime(0);
 
     a.b(resolve_fragment(ga->get_do_schedule(), disp128MB));
+
+    /* Must check stubs here because this branch is followed by
+     * a label when part of `wait_timeout_locked`. */
+    mark_unreachable_check_pending_stubs();
 }
 
 void BeamModuleAssembler::emit_wait_timeout_unlocked(const ArgSource &Src,
diff --git a/erts/emulator/beam/jit/arm/instr_select.cpp b/erts/emulator/beam/jit/arm/instr_select.cpp
index b5a4bb290c..3c0c2c966c 100644
--- a/erts/emulator/beam/jit/arm/instr_select.cpp
+++ b/erts/emulator/beam/jit/arm/instr_select.cpp
@@ -214,6 +214,7 @@ void BeamModuleAssembler::emit_linear_search(arm::Gp comparand,
     /* An invalid label means fallthrough to the next instruction. */
     if (fail.isValid()) {
         a.b(resolve_label(fail, disp128MB));
+        mark_unreachable_check_pending_stubs();
     }
 }
 
@@ -453,6 +454,7 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src,
     embed_vararg_rodata(args, TMP2);
     a.ldr(TMP3, arm::Mem(TMP2, TMP1, arm::lsl(3)));
     a.br(TMP3);
+    mark_unreachable_check_pending_stubs();
 
     if (Fail.getType() == ArgVal::Immediate) {
         a.bind(fail);
@@ -504,6 +506,7 @@ bool BeamModuleAssembler::emit_optimized_three_way_select(
     /* An invalid label means fallthrough to the next instruction. */
     if (fail.isValid()) {
         a.b(resolve_label(fail, disp128MB));
+        mark_unreachable_check_pending_stubs();
     }
 
     return true;
diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab
index cd22696347..c5a7da3142 100644
--- a/erts/emulator/beam/jit/arm/ops.tab
+++ b/erts/emulator/beam/jit/arm/ops.tab
@@ -652,9 +652,11 @@ call_ext_only Ar=u Bif=u$is_bif | is_heavy_bif(Bif) =>
 # return instructions.
 #
 call_ext_last u Bif u | is_exit_bif(Bif) =>
-    call_light_bif Bif
+    call_light_bif Bif | mark_unreachable
 call_ext_only Ar=u Bif | is_exit_bif(Bif) =>
-    allocate u Ar | call_light_bif Bif
+    allocate u Ar | call_light_bif Bif | mark_unreachable
+
+mark_unreachable
 
 #
 # The general case for BIFs that have no special requirements.
-- 
2.35.3

openSUSE Build Service is sponsored by