File 1143-AArch64-Eliminate-branch-before-a-stub-section.patch of Package erlang
From 119b160401f8759eddad5382cc1b64a4c72ff6c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 19 Sep 2023 08:46:40 +0200
Subject: [PATCH 23/25] AArch64: Eliminate branch before a stub section
Because of the limited reach of branch instructions, stubs that
forward branches need to be emitted at regular intervals. Each stubs
section is always preceded by a `b` instruction skipping the section.
Frequently, this `b` instruction is unreachable (for example, if the
preceding instruction is a `ret` instruction).
By keeping track of whether the current position is unreachable,
unnecessary `b` instructions can be omitted. We can also emit stubs
sections somewhat earlier than strictly necessary if the current
position is unreachable. That will reduce the number of `b`
instructions that will need to be executed.
---
erts/emulator/beam/jit/arm/beam_asm.hpp | 21 +++++++++++++++++++
.../emulator/beam/jit/arm/beam_asm_global.cpp | 2 ++
.../emulator/beam/jit/arm/beam_asm_module.cpp | 16 +++++++++++---
erts/emulator/beam/jit/arm/instr_call.cpp | 8 +++++++
erts/emulator/beam/jit/arm/instr_common.cpp | 8 +++++++
erts/emulator/beam/jit/arm/instr_fun.cpp | 3 +++
erts/emulator/beam/jit/arm/instr_msg.cpp | 6 ++++++
erts/emulator/beam/jit/arm/instr_select.cpp | 3 +++
erts/emulator/beam/jit/arm/ops.tab | 6 ++++--
9 files changed, 68 insertions(+), 5 deletions(-)
diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp
index 9503f0bb4b..9f7f5361ae 100644
--- a/erts/emulator/beam/jit/arm/beam_asm.hpp
+++ b/erts/emulator/beam/jit/arm/beam_asm.hpp
@@ -835,8 +835,29 @@ class BeamModuleAssembler : public BeamAssembler,
size_t last_error_offset = 0;
static constexpr ptrdiff_t STUB_CHECK_INTERVAL = 4 << 10;
+ static constexpr ptrdiff_t STUB_CHECK_INTERVAL_UNREACHABLE =
+ (4 << 10) - 128;
size_t last_stub_check_offset = 0;
+ /* Save the last known unreachable position. */
+ size_t last_unreachable_offset = 0;
+
+ /* Mark this point unreachable. Use at the end of a BEAM
+ * instruction. */
+ void mark_unreachable() {
+ last_unreachable_offset = a.offset();
+ }
+
+ /* Use within BEAM instructions. */
+ void mark_unreachable_check_pending_stubs() {
+ mark_unreachable();
+ check_pending_stubs();
+ }
+
+ bool is_unreachable() {
+ return a.offset() == last_unreachable_offset;
+ }
+
enum Displacement : size_t {
/* Pessimistic estimate for helper functions, where we don't know the
* branch displacement or whether it will be used near label
diff --git a/erts/emulator/beam/jit/arm/beam_asm_global.cpp b/erts/emulator/beam/jit/arm/beam_asm_global.cpp
index 84cbf0a515..2e93cf9ee6 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_global.cpp
+++ b/erts/emulator/beam/jit/arm/beam_asm_global.cpp
@@ -247,6 +247,8 @@ void BeamModuleAssembler::emit_raise_exception(const ErtsCodeMFA *exp) {
fragment_call(ga->get_raise_exception_null_exp());
}
+ mark_unreachable();
+
/* `line` instructions need to know the latest offset that may throw an
* exception. See the `line` instruction for details. */
last_error_offset = a.offset();
diff --git a/erts/emulator/beam/jit/arm/beam_asm_module.cpp b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
index 276a560ca9..211f60dd77 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_module.cpp
+++ b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
@@ -417,6 +417,7 @@ void BeamModuleAssembler::emit_int_code_end() {
*
* Since the table is potentially very large, we'll emit all stubs that are
* due within it so we won't have to check on every iteration. */
+ mark_unreachable();
flush_pending_stubs(_dispatchTable.size() * sizeof(Uint32[8]) +
dispUnknown);
@@ -427,6 +428,8 @@ void BeamModuleAssembler::emit_int_code_end() {
a.br(SUPER_TMP);
}
+ mark_unreachable();
+
/* Emit all remaining stubs. */
flush_pending_stubs(dispMax);
}
@@ -615,7 +618,10 @@ void BeamModuleAssembler::check_pending_stubs() {
/* We shouldn't let too much space pass between checks. */
ASSERT((last_stub_check_offset + dispMin) >= currOffset);
- if ((last_stub_check_offset + STUB_CHECK_INTERVAL) < currOffset) {
+ if (last_stub_check_offset + STUB_CHECK_INTERVAL < currOffset ||
+ (is_unreachable() &&
+ last_stub_check_offset + STUB_CHECK_INTERVAL_UNREACHABLE <
+ currOffset)) {
last_stub_check_offset = currOffset;
flush_pending_stubs(STUB_CHECK_INTERVAL * 2);
@@ -638,7 +644,9 @@ void BeamModuleAssembler::flush_pending_stubs(size_t range) {
next = a.newLabel();
comment("Begin stub section");
- a.b(next);
+ if (!is_unreachable()) {
+ a.b(next);
+ }
}
emit_veneer(veneer);
@@ -663,7 +671,9 @@ void BeamModuleAssembler::flush_pending_stubs(size_t range) {
next = a.newLabel();
comment("Begin stub section");
- a.b(next);
+ if (!is_unreachable()) {
+ a.b(next);
+ }
}
emit_constant(constant);
diff --git a/erts/emulator/beam/jit/arm/instr_call.cpp b/erts/emulator/beam/jit/arm/instr_call.cpp
index 8f6f587698..c7a150b6bf 100644
--- a/erts/emulator/beam/jit/arm/instr_call.cpp
+++ b/erts/emulator/beam/jit/arm/instr_call.cpp
@@ -50,6 +50,8 @@ void BeamModuleAssembler::emit_dispatch_return() {
a.b_mi(resolve_fragment(ga->get_dispatch_return(), disp1MB));
a.ret(a64::x30);
+
+ mark_unreachable();
}
void BeamModuleAssembler::emit_return() {
@@ -85,6 +87,7 @@ void BeamModuleAssembler::emit_move_call_last(const ArgYRegister &Src,
a.ldp(dst.reg, a64::x30, src_ref);
flush_var(dst);
a.b(resolve_beam_label(CallTarget, disp128MB));
+ mark_unreachable();
} else if (src_index == 0 && Support::isInt9(deallocate)) {
auto dst = init_destination(Dst, TMP1);
const arm::Mem src_ref = arm::Mem(E).post(deallocate);
@@ -101,6 +104,7 @@ void BeamModuleAssembler::emit_move_call_last(const ArgYRegister &Src,
void BeamModuleAssembler::emit_i_call_only(const ArgLabel &CallTarget) {
emit_leave_erlang_frame();
a.b(resolve_beam_label(CallTarget, disp128MB));
+ mark_unreachable();
}
/* Handles save_calls for remote calls. When the active code index is
@@ -143,6 +147,7 @@ void BeamModuleAssembler::emit_i_call_ext_only(const ArgExport &Exp) {
arm::Mem target = emit_setup_dispatchable_call(ARG1);
emit_leave_erlang_frame();
branch(target);
+ mark_unreachable();
}
void BeamModuleAssembler::emit_i_call_ext_last(const ArgExport &Exp,
@@ -166,6 +171,7 @@ void BeamModuleAssembler::emit_move_call_ext_last(const ArgYRegister &Src,
a.ldp(dst.reg, a64::x30, src_ref);
flush_var(dst);
branch(target);
+ mark_unreachable();
} else if (src_index == 0 && Support::isInt9(deallocate)) {
auto dst = init_destination(Dst, TMP1);
const arm::Mem src_ref = arm::Mem(E).post(deallocate);
@@ -229,6 +235,7 @@ void BeamModuleAssembler::emit_i_apply_only() {
emit_leave_erlang_frame();
branch(target);
+ mark_unreachable();
}
arm::Mem BeamModuleAssembler::emit_fixed_apply(const ArgWord &Arity,
@@ -281,4 +288,5 @@ void BeamModuleAssembler::emit_apply_last(const ArgWord &Arity,
emit_leave_erlang_frame();
branch(target);
+ mark_unreachable();
}
diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp
index 808f098dac..050d7e5361 100644
--- a/erts/emulator/beam/jit/arm/instr_common.cpp
+++ b/erts/emulator/beam/jit/arm/instr_common.cpp
@@ -915,6 +915,7 @@ void BeamModuleAssembler::emit_is_nonempty_list(const ArgLabel &Fail,
void BeamModuleAssembler::emit_jump(const ArgLabel &Fail) {
a.b(resolve_beam_label(Fail, disp128MB));
+ mark_unreachable();
}
void BeamModuleAssembler::emit_is_atom(const ArgLabel &Fail,
@@ -1066,6 +1067,7 @@ void BeamModuleAssembler::emit_is_function2(const ArgLabel &Fail,
if (arity > MAX_ARG) {
/* Arity is negative or too large. */
a.b(resolve_beam_label(Fail, disp128MB));
+ mark_unreachable();
return;
}
@@ -2554,6 +2556,8 @@ void BeamModuleAssembler::emit_raise(const ArgSource &Trace,
mov_var(ARG2, trace);
fragment_call(ga->get_raise_shared());
+ mark_unreachable();
+
/* `line` instructions need to know the latest offset that may throw an
* exception. See the `line` instruction for details. */
last_error_offset = a.offset();
@@ -2672,3 +2676,7 @@ void BeamModuleAssembler::emit_i_perf_counter() {
a.bind(next);
}
+
+void BeamModuleAssembler::emit_mark_unreachable() {
+ mark_unreachable();
+}
diff --git a/erts/emulator/beam/jit/arm/instr_fun.cpp b/erts/emulator/beam/jit/arm/instr_fun.cpp
index 72b00e0392..61cded38e7 100644
--- a/erts/emulator/beam/jit/arm/instr_fun.cpp
+++ b/erts/emulator/beam/jit/arm/instr_fun.cpp
@@ -197,6 +197,7 @@ void BeamModuleAssembler::emit_i_lambda_trampoline(const ArgLambda &Lambda,
}
a.b(resolve_beam_label(Lbl, disp128MB));
+ mark_unreachable();
}
void BeamModuleAssembler::emit_i_make_fun3(const ArgLambda &Lambda,
@@ -437,12 +438,14 @@ void BeamModuleAssembler::emit_i_call_fun2_last(const ArgVal &Tag,
emit_leave_erlang_frame();
a.br(target);
+ mark_unreachable();
} else {
emit_deallocate(Deallocate);
emit_leave_erlang_frame();
const auto &trampoline = lambdas[Tag.as<ArgLambda>().get()].trampoline;
a.b(resolve_label(trampoline, disp128MB));
+ mark_unreachable();
}
}
diff --git a/erts/emulator/beam/jit/arm/instr_msg.cpp b/erts/emulator/beam/jit/arm/instr_msg.cpp
index b927aca686..a853d91d52 100644
--- a/erts/emulator/beam/jit/arm/instr_msg.cpp
+++ b/erts/emulator/beam/jit/arm/instr_msg.cpp
@@ -256,6 +256,7 @@ void BeamModuleAssembler::emit_loop_rec_end(const ArgLabel &Dest) {
a.sub(FCALLS, FCALLS, imm(1));
a.b(resolve_beam_label(Dest, disp128MB));
+ mark_unreachable();
}
void BeamModuleAssembler::emit_wait_unlocked(const ArgLabel &Dest) {
@@ -268,6 +269,7 @@ void BeamModuleAssembler::emit_wait_unlocked(const ArgLabel &Dest) {
emit_leave_runtime(0);
a.b(resolve_fragment(ga->get_do_schedule(), disp128MB));
+ mark_unreachable();
}
void BeamModuleAssembler::emit_wait_locked(const ArgLabel &Dest) {
@@ -280,6 +282,10 @@ void BeamModuleAssembler::emit_wait_locked(const ArgLabel &Dest) {
emit_leave_runtime(0);
a.b(resolve_fragment(ga->get_do_schedule(), disp128MB));
+
+ /* Must check stubs here because this branch is followed by
+ * a label when part of `wait_timeout_locked`. */
+ mark_unreachable_check_pending_stubs();
}
void BeamModuleAssembler::emit_wait_timeout_unlocked(const ArgSource &Src,
diff --git a/erts/emulator/beam/jit/arm/instr_select.cpp b/erts/emulator/beam/jit/arm/instr_select.cpp
index b5a4bb290c..3c0c2c966c 100644
--- a/erts/emulator/beam/jit/arm/instr_select.cpp
+++ b/erts/emulator/beam/jit/arm/instr_select.cpp
@@ -214,6 +214,7 @@ void BeamModuleAssembler::emit_linear_search(arm::Gp comparand,
/* An invalid label means fallthrough to the next instruction. */
if (fail.isValid()) {
a.b(resolve_label(fail, disp128MB));
+ mark_unreachable_check_pending_stubs();
}
}
@@ -453,6 +454,7 @@ void BeamModuleAssembler::emit_i_jump_on_val(const ArgSource &Src,
embed_vararg_rodata(args, TMP2);
a.ldr(TMP3, arm::Mem(TMP2, TMP1, arm::lsl(3)));
a.br(TMP3);
+ mark_unreachable_check_pending_stubs();
if (Fail.getType() == ArgVal::Immediate) {
a.bind(fail);
@@ -504,6 +506,7 @@ bool BeamModuleAssembler::emit_optimized_three_way_select(
/* An invalid label means fallthrough to the next instruction. */
if (fail.isValid()) {
a.b(resolve_label(fail, disp128MB));
+ mark_unreachable_check_pending_stubs();
}
return true;
diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab
index cd22696347..c5a7da3142 100644
--- a/erts/emulator/beam/jit/arm/ops.tab
+++ b/erts/emulator/beam/jit/arm/ops.tab
@@ -652,9 +652,11 @@ call_ext_only Ar=u Bif=u$is_bif | is_heavy_bif(Bif) =>
# return instructions.
#
call_ext_last u Bif u | is_exit_bif(Bif) =>
- call_light_bif Bif
+ call_light_bif Bif | mark_unreachable
call_ext_only Ar=u Bif | is_exit_bif(Bif) =>
- allocate u Ar | call_light_bif Bif
+ allocate u Ar | call_light_bif Bif | mark_unreachable
+
+mark_unreachable
#
# The general case for BIFs that have no special requirements.
--
2.35.3