File 1138-AArch64-Optimize-move_call_last-move_ext_call_last.patch of Package erlang
From 439d30ad9480e02800ac8e91d904f8944e18fe0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 18 Sep 2023 15:05:42 +0200
Subject: [PATCH 18/25] AArch64: Optimize move_call_last & move_ext_call_last
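There are more than 4700 occurrences of the following instruction
sequence in the OTP code base:
    ldr x25, [x20], 8
    ldr x30, [x20], 8
It can be simplified like so:
    ldp x25, x30, [x20], 16
The combined load is only emitted when the source is Y register 0
and exactly one word (8 bytes) is deallocated; all other cases keep
the previous code sequence.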
---
erts/emulator/beam/jit/arm/instr_call.cpp | 24 +++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/erts/emulator/beam/jit/arm/instr_call.cpp b/erts/emulator/beam/jit/arm/instr_call.cpp
index 2699c30962..8f6f587698 100644
--- a/erts/emulator/beam/jit/arm/instr_call.cpp
+++ b/erts/emulator/beam/jit/arm/instr_call.cpp
@@ -79,16 +79,23 @@ void BeamModuleAssembler::emit_move_call_last(const ArgYRegister &Src,
auto src_index = Src.get();
Sint deallocate = Deallocate.get() * sizeof(Eterm);
- if (src_index == 0 && Support::isInt9(deallocate)) {
+ if (src_index == 0 && deallocate == 8) {
+ auto dst = init_destination(Dst, TMP1);
+ const arm::Mem src_ref = arm::Mem(E).post(2 * deallocate);
+ a.ldp(dst.reg, a64::x30, src_ref);
+ flush_var(dst);
+ a.b(resolve_beam_label(CallTarget, disp128MB));
+ } else if (src_index == 0 && Support::isInt9(deallocate)) {
auto dst = init_destination(Dst, TMP1);
const arm::Mem src_ref = arm::Mem(E).post(deallocate);
a.ldr(dst.reg, src_ref);
flush_var(dst);
+ emit_i_call_only(CallTarget);
} else {
mov_arg(Dst, Src);
emit_deallocate(Deallocate);
+ emit_i_call_only(CallTarget);
}
- emit_i_call_only(CallTarget);
}
void BeamModuleAssembler::emit_i_call_only(const ArgLabel &CallTarget) {
@@ -151,16 +158,25 @@ void BeamModuleAssembler::emit_move_call_ext_last(const ArgYRegister &Src,
auto src_index = Src.get();
Sint deallocate = Deallocate.get() * sizeof(Eterm);
- if (src_index == 0 && Support::isInt9(deallocate)) {
+ if (src_index == 0 && deallocate == 8) {
+ auto dst = init_destination(Dst, TMP1);
+ const arm::Mem src_ref = arm::Mem(E).post(2 * deallocate);
+ mov_arg(ARG1, Exp);
+ arm::Mem target = emit_setup_dispatchable_call(ARG1);
+ a.ldp(dst.reg, a64::x30, src_ref);
+ flush_var(dst);
+ branch(target);
+ } else if (src_index == 0 && Support::isInt9(deallocate)) {
auto dst = init_destination(Dst, TMP1);
const arm::Mem src_ref = arm::Mem(E).post(deallocate);
a.ldr(dst.reg, src_ref);
flush_var(dst);
+ emit_i_call_ext_only(Exp);
} else {
mov_arg(Dst, Src);
emit_deallocate(Deallocate);
+ emit_i_call_ext_only(Exp);
}
- emit_i_call_ext_only(Exp);
}
static ErtsCodeMFA apply3_mfa = {am_erlang, am_apply, 3};
--
2.35.3