File 1127-Combine-try_end-with-deallocate.patch of Package erlang
From 568ebc021df947511dcbcbd29a91c5fd60018c94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Wed, 6 Sep 2023 12:22:46 +0200
Subject: [PATCH 07/25] Combine `try_end` with `deallocate`
When `try_end` and `deallocate` are combined, the clearing of the
catch on the stack can be omitted. That eliminates one instruction
for x86_64 (9 bytes) and two instructions for AArch64 (8 bytes).
Also combine `try_end` followed by `move` and `deallocate` for AArch64.
These instruction combinations are used more than 800 times in the OTP
code base.
---
erts/emulator/beam/jit/arm/instr_common.cpp | 22 +++++++++++++++++++++
erts/emulator/beam/jit/arm/ops.tab | 6 ++++++
erts/emulator/beam/jit/x86/instr_common.cpp | 5 +++++
erts/emulator/beam/jit/x86/ops.tab | 4 ++++
4 files changed, 37 insertions(+)
diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp
index 10bd66eeee..7abaff09ee 100644
--- a/erts/emulator/beam/jit/arm/instr_common.cpp
+++ b/erts/emulator/beam/jit/arm/instr_common.cpp
@@ -2423,6 +2423,28 @@ void BeamModuleAssembler::emit_try_end(const ArgYRegister &CatchTag) {
emit_init(CatchTag);
}
+void BeamModuleAssembler::emit_try_end_deallocate(const ArgWord &Deallocate) {
+ a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, catches)));
+ a.sub(TMP1, TMP1, imm(1));
+ a.str(TMP1, arm::Mem(c_p, offsetof(Process, catches)));
+ if (Deallocate.get() > 0) {
+ add(E, E, Deallocate.get() * sizeof(Eterm));
+ }
+}
+
+void BeamModuleAssembler::emit_try_end_move_deallocate(
+ const ArgSource &Src,
+ const ArgRegister &Dst,
+ const ArgWord &Deallocate) {
+ a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, catches)));
+ a.sub(TMP1, TMP1, imm(1));
+ a.str(TMP1, arm::Mem(c_p, offsetof(Process, catches)));
+ mov_arg(Dst, Src);
+ if (Deallocate.get() > 0) {
+ add(E, E, Deallocate.get() * sizeof(Eterm));
+ }
+}
+
void BeamModuleAssembler::emit_try_case(const ArgYRegister &CatchTag) {
/* XREG0 = THE_NON_VALUE
* XREG1 = error reason/thrown value
diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab
index b0c79d3e2c..2c3cd64757 100644
--- a/erts/emulator/beam/jit/arm/ops.tab
+++ b/erts/emulator/beam/jit/arm/ops.tab
@@ -179,6 +179,12 @@ try_end y
try_case_end s
+try_end Y | deallocate N => try_end_deallocate N
+try_end Y | move Src Dst | deallocate N => try_end_move_deallocate Src Dst N
+
+try_end_deallocate t
+try_end_move_deallocate s d t
+
# Destructive set tuple element
set_tuple_element s S P
diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp
index 7574df29e0..fa45b9e0cd 100644
--- a/erts/emulator/beam/jit/x86/instr_common.cpp
+++ b/erts/emulator/beam/jit/x86/instr_common.cpp
@@ -2440,6 +2440,11 @@ void BeamModuleAssembler::emit_try_end(const ArgYRegister &CatchTag) {
emit_init(CatchTag);
}
+void BeamModuleAssembler::emit_try_end_deallocate(const ArgWord &Deallocate) {
+ a.dec(x86::qword_ptr(c_p, offsetof(Process, catches)));
+ emit_deallocate(Deallocate);
+}
+
void BeamModuleAssembler::emit_try_case(const ArgYRegister &CatchTag) {
/* The try_tag in the Y slot in the stack frame has already been
* cleared. */
diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab
index bbc2313118..6879225dcd 100644
--- a/erts/emulator/beam/jit/x86/ops.tab
+++ b/erts/emulator/beam/jit/x86/ops.tab
@@ -194,6 +194,10 @@ try_end y
try_case_end s
+try_end Y | deallocate N => try_end_deallocate N
+
+try_end_deallocate t
+
# Destructive set tuple element
set_tuple_element s S P
--
2.35.3