File 1127-Combine-try_end-with-deallocate.patch of Package erlang

From 568ebc021df947511dcbcbd29a91c5fd60018c94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Wed, 6 Sep 2023 12:22:46 +0200
Subject: [PATCH 07/25] Combine `try_end` with `deallocate`

When `try_end` and `deallocate` are combined, the clearing of the
catch on the stack can be omitted. That eliminates one instruction
on x86_64 (9 bytes) and two instructions on AArch64 (8 bytes).

Also combine `try_end` followed by `move` and `deallocate` for AArch64.

These instruction combinations are used more than 800 times in the OTP
code base.
---
 erts/emulator/beam/jit/arm/instr_common.cpp | 22 +++++++++++++++++++++
 erts/emulator/beam/jit/arm/ops.tab          |  6 ++++++
 erts/emulator/beam/jit/x86/instr_common.cpp |  5 +++++
 erts/emulator/beam/jit/x86/ops.tab          |  4 ++++
 4 files changed, 37 insertions(+)

diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp
index 10bd66eeee..7abaff09ee 100644
--- a/erts/emulator/beam/jit/arm/instr_common.cpp
+++ b/erts/emulator/beam/jit/arm/instr_common.cpp
@@ -2423,6 +2423,28 @@ void BeamModuleAssembler::emit_try_end(const ArgYRegister &CatchTag) {
     emit_init(CatchTag);
 }
 
+void BeamModuleAssembler::emit_try_end_deallocate(const ArgWord &Deallocate) { /* Fused `try_end` + `deallocate`; unlike emit_try_end, no emit_init(CatchTag) is emitted. */
+    a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, catches))); /* TMP1 = the `catches` field of the Process at c_p */
+    a.sub(TMP1, TMP1, imm(1)); /* decrement it by one */
+    a.str(TMP1, arm::Mem(c_p, offsetof(Process, catches))); /* store the decremented value back */
+    if (Deallocate.get() > 0) { /* emit nothing when there are zero words to deallocate */
+        add(E, E, Deallocate.get() * sizeof(Eterm)); /* E += Deallocate words; presumably E is the stack pointer - TODO confirm */
+    }
+}
+
+void BeamModuleAssembler::emit_try_end_move_deallocate(
+        const ArgSource &Src,
+        const ArgRegister &Dst,
+        const ArgWord &Deallocate) { /* Fused `try_end` + `move Src Dst` + `deallocate`; no emit_init(CatchTag) is emitted. */
+    a.ldr(TMP1, arm::Mem(c_p, offsetof(Process, catches))); /* TMP1 = the `catches` field of the Process at c_p */
+    a.sub(TMP1, TMP1, imm(1)); /* decrement it by one */
+    a.str(TMP1, arm::Mem(c_p, offsetof(Process, catches))); /* store the decremented value back */
+    mov_arg(Dst, Src); /* the fused move, emitted before E changes; NOTE(review): presumably because Src/Dst may be addressed relative to E - confirm */
+    if (Deallocate.get() > 0) { /* emit nothing when there are zero words to deallocate */
+        add(E, E, Deallocate.get() * sizeof(Eterm)); /* E += Deallocate words; presumably E is the stack pointer - TODO confirm */
+    }
+}
+
 void BeamModuleAssembler::emit_try_case(const ArgYRegister &CatchTag) {
     /* XREG0 = THE_NON_VALUE
      * XREG1 = error reason/thrown value
diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab
index b0c79d3e2c..2c3cd64757 100644
--- a/erts/emulator/beam/jit/arm/ops.tab
+++ b/erts/emulator/beam/jit/arm/ops.tab
@@ -179,6 +179,12 @@ try_end y
 
 try_case_end s
 
+try_end Y | deallocate N => try_end_deallocate N
+try_end Y | move Src Dst | deallocate N => try_end_move_deallocate Src Dst N
+
+try_end_deallocate t
+try_end_move_deallocate s d t
+
 # Destructive set tuple element
 
 set_tuple_element s S P
diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp
index 7574df29e0..fa45b9e0cd 100644
--- a/erts/emulator/beam/jit/x86/instr_common.cpp
+++ b/erts/emulator/beam/jit/x86/instr_common.cpp
@@ -2440,6 +2440,11 @@ void BeamModuleAssembler::emit_try_end(const ArgYRegister &CatchTag) {
     emit_init(CatchTag);
 }
 
+void BeamModuleAssembler::emit_try_end_deallocate(const ArgWord &Deallocate) { /* Fused `try_end` + `deallocate`; unlike emit_try_end, no emit_init(CatchTag) is emitted. */
+    a.dec(x86::qword_ptr(c_p, offsetof(Process, catches))); /* decrement the 64-bit `catches` field of the Process at c_p, directly in memory */
+    emit_deallocate(Deallocate); /* reuse the plain `deallocate` emitter (defined outside this hunk); presumably releases Deallocate stack words - TODO confirm */
+}
+
 void BeamModuleAssembler::emit_try_case(const ArgYRegister &CatchTag) {
     /* The try_tag in the Y slot in the stack frame has already been
      * cleared. */
diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab
index bbc2313118..6879225dcd 100644
--- a/erts/emulator/beam/jit/x86/ops.tab
+++ b/erts/emulator/beam/jit/x86/ops.tab
@@ -194,6 +194,10 @@ try_end y
 
 try_case_end s
 
+try_end Y | deallocate N => try_end_deallocate N
+
+try_end_deallocate t
+
 # Destructive set tuple element
 
 set_tuple_element s S P
-- 
2.35.3

openSUSE Build Service is sponsored by