File 2162-Improve-register-usage.patch of Package erlang

From 365224b5b62183c8b3bc685df09c60232df07a0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 1 Feb 2022 20:45:17 +0100
Subject: [PATCH 2/7] Improve register usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't refetch a BEAM register whose contents already happen to be in a
CPU register (left there from the previous instruction).

This optimization makes the most difference for the x86_64 JIT, where it
is applied about 57,000 times in the Erlang/OTP code base.

The AArch64 JIT keeps the 6 lowest-numbered X registers in CPU
registers, but the optimization is still applied about 10,000 times in
the Erlang/OTP code base.

Co-authored-by: John Högberg <john@erlang.org>
---
 erts/emulator/beam/jit/arm/beam_asm.hpp       | 43 +++++++++++++++++--
 .../emulator/beam/jit/arm/beam_asm_module.cpp |  2 +
 erts/emulator/beam/jit/x86/beam_asm.hpp       | 31 ++++++++++++-
 .../emulator/beam/jit/x86/beam_asm_module.cpp |  2 +
 4 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp
index 15964e8190..3b5fe2f6dd 100644
--- a/erts/emulator/beam/jit/arm/beam_asm.hpp
+++ b/erts/emulator/beam/jit/arm/beam_asm.hpp
@@ -1003,6 +1003,12 @@ class BeamModuleAssembler : public BeamAssembler {
      * fragments as if they were local. */
     std::unordered_map<void (*)(), Label> _dispatchTable;
 
+    /* Skip unnecessary moves in load_source(), load_sources(), and
+     * mov_arg(). */
+    size_t last_destination_offset = 0;
+    arm::Gp last_destination_from;
+    arm::Mem last_destination_to;
+
 public:
     BeamModuleAssembler(BeamGlobalAssembler *ga,
                         Eterm mod,
@@ -1524,11 +1530,32 @@ protected:
         } else if (arg.isRegister()) {
             if (isRegisterBacked(arg)) {
                 auto index = arg.as<ArgXRegister>().get();
-                return Variable(register_backed_xregs[index]);
+                arm::Gp reg = register_backed_xregs[index];
+                if (reg == last_destination_from) {
+                    last_destination_offset = ~0;
+                }
+                return Variable(reg);
             }
 
             auto ref = getArgRef(arg);
-            a.ldr(tmp, ref);
+
+            if (a.offset() == last_destination_offset &&
+                ref == last_destination_to) {
+                if (last_destination_from != tmp) {
+                    comment("simplified fetching of BEAM register");
+                    a.mov(tmp, last_destination_from);
+                } else {
+                    comment("skipped fetching of BEAM register");
+                }
+                last_destination_offset = ~0;
+            } else if (a.offset() == last_destination_offset) {
+                a.ldr(tmp, ref);
+                if (last_destination_from != tmp) {
+                    last_destination_offset = a.offset();
+                }
+            } else {
+                a.ldr(tmp, ref);
+            }
             return Variable(tmp, ref);
         } else {
             if (arg.isImmed() || arg.isWord()) {
@@ -1597,8 +1624,16 @@ protected:
         }
     }
 
-    template<typename Reg>
-    void flush_var(const Variable<Reg> &to) {
+    void flush_var(const Variable<arm::Gp> &to) {
+        if (to.mem.hasBase()) {
+            a.str(to.reg, to.mem);
+            last_destination_offset = a.offset();
+            last_destination_to = to.mem;
+            last_destination_from = to.reg;
+        }
+    }
+
+    void flush_var(const Variable<arm::VecD> &to) {
         if (to.mem.hasBase()) {
             a.str(to.reg, to.mem);
         }
diff --git a/erts/emulator/beam/jit/arm/beam_asm_module.cpp b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
index 219df1f125..9adc000451 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_module.cpp
+++ b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
@@ -350,6 +350,8 @@ void BeamModuleAssembler::emit_label(const ArgLabel &Label) {
 
     current_label = rawLabels[Label.get()];
     bind_veneer_target(current_label);
+
+    last_destination_offset = ~0;
 }
 
 void BeamModuleAssembler::emit_aligned_label(const ArgLabel &Label,
diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp
index a95f4d10ea..b0eaddb455 100644
--- a/erts/emulator/beam/jit/x86/beam_asm.hpp
+++ b/erts/emulator/beam/jit/x86/beam_asm.hpp
@@ -1086,6 +1086,11 @@ class BeamModuleAssembler : public BeamAssembler {
     /* Save the last PC for an error. */
     size_t last_error_offset = 0;
 
+    /* Skip unnecessary moves in mov_arg. */
+    size_t last_movarg_offset = 0;
+    x86::Gp last_movarg_from;
+    x86::Mem last_movarg_to;
+
     /* Maps code pointers to thunks that jump to them, letting us treat global
      * fragments as if they were local. */
     std::unordered_map<void (*)(), Label> _dispatchTable;
@@ -1550,6 +1555,8 @@ protected:
 
     /* Note: May clear flags. */
     void mov_arg(x86::Gp to, const ArgVal &from, const x86::Gp &spill) {
+        bool is_last_offset_valid = a.offset() == last_movarg_offset;
+
         if (from.isBytePtr()) {
             make_move_patch(to, strings, from.as<ArgBytePtr>().get());
         } else if (from.isExport()) {
@@ -1561,13 +1568,28 @@ protected:
         } else if (from.isLiteral()) {
             make_move_patch(to, literals[from.as<ArgLiteral>().get()].patches);
         } else if (from.isRegister()) {
-            a.mov(to, getArgRef(from.as<ArgRegister>()));
+            auto mem = getArgRef(from.as<ArgRegister>());
+
+            if (a.offset() == last_movarg_offset && mem == last_movarg_to) {
+                if (last_movarg_from != to) {
+                    comment("simplified fetching of BEAM register");
+                    a.mov(to, last_movarg_from);
+                } else {
+                    comment("skipped fetching of BEAM register");
+                }
+            } else {
+                a.mov(to, mem);
+            }
         } else if (from.isWord()) {
             mov_imm(to, from.as<ArgWord>().get());
         } else {
             ASSERT(!"mov_arg with incompatible type");
         }
 
+        if (is_last_offset_valid && last_movarg_from != to) {
+            last_movarg_offset = a.offset();
+        }
+
 #ifdef DEBUG
         /* Explicitly clear flags to catch bugs quicker, it may be very rare
          * for a certain instruction to load values that would otherwise cause
@@ -1604,7 +1626,12 @@ protected:
     void mov_arg(const ArgVal &to, x86::Gp from, const x86::Gp &spill) {
         (void)spill;
 
-        a.mov(getArgRef(to), from);
+        auto mem = getArgRef(to);
+        a.mov(mem, from);
+
+        last_movarg_offset = a.offset();
+        last_movarg_to = mem;
+        last_movarg_from = from;
     }
 
     void mov_arg(const ArgVal &to, x86::Mem from, const x86::Gp &spill) {
diff --git a/erts/emulator/beam/jit/x86/beam_asm_module.cpp b/erts/emulator/beam/jit/x86/beam_asm_module.cpp
index fbde1bd565..83e11dc8ba 100644
--- a/erts/emulator/beam/jit/x86/beam_asm_module.cpp
+++ b/erts/emulator/beam/jit/x86/beam_asm_module.cpp
@@ -326,6 +326,8 @@ void BeamModuleAssembler::emit_label(const ArgLabel &Label) {
 
     current_label = rawLabels[Label.get()];
     a.bind(current_label);
+
+    last_movarg_offset = ~0;
 }
 
 void BeamModuleAssembler::emit_aligned_label(const ArgLabel &Label,
-- 
2.35.3

openSUSE Build Service is sponsored by