File 1135-Improve-reuse-of-registers.patch of Package erlang

From 4ad9e431776f60b83ca9b640bc5606f27e3d0197 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 14 Sep 2023 17:11:46 +0200
Subject: [PATCH 15/25] Improve reuse of registers

Improve reuse of registers by recognizing more neutral instructions
that will not force the cache to be discarded and by updating the
cache when reading from memory.

For the x86_64 JIT, with this commit 115697 loads of BEAM registers
are avoided, compared to 88786 loads before this commit.

For the AArch64 JIT, with this commit 14938 loads of BEAM registers
are avoided, compared to 14422 loads before this commit.
---
 erts/emulator/beam/jit/arm/beam_asm.hpp | 21 +++++++++++++--
 erts/emulator/beam/jit/x86/beam_asm.hpp | 36 +++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp
index 5789ef2978..5d4ee1364c 100644
--- a/erts/emulator/beam/jit/arm/beam_asm.hpp
+++ b/erts/emulator/beam/jit/arm/beam_asm.hpp
@@ -925,6 +925,10 @@ class BeamModuleAssembler : public BeamAssembler,
         invalidate_cache(dst);
     }
 
+    bool is_cache_valid() {
+        return a.offset() == last_destination_offset;
+    }
+
     /* Works as the STR instruction, but also updates the cache. */
     void str_cache(arm::Gp src, arm::Mem dst) {
         if (a.offset() == last_destination_offset &&
@@ -997,6 +1001,10 @@ class BeamModuleAssembler : public BeamAssembler,
         } else {
             /* The cache is invalid. */
             a.ldr(dst, mem);
+            last_destination_offset = a.offset();
+            last_destination_to1 = mem;
+            last_destination_from1 = dst;
+            last_destination_to2 = arm::Mem();
         }
     }
 
@@ -1553,14 +1561,14 @@ protected:
     void mov_arg(arm::Gp to, const ArgVal &from) {
         auto r = load_source(from, to);
         if (r.reg != to) {
-            a.mov(to, r.reg);
+            mov_preserve_cache(to, r.reg);
         }
     }
 
     void mov_arg(const ArgVal &to, arm::Gp from) {
         auto r = init_destination(to, from);
         if (r.reg != from) {
-            a.mov(r.reg, from);
+            mov_preserve_cache(r.reg, from);
         }
         flush_var(r);
     }
@@ -1613,7 +1621,11 @@ protected:
         ASSERT(gp.isGpX());
 
         if (abs_offset <= sizeof(Eterm) * MAX_LDR_STR_DISPLACEMENT) {
+            bool valid_cache = is_cache_valid();
             a.ldr(gp, mem);
+            if (valid_cache) {
+                preserve__cache(gp);
+            }
         } else {
             add(SUPER_TMP, arm::GpX(mem.baseId()), offset);
             a.ldr(gp, arm::Mem(SUPER_TMP));
@@ -1653,7 +1665,12 @@ protected:
         ASSERT(gp1 != gp2);
 
         if (abs_offset <= sizeof(Eterm) * MAX_LDP_STP_DISPLACEMENT) {
+            bool valid_cache = is_cache_valid();
             a.ldp(gp1, gp2, mem);
+            if (valid_cache) {
+                preserve__cache(gp1);
+                preserve__cache(gp2);
+            }
         } else if (abs_offset < sizeof(Eterm) * MAX_LDR_STR_DISPLACEMENT) {
             /* Note that we used `<` instead of `<=`, as we're loading two
              * elements rather than one. */
diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp
index 8163ceec06..907209bb30 100644
--- a/erts/emulator/beam/jit/x86/beam_asm.hpp
+++ b/erts/emulator/beam/jit/x86/beam_asm.hpp
@@ -1042,6 +1042,11 @@ class BeamModuleAssembler : public BeamAssembler,
         invalidate_cache(dst);
     }
 
+    void preserve__cache(x86::Mem mem) {
+        last_movarg_offset = a.offset();
+        invalidate_cache(mem);
+    }
+
     bool is_cache_valid() {
         return a.offset() == last_movarg_offset;
     }
@@ -1052,6 +1057,12 @@ class BeamModuleAssembler : public BeamAssembler,
         }
     }
 
+    void preserve_cache(x86::Mem mem, bool cache_valid) {
+        if (cache_valid) {
+            preserve__cache(mem);
+        }
+    }
+
     /* Store CPU register into memory and update the cache. */
     void store_cache(x86::Gp src, x86::Mem dst) {
         if (is_cache_valid() && dst != last_movarg_to1) {
@@ -1087,6 +1098,17 @@ class BeamModuleAssembler : public BeamAssembler,
         }
     }
 
+    void invalidate_cache(x86::Mem mem) {
+        if (mem == last_movarg_to1) {
+            last_movarg_to1 = x86::Mem();
+            last_movarg_from1 = x86::Gp();
+        }
+        if (mem == last_movarg_to2) {
+            last_movarg_to2 = x86::Mem();
+            last_movarg_from2 = x86::Gp();
+        }
+    }
+
     x86::Gp cached_reg(x86::Mem mem) {
         if (is_cache_valid()) {
             if (mem == last_movarg_to1) {
@@ -1121,6 +1143,12 @@ class BeamModuleAssembler : public BeamAssembler,
         } else {
             /* The cache is invalid. */
             a.mov(dst, mem);
+
+            last_movarg_offset = a.offset();
+            last_movarg_to1 = mem;
+            last_movarg_from1 = dst;
+
+            last_movarg_to2 = x86::Mem();
         }
     }
 
@@ -1500,25 +1528,33 @@ protected:
     void mov_arg(x86::Mem to, const ArgVal &from, const x86::Gp &spill) {
         if (from.isImmed()) {
             auto val = from.as<ArgImmed>().get();
+            bool cache_valid = is_cache_valid();
 
             if (Support::isInt32((Sint)val)) {
                 a.mov(to, imm(val));
             } else {
                 a.mov(spill, imm(val));
                 a.mov(to, spill);
+                preserve_cache(spill, cache_valid);
             }
+            preserve_cache(to, cache_valid);
         } else if (from.isWord()) {
             auto val = from.as<ArgWord>().get();
+            bool cache_valid = is_cache_valid();
 
             if (Support::isInt32((Sint)val)) {
                 a.mov(to, imm(val));
             } else {
                 a.mov(spill, imm(val));
                 a.mov(to, spill);
+                preserve_cache(spill, cache_valid);
             }
+            preserve_cache(to, cache_valid);
         } else {
             mov_arg(spill, from);
+            bool cache_valid = is_cache_valid();
             a.mov(to, spill);
+            preserve_cache(to, cache_valid);
         }
     }
 
-- 
2.35.3

openSUSE Build Service is sponsored by