File 1131-x86_64-Optimize-creation-of-fun-environment.patch of Package erlang

From ae127203ac2423d057e1ef151d4ca8b114740b84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 7 Sep 2023 17:04:03 +0200
Subject: [PATCH 11/25] x86_64: Optimize creation of fun environment

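Whenever possible, use SSE/AVX to copy two registers at once to the
fun environment. This applies when two successive environment values
are in registers that are adjacent in memory. If the registers are in
reverse order, they are swapped while being copied; that path requires
AVX, and without it the values are moved one at a time as before.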
---
 erts/emulator/beam/jit/x86/instr_fun.cpp | 38 ++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/erts/emulator/beam/jit/x86/instr_fun.cpp b/erts/emulator/beam/jit/x86/instr_fun.cpp
index 2d7fe8c168..e621d2529f 100644
--- a/erts/emulator/beam/jit/x86/instr_fun.cpp
+++ b/erts/emulator/beam/jit/x86/instr_fun.cpp
@@ -206,9 +206,41 @@ void BeamModuleAssembler::emit_i_make_fun3(const ArgLambda &Lambda,
 
     comment("Move fun environment");
     for (unsigned i = 0; i < num_free; i++) {
-        mov_arg(x86::qword_ptr(RET,
-                               offsetof(ErlFunThing, env) + i * sizeof(Eterm)),
-                env[i]);
+        const ArgVal &next = i + 1 < num_free ? env[i + 1] : ArgNil();
+        switch (ArgVal::memory_relation(env[i], next)) {
+        case ArgVal::Relation::consecutive: {
+            x86::Mem src_ptr = getArgRef(env[i].as<ArgRegister>(), 16);
+            x86::Mem dst_ptr = x86::xmmword_ptr(RET,
+                                                offsetof(ErlFunThing, env) +
+                                                        i * sizeof(Eterm));
+            comment("(moving two items)");
+            vmovups(x86::xmm0, src_ptr);
+            vmovups(dst_ptr, x86::xmm0);
+            i++;
+            break;
+        }
+        case ArgVal::Relation::reverse_consecutive: {
+            if (!hasCpuFeature(CpuFeatures::X86::kAVX)) {
+                goto fallback;
+            }
+            x86::Mem src_ptr = getArgRef(env[i + 1].as<ArgRegister>(), 16);
+            x86::Mem dst_ptr = x86::xmmword_ptr(RET,
+                                                offsetof(ErlFunThing, env) +
+                                                        i * sizeof(Eterm));
+            comment("(moving and swapping two items)");
+            a.vpermilpd(x86::xmm0, src_ptr, 1); /* Load and swap */
+            a.vmovups(dst_ptr, x86::xmm0);
+            i++;
+            break;
+        }
+        case ArgVal::Relation::none:
+        fallback:
+            mov_arg(x86::qword_ptr(RET,
+                                   offsetof(ErlFunThing, env) +
+                                           i * sizeof(Eterm)),
+                    env[i]);
+            break;
+        }
     }
 
     comment("Create boxed ptr");
-- 
2.35.3

openSUSE Build Service is sponsored by