File 2162-Improve-register-usage.patch of Package erlang
From 365224b5b62183c8b3bc685df09c60232df07a0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 1 Feb 2022 20:45:17 +0100
Subject: [PATCH 2/7] Improve register usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't refetch a BEAM register whose contents already happen to be in a
CPU register (left there from the previous instruction).
This optimization makes the most difference for the x86_64 JIT, where
it is applied about 57,000 times in the Erlang/OTP code base.
The AArch64 JIT keeps the 6 lowest-numbered X registers in CPU
registers, but the optimization is still applied about 10,000 times in
the Erlang/OTP code base.
Co-authored-by: John Högberg <john@erlang.org>
---
erts/emulator/beam/jit/arm/beam_asm.hpp | 43 +++++++++++++++++--
.../emulator/beam/jit/arm/beam_asm_module.cpp | 2 +
erts/emulator/beam/jit/x86/beam_asm.hpp | 31 ++++++++++++-
.../emulator/beam/jit/x86/beam_asm_module.cpp | 2 +
4 files changed, 72 insertions(+), 6 deletions(-)
diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp
index 15964e8190..3b5fe2f6dd 100644
--- a/erts/emulator/beam/jit/arm/beam_asm.hpp
+++ b/erts/emulator/beam/jit/arm/beam_asm.hpp
@@ -1003,6 +1003,12 @@ class BeamModuleAssembler : public BeamAssembler {
* fragments as if they were local. */
std::unordered_map<void (*)(), Label> _dispatchTable;
+ /* Skip unnecessary moves in load_source(), load_sources(), and
+ * mov_arg(). */
+ size_t last_destination_offset = 0;
+ arm::Gp last_destination_from;
+ arm::Mem last_destination_to;
+
public:
BeamModuleAssembler(BeamGlobalAssembler *ga,
Eterm mod,
@@ -1524,11 +1530,32 @@ protected:
} else if (arg.isRegister()) {
if (isRegisterBacked(arg)) {
auto index = arg.as<ArgXRegister>().get();
- return Variable(register_backed_xregs[index]);
+ arm::Gp reg = register_backed_xregs[index];
+ if (reg == last_destination_from) {
+ last_destination_offset = ~0;
+ }
+ return Variable(reg);
}
auto ref = getArgRef(arg);
- a.ldr(tmp, ref);
+
+ if (a.offset() == last_destination_offset &&
+ ref == last_destination_to) {
+ if (last_destination_from != tmp) {
+ comment("simplified fetching of BEAM register");
+ a.mov(tmp, last_destination_from);
+ } else {
+ comment("skipped fetching of BEAM register");
+ }
+ last_destination_offset = ~0;
+ } else if (a.offset() == last_destination_offset) {
+ a.ldr(tmp, ref);
+ if (last_destination_from != tmp) {
+ last_destination_offset = a.offset();
+ }
+ } else {
+ a.ldr(tmp, ref);
+ }
return Variable(tmp, ref);
} else {
if (arg.isImmed() || arg.isWord()) {
@@ -1597,8 +1624,16 @@ protected:
}
}
- template<typename Reg>
- void flush_var(const Variable<Reg> &to) {
+ void flush_var(const Variable<arm::Gp> &to) {
+ if (to.mem.hasBase()) {
+ a.str(to.reg, to.mem);
+ last_destination_offset = a.offset();
+ last_destination_to = to.mem;
+ last_destination_from = to.reg;
+ }
+ }
+
+ void flush_var(const Variable<arm::VecD> &to) {
if (to.mem.hasBase()) {
a.str(to.reg, to.mem);
}
diff --git a/erts/emulator/beam/jit/arm/beam_asm_module.cpp b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
index 219df1f125..9adc000451 100644
--- a/erts/emulator/beam/jit/arm/beam_asm_module.cpp
+++ b/erts/emulator/beam/jit/arm/beam_asm_module.cpp
@@ -350,6 +350,8 @@ void BeamModuleAssembler::emit_label(const ArgLabel &Label) {
current_label = rawLabels[Label.get()];
bind_veneer_target(current_label);
+
+ last_destination_offset = ~0;
}
void BeamModuleAssembler::emit_aligned_label(const ArgLabel &Label,
diff --git a/erts/emulator/beam/jit/x86/beam_asm.hpp b/erts/emulator/beam/jit/x86/beam_asm.hpp
index a95f4d10ea..b0eaddb455 100644
--- a/erts/emulator/beam/jit/x86/beam_asm.hpp
+++ b/erts/emulator/beam/jit/x86/beam_asm.hpp
@@ -1086,6 +1086,11 @@ class BeamModuleAssembler : public BeamAssembler {
/* Save the last PC for an error. */
size_t last_error_offset = 0;
+ /* Skip unnecessary moves in mov_arg. */
+ size_t last_movarg_offset = 0;
+ x86::Gp last_movarg_from;
+ x86::Mem last_movarg_to;
+
/* Maps code pointers to thunks that jump to them, letting us treat global
* fragments as if they were local. */
std::unordered_map<void (*)(), Label> _dispatchTable;
@@ -1550,6 +1555,8 @@ protected:
/* Note: May clear flags. */
void mov_arg(x86::Gp to, const ArgVal &from, const x86::Gp &spill) {
+ bool is_last_offset_valid = a.offset() == last_movarg_offset;
+
if (from.isBytePtr()) {
make_move_patch(to, strings, from.as<ArgBytePtr>().get());
} else if (from.isExport()) {
@@ -1561,13 +1568,28 @@ protected:
} else if (from.isLiteral()) {
make_move_patch(to, literals[from.as<ArgLiteral>().get()].patches);
} else if (from.isRegister()) {
- a.mov(to, getArgRef(from.as<ArgRegister>()));
+ auto mem = getArgRef(from.as<ArgRegister>());
+
+ if (a.offset() == last_movarg_offset && mem == last_movarg_to) {
+ if (last_movarg_from != to) {
+ comment("simplified fetching of BEAM register");
+ a.mov(to, last_movarg_from);
+ } else {
+ comment("skipped fetching of BEAM register");
+ }
+ } else {
+ a.mov(to, mem);
+ }
} else if (from.isWord()) {
mov_imm(to, from.as<ArgWord>().get());
} else {
ASSERT(!"mov_arg with incompatible type");
}
+ if (is_last_offset_valid && last_movarg_from != to) {
+ last_movarg_offset = a.offset();
+ }
+
#ifdef DEBUG
/* Explicitly clear flags to catch bugs quicker, it may be very rare
* for a certain instruction to load values that would otherwise cause
@@ -1604,7 +1626,12 @@ protected:
void mov_arg(const ArgVal &to, x86::Gp from, const x86::Gp &spill) {
(void)spill;
- a.mov(getArgRef(to), from);
+ auto mem = getArgRef(to);
+ a.mov(mem, from);
+
+ last_movarg_offset = a.offset();
+ last_movarg_to = mem;
+ last_movarg_from = from;
}
void mov_arg(const ArgVal &to, x86::Mem from, const x86::Gp &spill) {
diff --git a/erts/emulator/beam/jit/x86/beam_asm_module.cpp b/erts/emulator/beam/jit/x86/beam_asm_module.cpp
index fbde1bd565..83e11dc8ba 100644
--- a/erts/emulator/beam/jit/x86/beam_asm_module.cpp
+++ b/erts/emulator/beam/jit/x86/beam_asm_module.cpp
@@ -326,6 +326,8 @@ void BeamModuleAssembler::emit_label(const ArgLabel &Label) {
current_label = rawLabels[Label.get()];
a.bind(current_label);
+
+ last_movarg_offset = ~0;
}
void BeamModuleAssembler::emit_aligned_label(const ArgLabel &Label,
--
2.35.3