File 2663-Optimize-the-move_call-instructions.patch of Package erlang
From 07a323813a47f92abf05bdf2255dc1354e82c55c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Sun, 18 Aug 2019 08:31:53 +0200
Subject: [PATCH 3/3] Optimize the move_call instructions

The `move_call` instructions are combinations of a `move` instruction
and a `call` instruction. As currently implemented, the `move` part of
the instruction is executed in its entirety before the `call` part is
even started. The reason is that the C compiler cannot see that it
would be safe to read the target address of the call before writing to
the move destination.

Rewrite the instructions to explicitly read both the source for the
move and the target address for the call before writing the
destination of the move.

Micro-benchmarks show a small but consistent speed-up after this
change.
---
erts/emulator/beam/instrs.tab | 45 ++++++++++++++++++++++++++++---------------
1 file changed, 30 insertions(+), 15 deletions(-)
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 807f4512d1..156de67716 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -165,9 +165,11 @@ i_call(CallDest) {
}
move_call(Src, CallDest) {
- x(0) = $Src;
+ Eterm call_dest = $CallDest;
+ Eterm src = $Src;
$SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
- $DISPATCH_REL($CallDest);
+ x(0) = src;
+ $DISPATCH_REL(call_dest);
}
i_call_last(CallDest, Deallocate) {
@@ -176,8 +178,11 @@ i_call_last(CallDest, Deallocate) {
}
move_call_last(Src, CallDest, Deallocate) {
- x(0) = $Src;
- $i_call_last($CallDest, $Deallocate);
+ Eterm call_dest = $CallDest;
+ Eterm src = $Src;
+ $deallocate($Deallocate);
+ x(0) = src;
+ $DISPATCH_REL(call_dest);
}
i_call_only(CallDest) {
@@ -185,8 +190,10 @@ i_call_only(CallDest) {
}
move_call_only(Src, CallDest) {
- x(0) = $Src;
- $i_call_only($CallDest);
+ Eterm call_dest = $CallDest;
+ Eterm src = $Src;
+ x(0) = src;
+ $DISPATCH_REL(call_dest);
}
DISPATCHX(Dest) {
@@ -202,18 +209,23 @@ i_call_ext(Dest) {
$DISPATCHX($Dest);
}
-i_move_call_ext(Src, Dest) {
- x(0) = $Src;
- $i_call_ext($Dest);
+i_move_call_ext(Src, CallDest) {
+ Eterm call_dest = $CallDest;
+ Eterm src = $Src;
+ $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
+ x(0) = src;
+ $DISPATCHX(call_dest);
}
i_call_ext_only(Dest) {
$DISPATCHX($Dest);
}
-i_move_call_ext_only(Dest, Src) {
- x(0) = $Src;
- $i_call_ext_only($Dest);
+i_move_call_ext_only(CallDest, Src) {
+ Eterm call_dest = $CallDest;
+ Eterm src = $Src;
+ x(0) = src;
+ $DISPATCHX(call_dest);
}
i_call_ext_last(Dest, Deallocate) {
@@ -221,9 +233,12 @@ i_call_ext_last(Dest, Deallocate) {
$DISPATCHX($Dest);
}
-i_move_call_ext_last(Dest, StackOffset, Src) {
- x(0) = $Src;
- $i_call_ext_last($Dest, $StackOffset);
+i_move_call_ext_last(CallDest, Deallocate, Src) {
+ Eterm call_dest = $CallDest;
+ Eterm src = $Src;
+ $deallocate($Deallocate);
+ x(0) = src;
+ $DISPATCHX(call_dest);
}
APPLY(I, Deallocate, Next) {
--
2.16.4