File 2671-erts-Simplify-deallocate_return.patch of Package erlang

From 9ee1b789e2a085a8cf59c249b46130a8c8801d0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Mon, 26 Aug 2019 10:26:06 +0200
Subject: [PATCH] erts: Simplify deallocate_return

---
 erts/emulator/beam/instrs.tab | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 156de67716..38b1e5909b 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -66,11 +66,25 @@ deallocate(Deallocate) {
     E = ADD_BYTE_OFFSET(E, $Deallocate);
 }
 
+//
+// Micro-benchmarks showed that the deallocate_return instruction
+// became slower when the continuation pointer was moved from
+// the process struct to the stack. The reason seems to be read
+// dependencies, i.e. that the CPU cannot figure out beforehand
+// from which position on the stack the continuation pointer
+// should be fetched.
+//
+// Initializing num_bytes with a compile-time constant seems to
+// restore the lost speed, so the most common sizes (n0-n4) are
+// specialized; the var variant reads the size from its operand.
+//
+
 deallocate_return0 := dealloc_ret.n0.execute;
 deallocate_return1 := dealloc_ret.n1.execute;
 deallocate_return2 := dealloc_ret.n2.execute;
 deallocate_return3 := dealloc_ret.n3.execute;
 deallocate_return4 := dealloc_ret.n4.execute;
+deallocate_return := dealloc_ret.var.execute;
 
 dealloc_ret.head() {
     Uint num_bytes;
@@ -96,36 +110,19 @@ dealloc_ret.n4() {
     num_bytes = (4+1) * sizeof(Eterm);
 }
 
+dealloc_ret.var(Deallocate) {
+    num_bytes = $Deallocate; /* used as-is as a byte offset by execute; not scaled like n0-n4 */
+}
+
 dealloc_ret.execute() {
     //| -no_next
 
-    /*
-     * Micro-benchmarks showed that the deallocate_return instruction
-     * became slower when the continuation pointer was moved from
-     * the process struct to the stack. The reason seems to be read
-     * dependencies, i.e. that the CPU cannot figure out beforehand
-     * from which position on the stack the continuation pointer
-     * should be fetched.
-     *
-     * Making sure that num_bytes is always initialized with a
-     * constant value seems to restore the lost speed.
-     */
-
     E = ADD_BYTE_OFFSET(E, num_bytes);
     $RETURN();
     CHECK_TERM(x(0));
     DispatchReturn;
 }
 
-deallocate_return(Deallocate) {
-    //| -no_next
-    Uint bytes_to_pop = $Deallocate;
-    E = ADD_BYTE_OFFSET(E, bytes_to_pop);
-    $RETURN();
-    CHECK_TERM(x(0));
-    DispatchReturn;
-}
-
 move_deallocate_return(Src, Deallocate) {
     //| -no_next
 
-- 
2.16.4

openSUSE Build Service is sponsored by