File 2200-Unroll-binary-construction-loop.patch of Package erlang

From b35ad13dfeb6ca4dc9035e543315ae648b09ecd5 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Tue, 5 Apr 2022 13:55:22 +0200
Subject: [PATCH 10/11] Unroll binary construction loop

Gives more than 25% shorter execution time for a 1000-byte binary.

Test larger binaries, i.e. 1000 bytes, in measure/1.
---
 lib/stdlib/src/rand.erl        | 16 ++++++++++++++++
 lib/stdlib/test/rand_SUITE.erl | 31 +++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 51948a28d9..f92d4212b4 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -588,6 +588,22 @@ bytes_r(N, AlgHandler, Next, R, Bits, WeakLowBits) ->
     Shift = Bits - GoodBits,
     bytes_r(N, AlgHandler, Next, R, <<>>, GoodBytes, GoodBits, Shift).
 %%
+bytes_r(N0, AlgHandler, Next, R0, Bytes0, GoodBytes, GoodBits, Shift)
+  when (GoodBytes bsl 2) < N0 ->
+    %% Loop unroll 4 iterations
+    %% - gives about 25% shorter time for large binaries
+    {V1, R1} = Next(R0),
+    {V2, R2} = Next(R1),
+    {V3, R3} = Next(R2),
+    {V4, R4} = Next(R3),
+    Bytes1 =
+        <<Bytes0/binary,
+          (V1 bsr Shift):GoodBits,
+          (V2 bsr Shift):GoodBits,
+          (V3 bsr Shift):GoodBits,
+          (V4 bsr Shift):GoodBits>>,
+    N1 = N0 - (GoodBytes bsl 2),
+    bytes_r(N1, AlgHandler, Next, R4, Bytes1, GoodBytes, GoodBits, Shift);
 bytes_r(N0, AlgHandler, Next, R0, Bytes0, GoodBytes, GoodBits, Shift)
   when GoodBytes < N0 ->
     {V, R1} = Next(R0),
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index 53e6e30a09..7ce68b349d 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -1480,7 +1480,7 @@ do_measure(Iterations) ->
     %%
     ByteSize = 16, % At about 100 bytes crypto_bytes breaks even to exsss
     ct:pal("~nRNG ~w bytes performance~n",[ByteSize]),
-    [TMarkBytes16,OverheadBytes16|_] =
+    [TMarkBytes1,OverheadBytes1|_] =
         measure_1(
           fun (Mod, _State) ->
                   Generator = fun Mod:bytes_s/2,
@@ -1503,7 +1503,34 @@ do_measure(Iterations) ->
                              lcg35_bytes(ByteSize, St0), ByteSize, Bin, St1)
                   end
           end, lcg35_bytes, Iterations,
-          TMarkBytes16, OverheadBytes16),
+          TMarkBytes1, OverheadBytes1),
+    %%
+    ByteSize2 = 1000, % At about 100 bytes crypto_bytes breaks even to exsss
+    ct:pal("~nRNG ~w bytes performance~n",[ByteSize2]),
+    [TMarkBytes2,OverheadBytes2|_] =
+        measure_1(
+          fun (Mod, _State) ->
+                  Generator = fun Mod:bytes_s/2,
+                  fun (St0) ->
+                          ?CHECK_BYTE_SIZE(
+                             Generator(ByteSize2, St0), ByteSize2, Bin, St1)
+                  end
+          end,
+          case crypto_support() of
+              ok ->
+                  Algs ++ [crypto_bytes, crypto_bytes_cached];
+              _ ->
+                  Algs
+          end, Iterations div 50),
+    _ =
+        measure_1(
+          fun (_Mod, _State) ->
+                  fun (St0) ->
+                          ?CHECK_BYTE_SIZE(
+                             lcg35_bytes(ByteSize2, St0), ByteSize2, Bin, St1)
+                  end
+          end, lcg35_bytes, Iterations div 50,
+          TMarkBytes2, OverheadBytes2),
     %%
     ct:pal("~nRNG uniform float performance~n",[]),
     [TMarkUniformFloat,OverheadUniformFloat|_] =
-- 
2.34.1

openSUSE Build Service is sponsored by