File 2200-Unroll-binary-construction-loop.patch of Package erlang
From b35ad13dfeb6ca4dc9035e543315ae648b09ecd5 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Tue, 5 Apr 2022 13:55:22 +0200
Subject: [PATCH 10/11] Unroll binary construction loop
Gives more than 25% shorter execution time for a 1000-byte binary.
Test larger binaries, i.e. 1000 bytes, in measure/1.
---
lib/stdlib/src/rand.erl | 16 ++++++++++++++++
lib/stdlib/test/rand_SUITE.erl | 31 +++++++++++++++++++++++++++++--
2 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 51948a28d9..f92d4212b4 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -588,6 +588,22 @@ bytes_r(N, AlgHandler, Next, R, Bits, WeakLowBits) ->
Shift = Bits - GoodBits,
bytes_r(N, AlgHandler, Next, R, <<>>, GoodBytes, GoodBits, Shift).
%%
+bytes_r(N0, AlgHandler, Next, R0, Bytes0, GoodBytes, GoodBits, Shift)
+ when (GoodBytes bsl 2) < N0 ->
+ %% Loop unroll 4 iterations
+ %% - gives about 25% shorter time for large binaries
+ {V1, R1} = Next(R0),
+ {V2, R2} = Next(R1),
+ {V3, R3} = Next(R2),
+ {V4, R4} = Next(R3),
+ Bytes1 =
+ <<Bytes0/binary,
+ (V1 bsr Shift):GoodBits,
+ (V2 bsr Shift):GoodBits,
+ (V3 bsr Shift):GoodBits,
+ (V4 bsr Shift):GoodBits>>,
+ N1 = N0 - (GoodBytes bsl 2),
+ bytes_r(N1, AlgHandler, Next, R4, Bytes1, GoodBytes, GoodBits, Shift);
bytes_r(N0, AlgHandler, Next, R0, Bytes0, GoodBytes, GoodBits, Shift)
when GoodBytes < N0 ->
{V, R1} = Next(R0),
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index 53e6e30a09..7ce68b349d 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -1480,7 +1480,7 @@ do_measure(Iterations) ->
%%
ByteSize = 16, % At about 100 bytes crypto_bytes breaks even to exsss
ct:pal("~nRNG ~w bytes performance~n",[ByteSize]),
- [TMarkBytes16,OverheadBytes16|_] =
+ [TMarkBytes1,OverheadBytes1|_] =
measure_1(
fun (Mod, _State) ->
Generator = fun Mod:bytes_s/2,
@@ -1503,7 +1503,34 @@ do_measure(Iterations) ->
lcg35_bytes(ByteSize, St0), ByteSize, Bin, St1)
end
end, lcg35_bytes, Iterations,
- TMarkBytes16, OverheadBytes16),
+ TMarkBytes1, OverheadBytes1),
+ %%
+ ByteSize2 = 1000, % At about 100 bytes crypto_bytes breaks even to exsss
+ ct:pal("~nRNG ~w bytes performance~n",[ByteSize2]),
+ [TMarkBytes2,OverheadBytes2|_] =
+ measure_1(
+ fun (Mod, _State) ->
+ Generator = fun Mod:bytes_s/2,
+ fun (St0) ->
+ ?CHECK_BYTE_SIZE(
+ Generator(ByteSize2, St0), ByteSize2, Bin, St1)
+ end
+ end,
+ case crypto_support() of
+ ok ->
+ Algs ++ [crypto_bytes, crypto_bytes_cached];
+ _ ->
+ Algs
+ end, Iterations div 50),
+ _ =
+ measure_1(
+ fun (_Mod, _State) ->
+ fun (St0) ->
+ ?CHECK_BYTE_SIZE(
+ lcg35_bytes(ByteSize2, St0), ByteSize2, Bin, St1)
+ end
+ end, lcg35_bytes, Iterations div 50,
+ TMarkBytes2, OverheadBytes2),
%%
ct:pal("~nRNG uniform float performance~n",[]),
[TMarkUniformFloat,OverheadUniformFloat|_] =
--
2.34.1