File 2193-Update-after-feedback.patch of Package erlang
From 877c714f5c46224447e8e362111ad1e25f4c12b9 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Thu, 31 Mar 2022 10:46:58 +0200
Subject: [PATCH 03/11] Update after feedback
* Rephrase some documentation
* Optimize with redundant masks to improve JIT
* Add more measure/1 benchmark items
---
lib/stdlib/doc/src/rand.xml | 6 +--
lib/stdlib/src/rand.erl | 17 +++++++--
lib/stdlib/test/rand_SUITE.erl | 68 ++++++++++++++++++++++++++++++++--
3 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
index 361692f33c..d45d0effca 100644
--- a/lib/stdlib/doc/src/rand.xml
+++ b/lib/stdlib/doc/src/rand.xml
@@ -912,7 +912,7 @@ end.</pre>
few bits (under 35),
is not a power of 2 generator (range 1 .. (2^35 - 32)),
offers no help in generating numbers on a specified range,
- etc...
+ and so on.
</p>
<p>
But for pseudo random load distribution
@@ -974,8 +974,8 @@ end.</pre>
But for pseudo random load distribution
and such it might be useful, since it is extremely fast.
The <seemfa marker="#mcg35/1"><c>mcg35/1</c></seemfa>
- generator above has got less statistical artifacts,
- but instead other pecularities since it is not
+ generator above has less statistical artifacts,
+ but instead it has other peculiarities since it is not
a power of 2 generator.
</p>
</note>
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index d9ae09100e..d498bcdda5 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -1456,8 +1456,13 @@ dummy_seed({A1, A2, A3}) ->
-type mcg35_state() :: 1..(?MCG35_M-1).
-spec mcg35(X0 :: mcg35_state()) -> X1 :: mcg35_state().
-mcg35(X0) ->
- X = ?MCG35_A * X0,
+mcg35(X0) -> % when is_integer(X0), 1 =< X0, X0 < ?MCG35_M ->
+ %% The mask operation on the input tricks the JIT into
+ %% realizing that all following operations does not
+ %% need bignum handling. The suggested guard test above
+ %% could have had the same effect but it did not, and, alas,
+ %% needs much more native code to execute than a 'band'.
+ X = ?MCG35_A * ?MASK(?MCG35_B, X0),
%% rem M = rem (2^B - D), optimization to avoid 'rem'
X1 = ?MASK(?MCG35_B, X) + ?MCG35_D*(X bsr ?MCG35_B),
if
@@ -1497,8 +1502,14 @@ mcg35(X0) ->
-type lcg35_state() :: 0..?MASK(?LCG35_B).
-spec lcg35(X0 :: lcg35_state()) -> X1 :: lcg35_state().
+%%lcg35(X0) when is_integer(X0), 0 =< X0, X0 =< ?MASK(?LCG35_B) ->
lcg35(X0) ->
- ?MASK(?LCG35_B, ?LCG35_A * X0 + ?LCG35_C).
+ %% The mask operation on the input tricks the JIT into
+ %% realizing that all following operations does not
+ %% need bignum handling. The suggested guard test above
+ %% could have had the same effect but it did not, and, alas,
+ %% needs much more native code to execute than a 'band'.
+ ?MASK(?LCG35_B, ?LCG35_A * ?MASK(?LCG35_B, X0) + ?LCG35_C).
%% =====================================================================
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index 9eed862a66..0c82076422 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -1184,6 +1184,22 @@ do_measure(_Config) ->
State)
end,
unique_phash2, TMarkUniformRange10000, OverheadUniformRange1000),
+ _ =
+ measure_1(
+ fun (_) -> 10000 end,
+ fun (State, Range, _) ->
+ measure_loop(
+ fun (State0) ->
+ %% Just a 'rem' with slightly skewed distribution
+ case os:system_time(microsecond) rem Range of
+ R
+ when is_integer(R), 0 =< R, R < Range ->
+ State0
+ end
+ end,
+ State)
+ end,
+ system_time_rem, TMarkUniformRange10000, OverheadUniformRange1000),
%%
ct:pal("~nRNG uniform integer 32 bit performance~n",[]),
[TMarkUniform32Bit,OverheadUniform32Bit|_] =
@@ -1245,6 +1261,24 @@ do_measure(_Config) ->
State)
end,
unique_phash2, TMarkUniform32Bit, OverheadUniform32Bit),
+ _ =
+ measure_1(
+ fun (_) -> 1 bsl 32 end,
+ fun (State, Range, _) ->
+ measure_loop(
+ fun (State0) ->
+ case
+ os:system_time(microsecond)
+ band ((1 bsl 32) - 1)
+ of
+ R
+ when is_integer(R), 0 =< R, R < Range ->
+ State0
+ end
+ end,
+ State)
+ end,
+ system_time_rem, TMarkUniform32Bit, OverheadUniform32Bit),
%%
ct:pal("~nRNG uniform integer half range performance~n",[]),
_ =
@@ -1365,7 +1399,10 @@ do_measure(_Config) ->
unique_phash2, TMarkUniformFullRange, OverheadUniformFullRange),
_ =
measure_1(
- fun (State) -> half_range(State) bsl 1 end,
+ fun (State) ->
+ rand:seed(State),
+ half_range(State) bsl 1
+ end,
fun (State, Range, Mod) ->
measure_loop(
fun (St) ->
@@ -1377,6 +1414,26 @@ do_measure(_Config) ->
State)
end,
procdict, TMarkUniformFullRange, OverheadUniformFullRange),
+ _ =
+ measure_1(
+ fun (State) ->
+ _ = put(lcg35_procdict, State),
+ 1 bsl 35
+ end,
+ fun (State, Range, _) ->
+ measure_loop(
+ fun (State0) ->
+ case
+ put(lcg35_procdict,
+ rand:lcg35(get(lcg35_procdict)))
+ of
+ R when is_integer(R), 0 =< R, R < Range ->
+ State0
+ end
+ end,
+ State)
+ end,
+ lcg35_procdict, TMarkUniformFullRange, OverheadUniformFullRange),
%%
ct:pal("~nRNG uniform integer full range + 1 performance~n",[]),
_ =
@@ -1629,23 +1686,28 @@ measure_1(RangeFun, Fun, Alg, TMark, Overhead) ->
{?MODULE, <<>>};
unique_phash2 ->
{?MODULE, ignored_state};
+ system_time_rem ->
+ {?MODULE, ignored_state};
mcg35_inline ->
{_, S} = rand:seed_s(dummy),
{?MODULE, (S rem ((1 bsl 35)-31 - 1)) + 1};
lcg35_inline ->
{_, S} = rand:seed_s(dummy),
{?MODULE, S bsr (58-35)};
+ lcg35_procdict ->
+ {_, S} = rand:seed_s(dummy),
+ {?MODULE, S bsr (58-35)};
exsp_inline ->
{_, S} = rand:seed_s(exsp),
{?MODULE, S};
procdict ->
- {rand, rand:seed(exsss)};
+ {rand, rand:seed_s(exsss)};
_ ->
{rand, rand:seed_s(Alg)}
end,
- Range = RangeFun(State),
Pid = spawn_link(
fun() ->
+ Range = RangeFun(State),
{T, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end),
Time = T - Overhead,
Percent =
--
2.34.1