File 2191-Implement-fast-niche-PRNGs.patch of Package erlang

From 10fd8779f5620aac9974f13235a6be7353368fe5 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Mon, 14 Mar 2022 15:30:02 +0100
Subject: [PATCH 01/11] Implement fast niche PRNGs

* Add a warm-up set to measure/1
* Present measure/1 overhead better
* Measure process dictionary variant
* Remove redundant mask operations on the argument
  to splitmix64_next/1

Implement, document and measure niche PRNGs:
* splitmix64_next
* exsp_next & exsp_jump
* mcg35
* lcg35
---
 lib/stdlib/doc/src/rand.xml    | 244 ++++++++++++++++++++--
 lib/stdlib/src/rand.erl        | 169 ++++++++++++----
 lib/stdlib/test/rand_SUITE.erl | 360 +++++++++++++++++++++++++++++++--
 3 files changed, 704 insertions(+), 69 deletions(-)

diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
index 4a9d2d1ea4..361692f33c 100644
--- a/lib/stdlib/doc/src/rand.xml
+++ b/lib/stdlib/doc/src/rand.xml
@@ -54,7 +54,8 @@
       non-overlapping sequences for parallel computations.
       The jump functions perform calculations
       equivalent to perform a large number of repeated calls
-      for calculating new states.
+      for calculating new states, but execute in a time
+      roughly equivalent to one regular iteration per generator bit.
     </p>
 
     <p>
@@ -290,9 +291,10 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
       If this is a problem; to generate a boolean with these algorithms
       use something like this:
     </p>
-    <pre>(rand:uniform(16) > 8)</pre>
+    <pre>(rand:uniform(256) > 128) % -> boolean()</pre>
+    <pre>((rand:uniform(256) - 1) bsr 7) % -> 0 | 1</pre>
     <p>
-      And for a general range, with <c>N = 1</c> for <c>exrop</c>,
+      For a general range, with <c>N = 1</c> for <c>exrop</c>,
       and <c>N = 3</c> for <c>exs1024s</c>:
     </p>
     <pre>(((rand:uniform(Range bsl N) - 1) bsr N) + 1)</pre>
@@ -380,13 +382,37 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
       <name name="dummy_state"/>
       <desc><p>Algorithm specific internal state</p></desc>
     </datatype>
+    <datatype>
+      <name name="splitmix64_state"/>
+      <desc><p>Algorithm specific state</p></desc>
+    </datatype>
+    <datatype>
+      <name name="uint58"/>
+      <desc><p>0 .. (2^58 - 1)</p></desc>
+    </datatype>
+    <datatype>
+      <name name="uint64"/>
+      <desc><p>0 .. (2^64 - 1)</p></desc>
+    </datatype>
+    <datatype>
+      <name name="mcg35_state"/>
+      <desc><p>1 .. ((2^35 - 31) - 1)</p></desc>
+    </datatype>
+    <datatype>
+      <name name="lcg35_state"/>
+      <desc><p>0 .. (2^35 - 1)</p></desc>
+    </datatype>
   </datatypes>
 
+
   <funcs>
+    <fsdescription>
+      <title>Plug-in framework API</title>
+    </fsdescription>
     <func>
       <name name="bytes" arity="1" since="OTP 24.0"/>
       <fsummary>Return a random binary.</fsummary>
-      <desc><marker id="bytes-1"/>
+      <desc>
       <p>
         Returns, for a specified integer <c><anno>N</anno> >= 0</c>,
         a <c>binary()</c> with that number of random bytes.
@@ -400,7 +426,7 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
     <func>
       <name name="bytes_s" arity="2" since="OTP 24.0"/>
       <fsummary>Return a random binary.</fsummary>
-      <desc><marker id="bytes-1"/>
+      <desc>
       <p>
         Returns, for a specified integer <c><anno>N</anno> >= 0</c>
         and a state, a <c>binary()</c> with that number of random bytes,
@@ -415,7 +441,7 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
     <func>
       <name name="export_seed" arity="0" since="OTP 18.0"/>
       <fsummary>Export the random number generation state.</fsummary>
-      <desc><marker id="export_seed-0"/>
+      <desc>
         <p>Returns the random number state in an external format.
           To be used with <seemfa marker="#seed/1"><c>seed/1</c></seemfa>.</p>
       </desc>
@@ -424,7 +450,7 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
     <func>
       <name name="export_seed_s" arity="1" since="OTP 18.0"/>
       <fsummary>Export the random number generation state.</fsummary>
-      <desc><marker id="export_seed_s-1"/>
+      <desc>
         <p>Returns the random number generator state in an external format.
           To be used with <seemfa marker="#seed/1"><c>seed/1</c></seemfa>.</p>
       </desc>
@@ -434,7 +460,7 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
       <name name="jump" arity="0" since="OTP 20.0"/>
       <fsummary>Return the seed after performing jump calculation
           to the state in the process dictionary.</fsummary>
-      <desc><marker id="jump-0" />
+      <desc>
           <p>Returns the state
               after performing jump calculation
               to the state in the process dictionary.</p>
@@ -448,7 +474,7 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
     <func>
       <name name="jump" arity="1" since="OTP 20.0"/>
       <fsummary>Return the seed after performing jump calculation.</fsummary>
-      <desc><marker id="jump-1" />
+      <desc>
           <p>Returns the state after performing jump calculation
               to the given state. </p>
       <p>This function generates a <c>not_implemented</c> error exception
@@ -500,7 +526,6 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
       <name name="seed" arity="1" clause_i="2" since="OTP 24.0"/>
       <fsummary>Seed random number generator.</fsummary>
       <desc>
-        <marker id="seed-1"/>
         <p>
 	  Seeds random number generation with the specifed algorithm and
           time-dependent data if <c><anno>AlgOrStateOrExpState</anno></c>
@@ -575,7 +600,7 @@ tests. We suggest to use a sign test to extract a random Boolean value.</pre>
     <func>
       <name name="uniform" arity="0" since="OTP 18.0"/>
       <fsummary>Return a random float.</fsummary>
-      <desc><marker id="uniform-0"/>
+      <desc>
         <p>
 	  Returns a random float uniformly distributed in the value
           range <c>0.0 =&lt; <anno>X</anno> &lt; 1.0</c> and
@@ -611,7 +636,7 @@ end.</pre>
     <func>
       <name name="uniform_real" arity="0" since="OTP 21.0"/>
       <fsummary>Return a random float.</fsummary>
-      <desc><marker id="uniform_real-0"/>
+      <desc>
         <p>
 	  Returns a random float
 	  uniformly distributed in the value range
@@ -647,7 +672,7 @@ end.</pre>
     <func>
       <name name="uniform" arity="1" since="OTP 18.0"/>
       <fsummary>Return a random integer.</fsummary>
-      <desc><marker id="uniform-1"/>
+      <desc>
         <p>Returns, for a specified integer <c><anno>N</anno> >= 1</c>,
           a random integer uniformly distributed in the value range
           <c>1 =&lt; <anno>X</anno> =&lt; <anno>N</anno></c> and
@@ -764,4 +789,197 @@ end.</pre>
       </desc>
     </func>
   </funcs>
+
+
+  <funcs>
+    <fsdescription>
+      <title>Niche algorithms API</title>
+    </fsdescription>
+    <func>
+      <name name="splitmix64_next" arity="1" since="OTP 25.0"/>
+      <fsummary>Return a random integer and new state.</fsummary>
+      <desc>
+        <p>
+          Returns a random 64-bit integer <c><anno>X</anno></c>
+          and a new generator state <c><anno>NewAlgState</anno></c>,
+          according to the SplitMix64 algorithm.
+        </p>
+        <p>
+          This generator is used internally in the <c>rand</c>
+          module for seeding other generators since it is of a
+          quite different breed which reduces the probability for
+          creating an accidentally bad seed.
+        </p>
+      </desc>
+    </func>
+    <func>
+      <name name="exsp_next" arity="1" since="OTP 25.0"/>
+      <fsummary>Return a random integer and new state.</fsummary>
+      <desc>
+        <p>
+          Returns a random 58-bit integer <c><anno>X</anno></c>
+          and a new generator state <c><anno>NewAlgState</anno></c>,
+          according to the Xorshift116+ algorithm.
+        </p>
+        <p>
+          This is an API function into the internal implementation of the
+          <seeerl marker="#algorithms"><c>exsp</c></seeerl>
+          algorithm that enables using it without the overhead
+          of the plug-in framework, which might be useful
+          for time critial applications.
+          On a typical 64 bit Erlang VM this approach executes
+          in just above 30% (1/3) of the time
+          for the default algorithm through
+          this module's normal plug-in framework.
+        </p>
+        <p>
+          To seed this generator use
+          <seemfa marker="#seed_s/1">
+            <c>{_, <anno>AlgState</anno>} = rand:seed_s(exsp)</c>
+          </seemfa>
+          or
+          <seemfa marker="#seed_s/1">
+            <c>{_, <anno>AlgState</anno>} = rand:seed_s(exsp, Seed)</c>
+          </seemfa>
+          with a specific <c>Seed</c>.
+        </p>
+        <note>
+          <p>
+            This function offers no help in generating a number
+            on a selected range, nor in generating a floating point number.
+            It is easy to accidentally mess up the fairly good
+            statistical properties of this generator when doing either.
+            Note also the caveat about weak low bits that
+            this generator suffers from.
+            The generator is exported in this form primarily for speed.
+          </p>
+        </note>
+      </desc>
+    </func>
+    <func>
+      <name name="exsp_jump" arity="1" since="OTP 25.0"/>
+      <fsummary>Return the new state as from 2^64 iterations.</fsummary>
+      <desc>
+        <p>
+          Returns a new generator state equivalent of the state
+          after iterating over
+          <seemfa marker="#exsp_next/1"><c>exsp_next/1</c></seemfa>
+          2^64 times.
+        </p>
+        <p>
+          See the description of jump functions
+          at the top of this module description.
+        </p>
+      </desc>
+    </func>
+    <func>
+      <name name="mcg35" arity="1" since="OTP 25.0"/>
+      <fsummary>Return a new generator state / number.</fsummary>
+      <desc>
+        <p>
+          Returns a new generator state which is also
+          the generated number, <c><anno>X1</anno></c>,
+          according to a classical Multiplicative Congruential Generator
+          (a.k.a Multiplicative Linear Congruential Generator,
+          Lehmer random number generator,
+          Park-Miller random number generator).
+        </p>
+        <p>
+          This generator uses the modulus 2^35 - 31
+          and the multiplication constant 185852
+          from the paper "Tables of Linear Congruential Generators
+          of different sizes and good lattice structure" by
+          Pierre L'Ecuyer (1997) and they are selected
+          for speed to keep the computation under
+          the Erlang bignum limit.
+        </p>
+        <p>
+          The generator may be written as
+          <c><anno>X1</anno>&nbsp;=&nbsp;(185852*<anno>X0</anno>)&nbsp;rem&nbsp;((1&nbsp;bsl&nbsp;35)-31)</c>,
+          but the properties of the chosen constants has allowed
+          an optimization of the otherwise expensive <c>rem</c> operation.
+        </p>
+        <p>
+          On a typical 64 bit Erlang VM this generator executes
+          in just below 10% (1/10) of the time
+          for the default algorithm in this module.
+        </p>
+        <note>
+          <p>
+            This generator is only suitable for insensitive
+            special niche applications since it has
+            a short period (2^35 - 32),
+            few bits (under 35),
+            is not a power of 2 generator (range 1 .. (2^35 - 32)),
+            offers no help in generating numbers on a specified range,
+            etc...
+          </p>
+          <p>
+            But for pseudo random load distribution
+            and such it might be useful, since it is very fast.
+            It normally beats even the known-to-be-fast trick
+            <c>erlang:phash2(erlang:unique_integer())</c>.
+          </p>
+        </note>
+      </desc>
+    </func>
+    <func>
+      <name name="lcg35" arity="1" since="OTP 25.0"/>
+      <fsummary>Return a new generator state / number.</fsummary>
+      <desc>
+        <p>
+          Returns a new generator state which is also
+          the generated number, <c><anno>X1</anno></c>,
+          according to a classical Linear Congruential Generator,
+          a power of 2 mixed congruential generator.
+        </p>
+        <p>
+          This generator uses the modulus 2^35
+          and the multiplication constant 15319397
+          from the paper "Tables of Linear Congruential Generators
+          of different sizes and good lattice structure" by
+          Pierre L'Ecuyer (1997) and they are selected
+          for speed to keep the computation under
+          the Erlang bignum limit.
+          The addition constant has been selected to
+          15366142135 (it has to be odd) which looks
+          more interesting than simply 1.
+        </p>
+        <p>
+          The generator may be written as
+          <c><anno>X1</anno>&nbsp;=&nbsp;((15319397*<anno>X0</anno>)&nbsp;+&nbsp;15366142135)&nbsp;band&nbsp;((1&nbsp;bsl&nbsp;35)-1)</c>.
+        </p>
+        <p>
+          On a typical 64 bit Erlang VM this generator executes
+          in just below 7% (1/15) of the time
+          for the default algorithm in this module,
+          which is the fastest generator the author has seen.
+          It can hardly be beaten by even a BIF
+          implementation of any generator since the execution
+          time is close to the overhead of a BIF call.
+        </p>
+        <note>
+          <p>
+            This generator is only suitable for insensitive
+            special niche applications since it has
+            a short period (2^35), few bits (35),
+            offers no help in generating numbers on a specified range,
+            and has, among others, the known statistical artifact
+            that the lowest bit simply alternates,
+            the next to lowest has a period of 4,
+            and so on, so it is only the highest bits
+            that achieve any form of statistical quality.
+          </p>
+          <p>
+            But for pseudo random load distribution
+            and such it might be useful, since it is extremely fast.
+            The <seemfa marker="#mcg35/1"><c>mcg35/1</c></seemfa>
+            generator above has got less statistical artifacts,
+            but instead other pecularities since it is not
+            a power of 2 generator.
+          </p>
+        </note>
+      </desc>
+    </func>
+  </funcs>
 </erlref>
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index b2c3d4a557..d9ae09100e 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -36,6 +36,9 @@
          normal/0, normal/2, normal_s/1, normal_s/3
 	]).
 
+%% Utilities
+-export([exsp_next/1, exsp_jump/1, splitmix64_next/1, mcg35/1, lcg35/1]).
+
 %% Test, dev and internal
 -export([exro928_jump_2pow512/1, exro928_jump_2pow20/1,
 	 exro928_seed/1, exro928_next/1, exro928_next_state/1,
@@ -44,7 +47,7 @@
 %% Debug
 -export([make_float/3, float2str/1, bc64/1]).
 
--compile({inline, [exs64_next/1, exsplus_next/1, exsss_next/1,
+-compile({inline, [exs64_next/1, exsp_next/1, exsss_next/1,
 		   exs1024_next/1, exs1024_calc/2,
                    exro928_next_state/4,
                    exrop_next/1, exrop_next_s/2,
@@ -143,6 +146,8 @@
 -export_type(
    [exsplus_state/0, exro928_state/0, exrop_state/0, exs1024_state/0,
     exs64_state/0, dummy_state/0]).
+-export_type(
+   [uint58/0, uint64/0, splitmix64_state/0, mcg35_state/0, lcg35_state/0]).
 
 %% =====================================================================
 %% Range macro and helper
@@ -680,11 +685,11 @@ mk_alg(exs64) ->
     {#{type=>exs64, max=>?MASK(64), next=>fun exs64_next/1},
      fun exs64_seed/1};
 mk_alg(exsplus) ->
-    {#{type=>exsplus, max=>?MASK(58), next=>fun exsplus_next/1,
+    {#{type=>exsplus, max=>?MASK(58), next=>fun exsp_next/1,
        jump=>fun exsplus_jump/1},
      fun exsplus_seed/1};
 mk_alg(exsp) ->
-    {#{type=>exsp, bits=>58, weak_low_bits=>1, next=>fun exsplus_next/1,
+    {#{type=>exsp, bits=>58, weak_low_bits=>1, next=>fun exsp_next/1,
        uniform=>fun exsp_uniform/1, uniform_n=>fun exsp_uniform/2,
        jump=>fun exsplus_jump/1},
      fun exsplus_seed/1};
@@ -730,7 +735,7 @@ exs64_seed(L) when is_list(L) ->
     [R] = seed64_nz(1, L),
     R;
 exs64_seed(A) when is_integer(A) ->
-    [R] = seed64(1, ?MASK(64, A)),
+    [R] = seed64(1, A),
     R;
 %%
 %% Traditional integer triplet seed
@@ -794,15 +799,15 @@ exsplus_seed(L) when is_list(L) ->
     [S0,S1] = seed58_nz(2, L),
     [S0|S1];
 exsplus_seed(X) when is_integer(X) ->
-    [S0,S1] = seed58(2, ?MASK(64, X)),
+    [S0,S1] = seed58(2, X),
     [S0|S1];
 %%
 %% Traditional integer triplet seed
 exsplus_seed({A1, A2, A3}) ->
-    {_, R1} = exsplus_next(
+    {_, R1} = exsp_next(
                 [?MASK(58, (A1 * 4294967197) + 1)|
                  ?MASK(58, (A2 * 4294967231) + 1)]),
-    {_, R2} = exsplus_next(
+    {_, R2} = exsp_next(
                 [?MASK(58, (A3 * 4294967279) + 1)|
                  tl(R1)]),
     R2.
@@ -813,14 +818,14 @@ exsss_seed(L) when is_list(L) ->
     [S0,S1] = seed58_nz(2, L),
     [S0|S1];
 exsss_seed(X) when is_integer(X) ->
-    [S0,S1] = seed58(2, ?MASK(64, X)),
+    [S0,S1] = seed58(2, X),
     [S0|S1];
 %%
 %% Seed from traditional integer triple - mix into splitmix
 exsss_seed({A1, A2, A3}) ->
-    {_, X0} = seed58(?MASK(64, A1)),
-    {S0, X1} = seed58(?MASK(64, A2) bxor X0),
-    {S1, _} = seed58(?MASK(64, A3) bxor X1),
+    {_, X0} = seed58(A1),
+    {S0, X1} = seed58(A2 bxor X0),
+    {S1, _} = seed58(A3 bxor X1),
     [S0|S1].
 
 %% Advance Xorshift116 state one step
@@ -842,18 +847,16 @@ exsss_seed({A1, A2, A3}) ->
        ?MASK(58, V_b + ?BSL(58, V_b, 3)) % V_b * 9
    end).
 
--dialyzer({no_improper_lists, exsplus_next/1}).
 
 %% Advance state and generate 58bit unsigned integer
--spec exsplus_next(exsplus_state()) -> {uint58(), exsplus_state()}.
-exsplus_next([S1|S0]) ->
+%%
+-dialyzer({no_improper_lists, exsp_next/1}).
+-spec exsp_next(AlgState :: exsplus_state()) ->
+                       {X :: uint58(), NewAlgState :: exsplus_state()}.
+exsp_next([S1|S0]) ->
     %% Note: members s0 and s1 are swapped here
     NewS1 = ?exs_next(S0, S1, S1_1),
     {?MASK(58, S0 + NewS1), [S0|NewS1]}.
-%%    %% Note: members s0 and s1 are swapped here
-%%    S11 = S1 bxor ?BSL(58, S1, 24),
-%%    S12 = S11 bxor S0 bxor (S11 bsr 11) bxor (S0 bsr 41),
-%%    {?MASK(58, S0 + S12), [S0|S12]}.
 
 -dialyzer({no_improper_lists, exsss_next/1}).
 
@@ -862,10 +865,9 @@ exsss_next([S1|S0]) ->
     %% Note: members s0 and s1 are swapped here
     NewS1 = ?exs_next(S0, S1, S1_1),
     {?scramble_starstar(S0, V_0, V_1), [S0|NewS1]}.
-%%    {?MASK(58, S0 + NewS1), [S0|NewS1]}.
 
 exsp_uniform({AlgHandler, R0}) ->
-    {I, R1} = exsplus_next(R0),
+    {I, R1} = exsp_next(R0),
     %% Waste the lowest bit since it is of lower
     %% randomness quality than the others
     {(I bsr (58-53)) * ?TWO_POW_MINUS53, {AlgHandler, R1}}.
@@ -875,7 +877,7 @@ exsss_uniform({AlgHandler, R0}) ->
     {(I bsr (58-53)) * ?TWO_POW_MINUS53, {AlgHandler, R1}}.
 
 exsp_uniform(Range, {AlgHandler, R}) ->
-    {V, R1} = exsplus_next(R),
+    {V, R1} = exsp_next(R),
     MaxMinusRange = ?BIT(58) - Range,
     ?uniform_range(Range, AlgHandler, R1, V, MaxMinusRange, I).
 
@@ -885,7 +887,7 @@ exsss_uniform(Range, {AlgHandler, R}) ->
     ?uniform_range(Range, AlgHandler, R1, V, MaxMinusRange, I).
 
 
-%% This is the jump function for the exs* generators,
+%% This is the jump function for the exs... generators,
 %% i.e the Xorshift116 generators,  equivalent
 %% to 2^64 calls to next/1; it can be used to generate 2^52
 %% non-overlapping subsequences for parallel computations.
@@ -924,15 +926,21 @@ exsss_uniform(Range, {AlgHandler, R}) ->
 -spec exsplus_jump({alg_handler(), exsplus_state()}) ->
                           {alg_handler(), exsplus_state()}.
 exsplus_jump({AlgHandler, S}) ->
+    {AlgHandler, exsp_jump(S)}.
+
+-dialyzer({no_improper_lists, exsp_jump/1}).
+-spec exsp_jump(AlgState :: exsplus_state()) ->
+                       NewAlgState :: exsplus_state().
+exsp_jump(S) ->
     {S1, AS1} = exsplus_jump(S, [0|0], ?JUMPCONST1, ?JUMPELEMLEN),
     {_,  AS2} = exsplus_jump(S1, AS1,  ?JUMPCONST2, ?JUMPELEMLEN),
-    {AlgHandler, AS2}.
+    AS2.
 
 -dialyzer({no_improper_lists, exsplus_jump/4}).
 exsplus_jump(S, AS, _, 0) ->
     {S, AS};
 exsplus_jump(S, [AS0|AS1], J, N) ->
-    {_, NS} = exsplus_next(S),
+    {_, NS} = exsp_next(S),
     case ?MASK(1, J) of
         1 ->
             [S0|S1] = S,
@@ -952,7 +960,7 @@ exsplus_jump(S, [AS0|AS1], J, N) ->
 exs1024_seed(L) when is_list(L) ->
     {seed64_nz(16, L), []};
 exs1024_seed(X) when is_integer(X) ->
-    {seed64(16, ?MASK(64, X)), []};
+    {seed64(16, X), []};
 %%
 %% Seed from traditional triple, remain backwards compatible
 exs1024_seed({A1, A2, A3}) ->
@@ -1141,13 +1149,13 @@ exs1024_jump({L, RL}, AS, JL, J, N, TN) ->
 exro928_seed(L) when is_list(L) ->
     {seed58_nz(16, L), []};
 exro928_seed(X) when is_integer(X) ->
-    {seed58(16, ?MASK(64, X)), []};
+    {seed58(16, X), []};
 %%
 %% Seed from traditional integer triple - mix into splitmix
 exro928_seed({A1, A2, A3}) ->
-    {S0, X0} = seed58(?MASK(64, A1)),
-    {S1, X1} = seed58(?MASK(64, A2) bxor X0),
-    {S2, X2} = seed58(?MASK(64, A3) bxor X1),
+    {S0, X0} = seed58(A1),
+    {S1, X1} = seed58(A2 bxor X0),
+    {S2, X2} = seed58(A3 bxor X1),
     {[S0,S1,S2|seed58(13, X2)], []}.
 
 
@@ -1313,7 +1321,7 @@ exrop_seed(L) when is_list(L) ->
     [S0,S1] = seed58_nz(2, L),
     [S0|S1];
 exrop_seed(X) when is_integer(X) ->
-    [S0,S1] = seed58(2, ?MASK(64, X)),
+    [S0,S1] = seed58(2, X),
     [S0|S1];
 %%
 %% Traditional integer triplet seed
@@ -1382,24 +1390,28 @@ exrop_jump([S__0|S__1] = _S, S0, S1, J, Js) ->
 %% =====================================================================
 %% dummy "PRNG": Benchmark dummy overhead reference
 %%
-%% As fast as possible - return a constant; to measure overhead.
+%% As fast as possible - return something daft and update state;
+%% to measure plug-in framework overhead.
 %%
 %% =====================================================================
 
--opaque dummy_state() :: 0..?MASK(58).
+-type dummy_state() :: uint58().
 
-dummy_uniform(_Range, State) -> {1, State}.
-dummy_next(R)                -> {?BIT(57), R}.
-dummy_uniform(State)         -> {0.5, State}.
+dummy_uniform(_Range, {AlgHandler,R}) ->
+    {1, {AlgHandler,(R bxor ?MASK(58))}}. % 1 is always in Range
+dummy_next(R) ->
+    {R, R bxor ?MASK(58)}.
+dummy_uniform({AlgHandler,R}) ->
+    {0.5, {AlgHandler,(R bxor ?MASK(58))}}. % Perfect mean value
 
-%% Serious looking seed, to avoid at least seed test failure
+%% Serious looking seed, to avoid rand_SUITE seed test failure
 %%
 dummy_seed(L) when is_list(L) ->
     case L of
         [X] when is_integer(X) ->
             ?MASK(58, X);
         [X|_] when is_integer(X) ->
-            erlang:error(too_many_seed_integer);
+            erlang:error(too_many_seed_integers);
         [_|_] ->
             erlang:error(non_integer_seed)
     end;
@@ -1412,6 +1424,83 @@ dummy_seed({A1, A2, A3}) ->
     {Z3, _} = splitmix64_next(A3 bxor X2),
     ?MASK(58, Z3).
 
+
+%% =====================================================================
+%% mcg35 PRNG: Multiplicative Linear Congruential Generator
+%%
+%% Parameters by Pierre L'Ecuyer from the paper:
+%%   TABLES OF LINEAR CONGRUENTIAL GENERATORS
+%% OF DIFFERENT SIZES AND GOOD LATTICE STRUCTURE
+%%
+%% X1 = (A * X0) rem M
+%%
+%% A = 185852
+%% M = 2^B - D
+%%     B = 35, D = 31
+%%
+%% X cannot, and never will be 0.
+%% Take X1 as output value with range 1..(M-1).
+%%
+%% Use this generator for speed, not for quality.
+%% The calculation should avoid bignum operations,
+%% so A * X0 should not become a bignum.
+%% M and A has been selected accordingly.
+%% The period is M - 1 since 0 cannot occur.
+%%
+%% =====================================================================
+-define(MCG35_A, (185852)).
+-define(MCG35_B, (35)).
+-define(MCG35_D, (31)).
+-define(MCG35_M, (?BIT(?MCG35_B) - ?MCG35_D)).
+
+-type mcg35_state() :: 1..(?MCG35_M-1).
+
+-spec mcg35(X0 :: mcg35_state()) -> X1 :: mcg35_state().
+mcg35(X0) ->
+    X = ?MCG35_A * X0,
+    %% rem M = rem (2^B - D), optimization to avoid 'rem'
+    X1 = ?MASK(?MCG35_B, X) + ?MCG35_D*(X bsr ?MCG35_B),
+    if
+        X1 < ?MCG35_M -> X1;
+        true          -> X1 - ?MCG35_M
+    end.
+
+
+%% =====================================================================
+%% lcg35 PRNG: Multiplicative Linear Congruential Generator
+%%
+%% Parameters by Pierre L'Ecuyer from the paper:
+%%   TABLES OF LINEAR CONGRUENTIAL GENERATORS
+%% OF DIFFERENT SIZES AND GOOD LATTICE STRUCTURE
+%%
+%% X1 = (A * X0 + C) rem M
+%%
+%% M = 2^35, A = 15319397, C = 1
+%%
+%% Choosing C = 15366142135, which is an odd value
+%% about 2^35 / sqrt(5) gives a perhaps nicer value in
+%% the sequence after 0 (-> C).
+%%
+%% Take X1 as output value with range 1..(M-1).
+%%
+%% Use this generator for speed, not for quality.
+%% The calculation should avoid bignum operations,
+%% so A * X0 + C should not become a bignum.
+%% M, A and C has been selected accordingly.
+%% The period is M.
+%%
+%% =====================================================================
+-define(LCG35_A, (15319397)).
+-define(LCG35_C, (15366142135)). % (1 bsl 35) / sqrt(5), odd
+-define(LCG35_B, 35). % Number of bits
+
+-type lcg35_state() :: 0..?MASK(?LCG35_B).
+
+-spec lcg35(X0 :: lcg35_state()) -> X1 :: lcg35_state().
+lcg35(X0) ->
+    ?MASK(?LCG35_B, ?LCG35_A * X0 + ?LCG35_C).
+
+
 %% =====================================================================
 %% Mask and fill state list, ensure not all zeros
 %% =====================================================================
@@ -1475,6 +1564,7 @@ seed64(X_0) ->
 	    ZX
     end.
 
+%% =====================================================================
 %% The SplitMix64 generator:
 %%
 %% uint64_t splitmix64_next() {
@@ -1484,6 +1574,11 @@ seed64(X_0) ->
 %% 	return z ^ (z >> 31);
 %% }
 %%
+
+-type splitmix64_state() :: uint64().
+
+-spec splitmix64_next(AlgState :: integer()) ->
+                             {X :: uint64(), NewAlgState :: splitmix64_state()}.
 splitmix64_next(X_0) ->
     X = ?MASK(64, X_0 + 16#9e3779b97f4a7c15),
     Z_0 = ?MASK(64, (X bxor (X bsr 30)) * 16#bf58476d1ce4e5b9),
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index ff0ad72d99..da721ca900 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -35,6 +35,7 @@ all() ->
     [seed, interval_int, interval_float,
      bytes_count,
      api_eq,
+     mcg35_api, mcg35_rem, lcg35_api, exsp_next_api, exsp_jump_api,
      reference,
      {group, basic_stats},
      {group, distr_stats},
@@ -205,6 +206,90 @@ api_eq_1(S00) ->
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+-define(MCG35_M, ((1 bsl 35) - 31)).
+
+%% Verify mcg35 behaviour
+%%
+mcg35_api(Config) when is_list(Config) ->
+    mcg35_api(1, 1000000).
+
+mcg35_api(X0, 0) ->
+    X = 30203473714,
+    {X, X} = {X0, X},
+    ok;
+mcg35_api(X0, N)
+  when is_integer(X0), 1 =< X0, X0 < ?MCG35_M ->
+    mcg35_api(rand:mcg35(X0), N - 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Verify the 'rem' optimization in mcg35
+%%
+mcg35_rem(Config) when is_list(Config) ->
+    {_, X0} = rand:seed_s(dummy),
+    mcg35_rem((X0 rem (?MCG35_M - 1)) + 1, 10000000).
+
+mcg35_rem(_X0, 0) ->
+    ok;
+mcg35_rem(X0, N) ->
+    X1 = (185852 * X0) rem ?MCG35_M,
+    {X1, X1, X0, N} = {rand:mcg35(X0), X1, X0, N},
+    mcg35_rem(X1, N - 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Verify lcg35 behaviour
+%%
+lcg35_api(Config) when is_list(Config) ->
+    lcg35_api(0, 1000000).
+
+lcg35_api(X0, 0) ->
+    X = 19243690048,
+    {X, X} = {X0, X},
+    ok;
+lcg35_api(X0, N)
+  when is_integer(X0), 0 =< X0, X0 < 1 bsl 35 ->
+    lcg35_api(rand:lcg35(X0), N - 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Verify exsp_next behaviour
+%%
+exsp_next_api(Config) when is_list(Config) ->
+    {_, AlgState} = State = rand:seed_s(exsp, 87654321),
+    exsp_next_api(State, AlgState, 1000000).
+
+exsp_next_api(_State, _AlgState, 0) ->
+    ok;
+exsp_next_api(State, AlgState, N) ->
+    {X, NewState} = rand:uniform_s(1 bsl 58, State),
+    {Y, NewAlgState} = rand:exsp_next(AlgState),
+    Y1 = Y + 1,
+    {X, X, N} = {Y1, X, N},
+    exsp_next_api(NewState, NewAlgState, N - 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Verify exsp_jump behaviour
+%%
+exsp_jump_api(Config) when is_list(Config) ->
+    {_, AlgState} = State = rand:seed_s(exsp, 12345678),
+    exsp_jump_api(State, AlgState, 10000).
+
+exsp_jump_api(_State, _AlgState, 0) ->
+    ok;
+exsp_jump_api(State, AlgState, N) ->
+    {X, NewState} = rand:uniform_s(1 bsl 58, State),
+    {Y, NewAlgState} = rand:exsp_next(AlgState),
+    Y1 = Y + 1,
+    {X, X, N} = {Y1, X, N},
+    exsp_jump_api(
+      rand:jump(NewState),
+      rand:exsp_jump(NewAlgState),
+      N - 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 %% Check that uniform/1 returns values within the proper interval.
 interval_int(Config) when is_list(Config) ->
     Algs = [default|algs()],
@@ -1003,7 +1088,7 @@ do_measure(_Config) ->
         end,
     %%
     ct:pal("~nRNG uniform integer range 10000 performance~n",[]),
-    _ =
+    [TMarkUniformRange10000,OverheadUniformRange1000|_] =
         measure_1(
           fun (_) -> 10000 end,
           fun (State, Range, Mod) ->
@@ -1016,9 +1101,75 @@ do_measure(_Config) ->
                     State)
           end,
           Algs),
+    _ =
+        measure_1(
+          fun (_) -> 10000 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            State1 = rand:mcg35(State0),
+                            %% Just a 'rem' with slightly skewed distribution
+                            case (State1 rem Range) + 1 of
+                                R when is_integer(R), 0 < R, R =< Range ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          mcg35_inline, TMarkUniformRange10000, OverheadUniformRange1000),
+    _ =
+        measure_1(
+          fun (_) -> 10000 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            State1 = rand:lcg35(State0),
+                            %% Just a 'rem' with slightly skewed distribution
+                            case (State1 rem Range) + 1 of
+                                R when is_integer(R), 0 < R, R =< Range ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          lcg35_inline, TMarkUniformRange10000, OverheadUniformRange1000),
+    _ =
+        measure_1(
+          fun (_) -> 10000 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            {V,State1} = rand:exsp_next(State0),
+                            %% Just a 'rem' with slightly skewed distribution
+                            case (V rem Range) + 1 of
+                                R when is_integer(R), 0 < R, R =< Range ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          exsp_inline, TMarkUniformRange10000, OverheadUniformRange1000),
+    _ =
+        measure_1(
+          fun (_) -> 10000 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            %% Just a 'rem' with slightly skewed distribution
+                            case
+                                erlang:phash2(erlang:unique_integer(), Range)
+                            of
+                                R
+                                  when is_integer(R), 0 =< R, R < Range ->
+                                    State0
+                            end
+                    end,
+                    State)
+          end,
+          unique_phash2, TMarkUniformRange10000, OverheadUniformRange1000),
     %%
     ct:pal("~nRNG uniform integer 32 bit performance~n",[]),
-    _ =
+    [TMarkUniform32Bit,OverheadUniform32Bit|_] =
         measure_1(
           fun (_) -> 1 bsl 32 end,
           fun (State, Range, Mod) ->
@@ -1031,6 +1182,52 @@ do_measure(_Config) ->
                     State)
           end,
           Algs),
+    _ =
+        measure_1(
+          fun (_) -> 1 bsl 32 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            case rand:lcg35(State0) bsr (35-32) of
+                                R when is_integer(R), 0 =< R, R < Range ->
+                                    R
+                            end
+                    end,
+                    State)
+          end,
+          lcg35_inline, TMarkUniform32Bit, OverheadUniform32Bit),
+    _ =
+        measure_1(
+          fun (_) -> 1 bsl 32 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            {V, State1} = rand:exsp_next(State0),
+                            case V bsr (58-32) of
+                                R when is_integer(R), 0 =< R, R < Range ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          exsp_inline, TMarkUniform32Bit, OverheadUniform32Bit),
+    _ =
+        measure_1(
+          fun (_) -> 1 bsl 32 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            case
+                                erlang:phash2(erlang:unique_integer(), Range)
+                            of
+                                R
+                                  when is_integer(R), 0 =< R, R < Range ->
+                                    State0
+                            end
+                    end,
+                    State)
+          end,
+          unique_phash2, TMarkUniform32Bit, OverheadUniform32Bit),
     %%
     ct:pal("~nRNG uniform integer half range performance~n",[]),
     _ =
@@ -1078,7 +1275,7 @@ do_measure(_Config) ->
           Algs),
     %%
     ct:pal("~nRNG uniform integer full range performance~n",[]),
-    [TMarkUniformFullRange|_] =
+    [TMarkUniformFullRange,OverheadUniformFullRange|_] =
         measure_1(
           fun (State) -> half_range(State) bsl 1 end,
           fun (State, Range, Mod) ->
@@ -1093,13 +1290,62 @@ do_measure(_Config) ->
           Algs ++ [dummy]),
     _ =
         measure_1(
-          fun (_) -> 0 end,
-          fun (State, _, _) ->
+          fun (_) -> ((1 bsl 35) - 31) - 1 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            case rand:mcg35(State0) of
+                                R when is_integer(R), 1 =< R, R =< Range ->
+                                    R
+                            end
+                    end,
+                    State)
+          end,
+          mcg35_inline, TMarkUniformFullRange, OverheadUniformFullRange),
+    _ =
+        measure_1(
+          fun (_) -> 1 bsl 35 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            case rand:lcg35(State0) of
+                                R when is_integer(R), 0 =< R, R < Range ->
+                                    R
+                            end
+                    end,
+                    State)
+          end,
+          lcg35_inline, TMarkUniformFullRange, OverheadUniformFullRange),
+    _ =
+        measure_1(
+          fun (_) -> 1 bsl 58 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            {V, State1} = rand:exsp_next(State0),
+                            case V of
+                                V when is_integer(V), 0 =< V, V < Range ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          exsp_inline, TMarkUniformFullRange, OverheadUniformFullRange),
+    _ =
+        measure_1(
+          fun (_) -> 1 bsl 27 end,
+          fun (State, Range, _) ->
                   measure_loop(
-                    fun (State0) -> State0 end,
+                    fun (State0) ->
+                            case erlang:phash2(erlang:unique_integer()) of
+                                R
+                                  when is_integer(R), 0 =< R, R < Range ->
+                                    State0
+                            end
+                    end,
                     State)
           end,
-          benchmark_dummy, TMarkUniformFullRange),
+          unique_phash2, TMarkUniformFullRange, OverheadUniformFullRange),
     _ =
         measure_1(
           fun (State) -> half_range(State) bsl 1 end,
@@ -1113,7 +1359,7 @@ do_measure(_Config) ->
                     end,
                     State)
           end,
-          procdict, TMarkUniformFullRange),
+          procdict, TMarkUniformFullRange, OverheadUniformFullRange),
     %%
     ct:pal("~nRNG uniform integer full range + 1 performance~n",[]),
     _ =
@@ -1201,7 +1447,7 @@ do_measure(_Config) ->
           end),
     %%
     ct:pal("~nRNG uniform float performance~n",[]),
-    _ =
+    [TMarkUniformFloat,OverheadUniformFloat|_] =
         measure_1(
           fun (_) -> 0 end,
           fun (State, _, Mod) ->
@@ -1212,6 +1458,53 @@ do_measure(_Config) ->
                     State)
           end,
           Algs),
+    _ =
+        measure_1(
+          fun (_) -> 0 end,
+          fun (State, _, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            State1 = rand:mcg35(State0),
+                            %% Too few bits for full mantissa
+                            case (State1 - 1) * (1/((1 bsl 35) - 31)) of
+                                R when is_float(R), 0.0 =< R, R < 1.0 ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          mcg35_inline, TMarkUniformFloat, OverheadUniformFloat),
+    _ =
+        measure_1(
+          fun (_) -> 0 end,
+          fun (State, _, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            State1 = rand:lcg35(State0),
+                            %% Too few bits for full mantissa
+                            case State1 * (1/(1 bsl 35)) of
+                                R when is_float(R), 0.0 =< R, R < 1.0 ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          lcg35_inline, TMarkUniformFloat, OverheadUniformFloat),
+    _ =
+        measure_1(
+          fun (_) -> 0 end,
+          fun (State, _, _) ->
+                  measure_loop(
+                    fun (State0) ->
+                            {V,State1} = rand:exsp_next(State0),
+                            case V * (1/(1 bsl 58)) of
+                                R when is_float(R), 0.0 =< R, R < 1.0 ->
+                                    State1
+                            end
+                    end,
+                    State)
+          end,
+          exsp_inline, TMarkUniformFloat, OverheadUniformFloat),
     %%
     ct:pal("~nRNG uniform_real float performance~n",[]),
     _ =
@@ -1227,7 +1520,7 @@ do_measure(_Config) ->
           Algs),
     %%
     ct:pal("~nRNG normal float performance~n",[]),
-    [TMarkNormalFloat|_] =
+    [TMarkNormalFloat, OverheadNormalFloat|_] =
         measure_1(
           fun (_) -> 0 end,
           fun (State, _, Mod) ->
@@ -1261,7 +1554,7 @@ do_measure(_Config) ->
                     end,
                     State)
           end,
-          exsss, TMarkNormalFloat),
+          exsss, TMarkNormalFloat, OverheadNormalFloat),
     ok.
 
 -define(LOOP_MEASURE, (?LOOP div 5)).
@@ -1275,14 +1568,32 @@ measure_loop(_, _, _) ->
     ok.
 
 measure_1(RangeFun, Fun, Algs) ->
-    TMark = measure_1(RangeFun, Fun, hd(Algs), undefined),
-    [TMark] ++
-        [measure_1(RangeFun, Fun, Alg, TMark) || Alg <- tl(Algs)].
+    WMark = measure_1(RangeFun, Fun, hd(Algs), warm_up, 0),
+    Overhead =
+        measure_1(
+          fun (_) -> 1 end,
+          fun (State, Range, _) ->
+                  measure_loop(
+                    fun (State0)
+                          when
+                              is_integer(State0),
+                              1 =< State0,
+                              State0 =< Range ->
+                            State0
+                    end,
+                    State)
+          end,
+          overhead, WMark, 0),
+    TMark = measure_1(RangeFun, Fun, hd(Algs), undefined, Overhead),
+    [TMark,Overhead] ++
+        [measure_1(RangeFun, Fun, Alg, TMark, Overhead) || Alg <- tl(Algs)].
 
-measure_1(RangeFun, Fun, Alg, TMark) ->
+measure_1(RangeFun, Fun, Alg, TMark, Overhead) ->
     Parent = self(),
     {Mod, State} =
         case Alg of
+            overhead ->
+                {?MODULE, 1};
             crypto64 ->
                 {rand, crypto64_seed()};
             crypto_cache ->
@@ -1299,21 +1610,32 @@ measure_1(RangeFun, Fun, Alg, TMark) ->
                 {?MODULE, ignored_state};
             crypto_bytes_cached ->
                 {?MODULE, <<>>};
-            benchmark_dummy ->
+            unique_phash2 ->
                 {?MODULE, ignored_state};
+            mcg35_inline ->
+                {_, S} = rand:seed_s(dummy),
+                {?MODULE, (S rem ((1 bsl 35)-31 - 1)) + 1};
+            lcg35_inline ->
+                {_, S} = rand:seed_s(dummy),
+                {?MODULE, S bsr (58-35)};
+            exsp_inline ->
+                {_, S} = rand:seed_s(exsp),
+                {?MODULE, S};
             procdict ->
-                {rand, rand:seed(default)};
+                {rand, rand:seed(exsss)};
             _ ->
                 {rand, rand:seed_s(Alg)}
         end,
     Range = RangeFun(State),
     Pid = spawn_link(
             fun() ->
-                    {Time, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end),
+                    {T, ok} = timer:tc(fun () -> Fun(State, Range, Mod) end),
+                    Time = T - Overhead,
                     Percent =
                         case TMark of
+                            warm_up   -> TMark;
                             undefined -> 100;
-                            _ -> (Time * 100 + 50) div TMark
+                            _         -> (Time * 100 + 50) div TMark
                         end,
                     io:format(
                       "~.20w: ~p ns ~p% [16#~.16b]~n",
-- 
2.34.1
Places

File 2191-Implement-fast-niche-PRNGs.patch of Package erlang

Places