File 2956-Tweak-mwc59-generator-constants.patch of Package erlang

From f27eafccf2b372cf61f3a1fb4d607b6d69569a41 Mon Sep 17 00:00:00 2001
From: Raimo Niskanen <raimo@erlang.org>
Date: Mon, 9 May 2022 15:51:46 +0200
Subject: [PATCH 6/6] Tweak mwc59 generator constants

After thorough testing by Sebastiano Vigna we decided on the
scramblers Xorshift 8 as a fast scrambler with still some
small remaining problems in 2- and 3-dimensional collision
and birthday spacings tests, and Xorshift 4 followed by
Xorshift 27 as a thorough scrambler that passes extensive
PRNG tests.
---
 lib/stdlib/doc/src/rand.xml    | 69 ++++++++++++++++++++++------------
 lib/stdlib/src/rand.erl        | 31 +++++++--------
 lib/stdlib/test/rand_SUITE.erl | 48 +++++++++++------------
 3 files changed, 81 insertions(+), 67 deletions(-)

diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
index 0465ee21e1..25dec14a51 100644
--- a/lib/stdlib/doc/src/rand.xml
+++ b/lib/stdlib/doc/src/rand.xml
@@ -909,29 +909,49 @@ end.</pre>
           <c><anno>CX1</anno>&nbsp;=&nbsp;16#7fa6502&nbsp;*&nbsp;X&nbsp;+&nbsp;C</c>
         </p>
         <p>
-          The quality of the generated bits is best in the low end;
-          the low 16 bits of the generator state
-          <c><anno>CX1</anno></c> can be used without any scrambling.
-          <seemfa marker="#mwc59_value/1"><c>mwc59_value</c></seemfa>
-          is a fast scrambler that gives 32 high quality bits.
-          With the slightly slower
-          <seemfa marker="#mwc59_full_value/1">
-            <c>mwc59_full_value</c>
+          Because the generator uses a multiplier that is
+          a power of 2 it gets statistical flaws for collision tests
+          and birthday spacings tests in 2 and 3 dimensions,
+          so the state should be scrambled, to create
+          an output value.
+        </p><p>
+          The quality of the state bits <c><anno>CX1</anno></c>
+          as a random value is far from good, but if speed is
+          much more important than these imperfections, the lowest
+          say 16 bits of the generator state could be used
+          without scrambling.
+        </p>
+        <p>
+          Function
+          <seemfa marker="#mwc59_fast_value/1">
+            <c>mwc59_fast_value</c>
           </seemfa>
-          all 58 returned bits are of good quality, and
+          is a fast scrambler that gives 32 decent bits, but still
+          some problems in 2- and 3-dimensional collision tests
+          show through.
+          The slightly slower
+          <seemfa marker="#mwc59_value/1">
+            <c>mwc59_value</c>
+          </seemfa>
+          scrambler returns 58 bits of very good quality, and
           <seemfa marker="#mwc59_float/1"><c>mwc59_float</c></seemfa>
-          returns a good quality <c>float()</c>.
+          returns a <c>float()</c> of very good quality.
         </p>
         <p>
           On a typical 64 bit Erlang VM this generator executes
           in below 8% (1/13) of the time
-          for the default algorithm in this module.
-          Adding the
-          <seemfa marker="#mwc59_value/1"><c>mwc59_value</c></seemfa>
+          for the default algorithm in the
+          <seeerl marker="#plug_in_api">
+            plug-in framework API
+          </seeerl>
+          of this module.  With the
+          <seemfa marker="#mwc59_fast_value/1">
+            <c>mwc59_fast_value</c>
+          </seemfa>
           scrambler the total time becomes 16% (1/6),
           and with
-          <seemfa marker="#mwc59_full_value/1">
-            <c>mwc59_full_value</c>
+          <seemfa marker="#mwc59_value/1">
+            <c>mwc59_value</c>
           </seemfa>
           it becomes 20% (1/5) of the time for the default algorithm.
           With
@@ -945,9 +965,8 @@ end.</pre>
             applications.  It has a much shorter period
             than the default generator, which in itself
             is a quality concern, although when used with the
-            value scramblers it passes strict PRNG tests
-            (most of TestU01 BigCrush and PractRand 2 TB).
-            The generator is significantly faster than
+            value scramblers it passes strict PRNG tests.
+            The generator is much faster than
             <seemfa marker="#exsp_next/1"><c>exsp_next/1</c></seemfa>
             but with a bit lower quality.
           </p>
@@ -955,34 +974,34 @@ end.</pre>
       </desc>
     </func>
     <func>
-      <name name="mwc59_value" arity="1" since="OTP 25.0"/>
+      <name name="mwc59_fast_value" arity="1" since="OTP 25.0"/>
       <fsummary>Return the generator value.</fsummary>
       <desc>
         <p>
           Returns a 58-bit value <c><anno>V</anno></c>
           from a generator state <c><anno>CX</anno></c>.
           The generator state is scrambled using
-          a 10-bit xorshift which masks
+          an 8-bit xorshift which masks
           the statistical imperfecions of the base generator
           <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa>
-          enough that the 32 lowest bits are of good quality.
+          enough that the 32 lowest bits are of decent quality.
           It is not recommended to generate numbers
           in a range > 2^32 with this function.
         </p>
       </desc>
     </func>
     <func>
-      <name name="mwc59_full_value" arity="1" since="OTP 25.0"/>
+      <name name="mwc59_value" arity="1" since="OTP 25.0"/>
       <fsummary>Return the generator value.</fsummary>
       <desc>
         <p>
           Returns a 58-bit value <c><anno>V</anno></c>
           from a generator state <c><anno>CX</anno></c>.
           The generator state is scrambled using
-          an 8-bit followed by a 16-bit xorshift which masks
+          an 4-bit followed by a 27-bit xorshift, which masks
           the statistical imperfecions of the base generator
           <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa>
-          enough that all 58 bits are of good quality.
+          enough that all 58 bits are of very good quality.
           Beware of bias in the generated numbers if
           generating on a non power of 2 range too close to 2^58.
         </p>
@@ -998,7 +1017,7 @@ end.</pre>
           as a <c>float()</c>.
           The generator state is scrambled as with
           <seemfa marker="#mwc59_value/1">
-            <c>mwc59_full_value/1</c>
+            <c>mwc59_value/1</c>
           </seemfa>
           before converted to a <c>float()</c>.
         </p>
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 47766f2547..6c85a79323 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -38,7 +38,7 @@
 
 %% Utilities
 -export([exsp_next/1, exsp_jump/1, splitmix64_next/1,
-         mwc59/1, mwc59_value/1, mwc59_full_value/1, mwc59_float/1,
+         mwc59/1, mwc59_fast_value/1, mwc59_value/1, mwc59_float/1,
          mwc59_seed/0, mwc59_seed/1]).
 
 %% Test, dev and internal
@@ -1478,8 +1478,8 @@ dummy_seed({A1, A2, A3}) ->
 %% The chosen parameters are:
 %% A = 16#7fa6502
 %% B = 32
-%% Single Xorshift: 16
-%% Double Xorshift: 8, 16
+%% Single Xorshift: 8
+%% Double Xorshift: 4, 27
 %%
 %% These parameters gives the MWC "digit" size 32 bits
 %% which gives them theoretical statistical guarantees,
@@ -1490,19 +1490,13 @@ dummy_seed({A1, A2, A3}) ->
 %% be masked or rem:ed out.
 %%
 %% =====================================================================
-%%% -define(MWC_A, (6)).
-%%% -define(MWC_B, (3)).
-
-%%% -define(MWC59_A, (16#20075dc0)). % 16#1ffb0729
-%%% -define(MWC59_B, (29)).
-
--define(MWC59_A, (16#7fa6502)). % 16#7f17555 16#3f35301
+-define(MWC59_A, (16#7fa6502)).
 -define(MWC59_B, (32)).
 -define(MWC59_P, ((?MWC59_A bsl ?MWC59_B) - 1)).
 
--define(MWC59_XS, 16).
--define(MWC59_XS1, 8).
--define(MWC59_XS2, 16).
+-define(MWC59_XS, 8).
+-define(MWC59_XS1, 4).
+-define(MWC59_XS2, 27).
 
 -type mwc59_state() :: 1..?MWC59_P-1.
 
@@ -1513,13 +1507,13 @@ mwc59(CX0) -> % when is_integer(CX0), 1 =< CX0, CX0 < ?MWC59_P ->
     X = ?MASK(?MWC59_B, CX),
     ?MWC59_A * X + C.
 
--spec mwc59_value(CX :: mwc59_state()) -> V :: 0..?MASK(58).
-mwc59_value(CX1) -> % when is_integer(CX0), 1 =< CX0, CX0 < ?MWC59_P ->
+-spec mwc59_fast_value(CX :: mwc59_state()) -> V :: 0..?MASK(58).
+mwc59_fast_value(CX1) -> % when is_integer(CX1), 1 =< CX1, CX1 < ?MWC59_P ->
     CX = ?MASK(58, CX1),
     CX bxor ?BSL(58, CX, ?MWC59_XS).
 
--spec mwc59_full_value(CX :: mwc59_state()) -> V :: 0..?MASK(58).
-mwc59_full_value(CX1) -> % when is_integer(CX0), 1 =< CX0, CX0 < ?MWC59_P ->
+-spec mwc59_value(CX :: mwc59_state()) -> V :: 0..?MASK(58).
+mwc59_value(CX1) -> % when is_integer(CX1), 1 =< CX1, CX1 < ?MWC59_P ->
     CX = ?MASK(58, CX1),
     CX2 = CX bxor ?BSL(58, CX, ?MWC59_XS1),
     CX2 bxor ?BSL(58, CX2, ?MWC59_XS2).
@@ -1528,7 +1522,8 @@ mwc59_full_value(CX1) -> % when is_integer(CX0), 1 =< CX0, CX0 < ?MWC59_P ->
 mwc59_float(CX1) ->
     CX = ?MASK(53, CX1),
     CX2 = CX bxor ?BSL(53, CX, ?MWC59_XS1),
-    (CX2 bxor ?BSL(53, CX2, ?MWC59_XS2)) * ?TWO_POW_MINUS53.
+    CX3 = CX2 bxor ?BSL(53, CX2, ?MWC59_XS2),
+    CX3 * ?TWO_POW_MINUS53.
 
 -spec mwc59_seed() -> CX :: mwc59_state().
 mwc59_seed() ->
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index eabb9396ad..c03a687b3a 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -216,11 +216,11 @@ mwc59_api(Config) when is_list(Config) ->
 mwc59_api(CX0, 0) ->
     CX = 216355295181821136,
     {CX, CX} = {CX0, CX},
-    V0 = rand:mwc59_value(CX0),
-    V = 215617979550160080,
+    V0 = rand:mwc59_fast_value(CX0),
+    V = 262716604851324112,
     {V, V} = {V0, V},
-    W0 = rand:mwc59_full_value(CX0),
-    W = 70209996550472912,
+    W0 = rand:mwc59_value(CX0),
+    W = 240206843777063376,
     {W, W} = {W0, W},
     F0 = rand:mwc59_float(CX0),
     F = (W band ((1 bsl 53) - 1)) * (1 / (1 bsl 53)),
@@ -228,8 +228,8 @@ mwc59_api(CX0, 0) ->
     ok;
 mwc59_api(CX, N)
   when is_integer(CX), 1 =< CX, CX < (16#7fa6502 bsl 32) - 1 ->
-    V = rand:mwc59_value(CX),
-    W = rand:mwc59_full_value(CX),
+    V = rand:mwc59_fast_value(CX),
+    W = rand:mwc59_value(CX),
     F = rand:mwc59_float(CX),
     true = 0 =< V,
     true = V < 1 bsl 58,
@@ -1129,13 +1129,13 @@ do_measure(Iterations) ->
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
                           %% Just a 'rem' with slightly skewed distribution
-                            case (rand:mwc59_value(St1) rem Range) + 1 of
+                            case (rand:mwc59_fast_value(St1) rem Range) + 1 of
                                 R when is_integer(R), 0 < R, R =< Range ->
                                     St1
                             end
                   end
           end,
-          {mwc59,value_mod}, Iterations,
+          {mwc59,fast_mod}, Iterations,
           TMarkUniformRange10000, OverheadUniformRange1000),
     _ =
         measure_1(
@@ -1144,13 +1144,13 @@ do_measure(Iterations) ->
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
                           %% Just a 'rem' with slightly skewed distribution
-                            case (rand:mwc59_full_value(St1) rem Range) + 1 of
+                            case (rand:mwc59_value(St1) rem Range) + 1 of
                                 R when is_integer(R), 0 < R, R =< Range ->
                                     St1
                             end
                   end
           end,
-          {mwc59,full_mod}, Iterations,
+          {mwc59,value_mod}, Iterations,
           TMarkUniformRange10000, OverheadUniformRange1000),
     _ =
         measure_1(
@@ -1231,13 +1231,13 @@ do_measure(Iterations) ->
                   Range = 1 bsl 32,
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
-                          case rand:mwc59_value(St1) band ((1 bsl 32)-1) of
+                          case rand:mwc59_fast_value(St1) band ((1 bsl 32)-1) of
                               R when is_integer(R), 0 =< R, R < Range ->
                                   St1
                           end
                   end
           end,
-          {mwc59,value_mask}, Iterations,
+          {mwc59,fast_mask}, Iterations,
           TMarkUniform32Bit, OverheadUniform32Bit),
     _ =
         measure_1(
@@ -1246,7 +1246,7 @@ do_measure(Iterations) ->
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
                           case
-                              rand:mwc59_full_value(St1) band
+                              rand:mwc59_value(St1) band
                               ((1 bsl 32)-1)
                           of
                               R when is_integer(R), 0 =< R, R < Range ->
@@ -1254,7 +1254,7 @@ do_measure(Iterations) ->
                           end
                   end
           end,
-          {mwc59,full_mask}, Iterations,
+          {mwc59,value_mask}, Iterations,
           TMarkUniform32Bit, OverheadUniform32Bit),
     _ =
         measure_1(
@@ -1372,14 +1372,14 @@ do_measure(Iterations) ->
                   Range = 1 bsl 58,
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
-                          V = rand:mwc59_value(St1),
+                          V = rand:mwc59_fast_value(St1),
                           if
                               is_integer(V), 0 =< V, V < Range ->
                                   St1
                           end
                   end
           end,
-          {mwc59,value}, Iterations,
+          {mwc59,fast}, Iterations,
           TMarkUniformFullRange, OverheadUniformFullRange),
     _ =
         measure_1(
@@ -1387,14 +1387,14 @@ do_measure(Iterations) ->
                   Range = 1 bsl 58,
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
-                          V = rand:mwc59_full_value(St1),
+                          V = rand:mwc59_value(St1),
                           if
                               is_integer(V), 0 =< V, V < Range ->
                                   St1
                           end
                   end
           end,
-          {mwc59,full_value}, Iterations,
+          {mwc59,value}, Iterations,
           TMarkUniformFullRange, OverheadUniformFullRange),
     _ =
         measure_1(
@@ -1791,19 +1791,19 @@ mwc59_bytes(N, R0, Bin) when is_integer(N), 7*4 =< N ->
     R2 = rand:mwc59(R1),
     R3 = rand:mwc59(R2),
     R4 = rand:mwc59(R3),
-    V1 = rand:mwc59_value(R1),
-    V2 = rand:mwc59_value(R2),
-    V3 = rand:mwc59_value(R3),
-    V4 = rand:mwc59_value(R4),
+    V1 = rand:mwc59_fast_value(R1),
+    V2 = rand:mwc59_fast_value(R2),
+    V3 = rand:mwc59_fast_value(R3),
+    V4 = rand:mwc59_fast_value(R4),
     mwc59_bytes(N-7*4, R4, <<Bin/binary, V1:56, V2:56, V3:56, V4:56>>);
 mwc59_bytes(N, R0, Bin) when is_integer(N), 7 =< N ->
     R1 = rand:mwc59(R0),
-    V = rand:mwc59_value(R1),
+    V = rand:mwc59_fast_value(R1),
     mwc59_bytes(N-7, R1, <<Bin/binary, V:56>>);
 mwc59_bytes(N, R0, Bin) when is_integer(N), 0 < N ->
     R1 = rand:mwc59(R0),
     Bits = N bsl 3,
-    V = rand:mwc59_value(R1),
+    V = rand:mwc59_fast_value(R1),
     {<<Bin/binary, V:Bits>>, R1};
 mwc59_bytes(0, R0, Bin) ->
     {Bin, R0}.
-- 
2.35.3

openSUSE Build Service is sponsored by