File 3621-ets-use-initial-salt-different-from-phash2.patch of Package erlang

From b37a99f5f45eb701602462b8bf3f423d05dca1d2 Mon Sep 17 00:00:00 2001
From: Maxim Fedorov <maximfca@gmail.com>
Date: Fri, 15 Jan 2021 18:52:17 -0800
Subject: [PATCH 1/2] ets: use initial salt different from phash2

It is convenient to use erlang:phash2 to determine frag (ETS table)
to use. If salt is the same, both phash2 and ets hashing generate
the same frag/bucket number.
Since ETS hash is not portable, there is no need to keep backwards
compatibility with previous releases.

Co-authored-by: Sverker Eriksson <sverker@erlang.org>
---
 erts/emulator/beam/utils.c                   |  12 ++-
 erts/emulator/test/map_SUITE.erl             |  12 ++-
 erts/emulator/test/persistent_term_SUITE.erl | 102 ++++++++++++++-----
 lib/stdlib/test/ets_SUITE.erl                |  13 ++-
 4 files changed, 107 insertions(+), 32 deletions(-)

diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index afe02f8f13..7df04420da 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -2061,7 +2061,7 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p)
  * One IMPORTANT property must hold (for hamt).
  * EVERY BIT of the term that is significant for equality (see EQ)
  * MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a
- * chance of hashing different when salted: hash([Salt|A]) vs hash([Salt|B]).
+ * chance of hashing different when salted.
  *
  * This is why we can not use cached hash values for atoms for example.
  *
@@ -2073,14 +2073,19 @@ do {  /* Lightweight mixing of constant (type info) */  \
     hash = (hash << 17) ^ (hash >> (32-17));            \
 } while (0)
 
+/*
+ * Start with salt, 32-bit prime number, to avoid getting same hash as phash2
+ * which can cause bad hashing in distributed ETS tables for example.
+ */
+#define INTERNAL_HASH_SALT 3432918353U
+
 Uint32
 make_internal_hash(Eterm term, Uint32 salt)
 {
-    Uint32 hash;
+    Uint32 hash = salt ^ INTERNAL_HASH_SALT;
 
     /* Optimization. Simple cases before declaration of estack. */
     if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
-        hash = salt;
     #if ERTS_SIZEOF_ETERM == 8
         UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST);
     #elif ERTS_SIZEOF_ETERM == 4
@@ -2094,7 +2099,6 @@ make_internal_hash(Eterm term, Uint32 salt)
     Eterm tmp;
     DECLARE_ESTACK(s);
 
-    hash = salt;
     for (;;) {
 	switch (primary_tag(term)) {
 	case TAG_PRIMARY_LIST:
diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl
index dbf6fa58ed..a2a23d122a 100644
--- a/erts/emulator/test/map_SUITE.erl
+++ b/erts/emulator/test/map_SUITE.erl
@@ -2815,10 +2815,16 @@ hashmap_balance(KeyFun) ->
 	end,
 
     {_,{MaxDiff,MaxMap}} = lists:foldl(F,
-				       {#{}, {0, 0}},
+				       {#{}, {0, undefined}},
 				       lists:seq(1,10000)),
-    io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n",
-	      [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap), erts_debug:flat_size(MaxMap)]),
+    case MaxMap of
+	undefined ->
+	    io:format("Wow, no maps below \"average\"\n", []);
+	_ ->
+	    io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n",
+		      [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap),
+		       erts_debug:flat_size(MaxMap)])
+    end,
 
     true = (MaxDiff < 6),  % The probability of this line failing is about 0.000000001
                            % for a uniform hash. I've set the probability this "high" for now
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl
index a7dd2341cc..e161470f75 100644
--- a/lib/stdlib/test/ets_SUITE.erl
+++ b/lib/stdlib/test/ets_SUITE.erl
@@ -31,6 +31,7 @@
 -export([match_delete3/1]).
 -export([firstnext/1,firstnext_concurrent/1]).
 -export([slot/1]).
+-export([hash_clash/1]).
 -export([match1/1, match2/1, match_object/1, match_object2/1]).
 -export([dups/1, misc1/1, safe_fixtable/1, info/1, tab2list/1]).
 -export([tab2file/1, tab2file2/1, tabfile_ext1/1,
@@ -132,7 +133,7 @@ suite() ->
 
 all() ->
     [{group, new}, {group, insert}, {group, lookup},
-     {group, delete}, firstnext, firstnext_concurrent, slot,
+     {group, delete}, firstnext, firstnext_concurrent, slot, hash_clash,
      {group, match}, t_match_spec_run,
      {group, lookup_element}, {group, misc}, {group, files},
      {group, heavy}, ordered, ordered_match,
@@ -4563,6 +4564,16 @@ slot_loop(Tab,SlotNo,EltsSoFar) ->
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+hash_clash(Config) when is_list(Config) ->
+    %% ensure that erlang:phash2 and ets:slot use different hash seed
+    Tab = ets:new(tab, [set]),
+    Buckets = erlang:element(1, ets:info(Tab, stats)),
+    Phash = erlang:phash2(<<"123">>, Buckets),
+    true = ets:insert(Tab, {<<"123">>, "extra"}),
+    [] = ets:slot(Tab, Phash).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 
 match1(Config) when is_list(Config) ->
     repeat_for_opts(fun match1_do/1).
-- 
2.26.2

openSUSE Build Service is sponsored by