File 3621-ets-use-initial-salt-different-from-phash2.patch of Package erlang
From b37a99f5f45eb701602462b8bf3f423d05dca1d2 Mon Sep 17 00:00:00 2001
From: Maxim Fedorov <maximfca@gmail.com>
Date: Fri, 15 Jan 2021 18:52:17 -0800
Subject: [PATCH 1/2] ets: use initial salt different from phash2
It is convenient to use erlang:phash2 to determine frag (ETS table)
to use. If salt is the same, both phash2 and ets hashing generate
the same frag/bucket number.
Since ETS hash is not portable, there is no need to keep backwards
compatibility with previous releases.
Co-authored-by: Sverker Eriksson <sverker@erlang.org>
---
erts/emulator/beam/utils.c | 12 ++-
erts/emulator/test/map_SUITE.erl | 12 ++-
erts/emulator/test/persistent_term_SUITE.erl | 102 ++++++++++++++-----
lib/stdlib/test/ets_SUITE.erl | 13 ++-
4 files changed, 107 insertions(+), 32 deletions(-)
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index afe02f8f13..7df04420da 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -2061,7 +2061,7 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p)
* One IMPORTANT property must hold (for hamt).
* EVERY BIT of the term that is significant for equality (see EQ)
* MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a
- * chance of hashing different when salted: hash([Salt|A]) vs hash([Salt|B]).
+ * chance of hashing different when salted.
*
* This is why we can not use cached hash values for atoms for example.
*
@@ -2073,14 +2073,19 @@ do { /* Lightweight mixing of constant (type info) */ \
hash = (hash << 17) ^ (hash >> (32-17)); \
} while (0)
+/*
+ * Start with salt, 32-bit prime number, to avoid getting same hash as phash2
+ * which can cause bad hashing in distributed ETS tables for example.
+ */
+#define INTERNAL_HASH_SALT 3432918353U
+
Uint32
make_internal_hash(Eterm term, Uint32 salt)
{
- Uint32 hash;
+ Uint32 hash = salt ^ INTERNAL_HASH_SALT;
/* Optimization. Simple cases before declaration of estack. */
if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
- hash = salt;
#if ERTS_SIZEOF_ETERM == 8
UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST);
#elif ERTS_SIZEOF_ETERM == 4
@@ -2094,7 +2099,6 @@ make_internal_hash(Eterm term, Uint32 salt)
Eterm tmp;
DECLARE_ESTACK(s);
- hash = salt;
for (;;) {
switch (primary_tag(term)) {
case TAG_PRIMARY_LIST:
diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl
index dbf6fa58ed..a2a23d122a 100644
--- a/erts/emulator/test/map_SUITE.erl
+++ b/erts/emulator/test/map_SUITE.erl
@@ -2815,10 +2815,16 @@ hashmap_balance(KeyFun) ->
end,
{_,{MaxDiff,MaxMap}} = lists:foldl(F,
- {#{}, {0, 0}},
+ {#{}, {0, undefined}},
lists:seq(1,10000)),
- io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n",
- [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap), erts_debug:flat_size(MaxMap)]),
+ case MaxMap of
+ undefined ->
+ io:format("Wow, no maps below \"average\"\n", []);
+ _ ->
+ io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n",
+ [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap),
+ erts_debug:flat_size(MaxMap)])
+ end,
true = (MaxDiff < 6), % The probability of this line failing is about 0.000000001
% for a uniform hash. I've set the probability this "high" for now
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl
index a7dd2341cc..e161470f75 100644
--- a/lib/stdlib/test/ets_SUITE.erl
+++ b/lib/stdlib/test/ets_SUITE.erl
@@ -31,6 +31,7 @@
-export([match_delete3/1]).
-export([firstnext/1,firstnext_concurrent/1]).
-export([slot/1]).
+-export([hash_clash/1]).
-export([match1/1, match2/1, match_object/1, match_object2/1]).
-export([dups/1, misc1/1, safe_fixtable/1, info/1, tab2list/1]).
-export([tab2file/1, tab2file2/1, tabfile_ext1/1,
@@ -132,7 +133,7 @@ suite() ->
all() ->
[{group, new}, {group, insert}, {group, lookup},
- {group, delete}, firstnext, firstnext_concurrent, slot,
+ {group, delete}, firstnext, firstnext_concurrent, slot, hash_clash,
{group, match}, t_match_spec_run,
{group, lookup_element}, {group, misc}, {group, files},
{group, heavy}, ordered, ordered_match,
@@ -4563,6 +4564,16 @@ slot_loop(Tab,SlotNo,EltsSoFar) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+hash_clash(Config) when is_list(Config) ->
+ %% ensure that erlang:phash2 and ets:slot use different hash seed
+ Tab = ets:new(tab, [set]),
+ Buckets = erlang:element(1, ets:info(Tab, stats)),
+ Phash = erlang:phash2(<<"123">>, Buckets),
+ true = ets:insert(Tab, {<<"123">>, "extra"}),
+ [] = ets:slot(Tab, Phash).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
match1(Config) when is_list(Config) ->
repeat_for_opts(fun match1_do/1).
--
2.26.2