File 0503-ets-use-initial-salt-different-from-phash2.patch of Package erlang
From b37a99f5f45eb701602462b8bf3f423d05dca1d2 Mon Sep 17 00:00:00 2001
From: Maxim Fedorov <maximfca@gmail.com>
Date: Fri, 15 Jan 2021 18:52:17 -0800
Subject: [PATCH 1/2] ets: use initial salt different from phash2
It is convenient to use erlang:phash2 to determine frag (ETS table)
to use. If salt is the same, both phash2 and ets hashing generate
the same frag/bucket number.
Since ETS hash is not portable, there is no need to keep backwards
compatibility with previous releases.
Co-authored-by: Sverker Eriksson <sverker@erlang.org>
---
erts/emulator/beam/utils.c | 12 ++-
erts/emulator/test/map_SUITE.erl | 12 ++-
erts/emulator/test/persistent_term_SUITE.erl | 102 ++++++++++++++-----
lib/stdlib/test/ets_SUITE.erl | 13 ++-
4 files changed, 107 insertions(+), 32 deletions(-)
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index afe02f8f13..7df04420da 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -2061,7 +2061,7 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p)
* One IMPORTANT property must hold (for hamt).
* EVERY BIT of the term that is significant for equality (see EQ)
* MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a
- * chance of hashing different when salted: hash([Salt|A]) vs hash([Salt|B]).
+ * chance of hashing different when salted.
*
* This is why we cannot use cached hash values for atoms for example.
*
@@ -2073,14 +2073,19 @@ do { /* Lightweight mixing of constant (type info) */ \
hash = (hash << 17) ^ (hash >> (32-17)); \
} while (0)
+/*
+ * Start with salt, 32-bit prime number, to avoid getting same hash as phash2
+ * which can cause bad hashing in distributed ETS tables for example.
+ */
+#define INTERNAL_HASH_SALT 3432918353U
+
Uint32
make_internal_hash(Eterm term, Uint32 salt)
{
- Uint32 hash;
+ Uint32 hash = salt ^ INTERNAL_HASH_SALT;
/* Optimization. Simple cases before declaration of estack. */
if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
- hash = salt;
#if ERTS_SIZEOF_ETERM == 8
UINT32_HASH_2((Uint32)term, (Uint32)(term >> 32), HCONST);
#elif ERTS_SIZEOF_ETERM == 4
@@ -2094,7 +2099,6 @@ make_internal_hash(Eterm term, Uint32 salt)
Eterm tmp;
DECLARE_ESTACK(s);
- hash = salt;
for (;;) {
switch (primary_tag(term)) {
case TAG_PRIMARY_LIST:
diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl
index dbf6fa58ed..a2a23d122a 100644
--- a/erts/emulator/test/map_SUITE.erl
+++ b/erts/emulator/test/map_SUITE.erl
@@ -2815,10 +2815,16 @@ hashmap_balance(KeyFun) ->
end,
{_,{MaxDiff,MaxMap}} = lists:foldl(F,
- {#{}, {0, 0}},
+ {#{}, {0, undefined}},
lists:seq(1,10000)),
- io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n",
- [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap), erts_debug:flat_size(MaxMap)]),
+ case MaxMap of
+ undefined ->
+ io:format("Wow, no maps below \"average\"\n", []);
+ _ ->
+ io:format("Max std dev diff ~p for map of size ~p (nodes=~p, flatsize=~p)\n",
+ [MaxDiff, maps:size(MaxMap), hashmap_nodes(MaxMap),
+ erts_debug:flat_size(MaxMap)])
+ end,
true = (MaxDiff < 6), % The probability of this line failing is about 0.000000001
% for a uniform hash. I've set the probability this "high" for now
diff --git a/erts/emulator/test/persistent_term_SUITE.erl b/erts/emulator/test/persistent_term_SUITE.erl
index 15dafaf2e7..e9ace3cd99 100644
--- a/erts/emulator/test/persistent_term_SUITE.erl
+++ b/erts/emulator/test/persistent_term_SUITE.erl
@@ -35,6 +35,10 @@
%%
-export([test_init_restart_cmd/1]).
+%% Test writing helper
+-export([find_colliding_keys/0]).
+
+
suite() ->
[{ct_hooks,[ts_install_cth]},
{timetrap,{minutes,10}}].
@@ -589,30 +593,28 @@ collisions_delete([], _) ->
ok.
colliding_keys() ->
- %% Collisions found by Jesper L. Andersen for breaking maps.
- L = [[764492191,2361333849],
- [49527266765044,90940896816021,20062927283041,267080852079651],
- [249858369443708,206247021789428,20287304470696,25847120931175],
- [10645228898670,224705626119556,267405565521452,258214397180678],
- [264783762221048,166955943492306,98802957003141,102012488332476],
- [69425677456944,177142907243411,137138950917722,228865047699598],
- [116031213307147,29203342183358,37406949328742,255198080174323],
- [200358182338308,235207156008390,120922906095920,116215987197289],
- [58728890318426,68877471005069,176496507286088,221041411345780],
- [91094120814795,50665258299931,256093108116737,19777509566621],
- [74646746200247,98350487270564,154448261001199,39881047281135],
- [23408943649483,164410325820923,248161749770122,274558342231648],
- [169531547115055,213630535746863,235098262267796,200508473898303],
- [235098564415817,85039146398174,51721575960328,173069189684390],
- [176136386396069,155368359051606,147817099696487,265419485459634],
- [137542881551462,40028925519736,70525669519846,63445773516557],
- [173854695142814,114282444507812,149945832627054,99605565798831],
- [177686773562184,127158716984798,132495543008547],
- [227073396444896,139667311071766,158915951283562],
- [26212438434289,94902985796531,198145776057315],
- [266279278943923,58550737262493,74297973216378],
- [32373606512065,131854353044428,184642643042326],
- [34335377662439,85341895822066,273492717750246]],
+ %% Collisions found by find_colliding_keys() below
+ L = [[77674392,148027],
+ [103370644,950908],
+ [106444046,870178],
+ [22217246,735880],
+ [18088843,694607],
+ [63426007,612179],
+ [117354942,906431],
+ [121434305,94282311,816072],
+ [118441466,93873772,783366],
+ [124338174,1414801,123089],
+ [20240282,17113486,923647],
+ [126495528,61463488,164994],
+ [125341723,5729072,445539],
+ [127450932,80442669,348245],
+ [123354692,85724182,14241288,180793],
+ [99159367,65959274,61680971,289939],
+ [107637580,104512101,62639807,181644],
+ [139547511,51654420,2062545,151944],
+ [88078274,73031465,53388204,428872],
+ [141314238,75761379,55699508,861797],
+ [88045216,59272943,21030492,180903]],
%% Verify that the keys still collide (this will fail if the
%% internal hash function has been changed).
@@ -636,6 +638,58 @@ verify_colliding_keys([]) ->
internal_hash(Term) ->
erts_debug:get_internal_state({internal_hash,Term}).
+%% Use this function to (re)generate the list in colliding_keys/0
+find_colliding_keys() ->
+ MaxCollSz = 4,
+ OfEachSz = 7,
+ erts_debug:set_internal_state(available_internal_state, true),
+ MaxInserts = 1 bsl 20,
+ T = ets:new(x, [set, private]),
+ ok = fck_loop_1(T, 1, MaxInserts, MaxCollSz, OfEachSz),
+ fck_collect(T, MaxCollSz, OfEachSz, []).
+
+fck_collect(_T, 1, _OfEachSz, Acc) ->
+ Acc;
+fck_collect(T, CollSz, OfEachSz, Acc) ->
+ {List, _} = ets:select(T,
+ [{{'$1','$2'}, [{'==',{length,'$2'},CollSz}], ['$2']}],
+ OfEachSz),
+ fck_collect(T, CollSz-1, OfEachSz, List ++ Acc).
+
+
+fck_loop_1(T, Key, 0, MaxCollSz, MaxSzLeft) ->
+ fck_loop_2(T, Key, MaxCollSz, MaxSzLeft);
+fck_loop_1(T, Key, Inserts, MaxCollSz, MaxSzLeft) ->
+ Hash = internal_hash(Key),
+ case ets:insert_new(T, {Hash, [Key]}) of
+ true ->
+ fck_loop_1(T, Key+1, Inserts-1, MaxCollSz, MaxSzLeft);
+ false ->
+ [{Hash, KeyList}] = ets:lookup(T, Hash),
+ true = ets:insert(T, {Hash, [Key | KeyList]}),
+ fck_loop_1(T, Key+1, Inserts, MaxCollSz, MaxSzLeft)
+ end.
+
+fck_loop_2(_T, _Key, _MaxCollSz, 0) ->
+ ok;
+fck_loop_2(T, Key, MaxCollSz, MaxSzLeft0) ->
+ Hash = internal_hash(Key),
+ case ets:lookup(T, Hash) of
+ [] ->
+ fck_loop_2(T, Key+1, MaxCollSz, MaxSzLeft0);
+ [{Hash, KeyList}] ->
+ true = ets:insert(T, {Hash, [Key | KeyList]}),
+ MaxSzLeft1 = case length(KeyList)+1 of
+ MaxCollSz ->
+ MaxSzLeft0 - 1;
+ _ ->
+ MaxSzLeft0
+ end,
+ fck_loop_2(T, Key+1, MaxCollSz, MaxSzLeft1)
+ end.
+
+
+
%% OTP-17700 Bug skipped refc++ of shared magic reference
shared_magic_ref(_Config) ->
Ref = atomics:new(10, []),
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl
index a7dd2341cc..e161470f75 100644
--- a/lib/stdlib/test/ets_SUITE.erl
+++ b/lib/stdlib/test/ets_SUITE.erl
@@ -31,6 +31,7 @@
-export([match_delete3/1]).
-export([firstnext/1,firstnext_concurrent/1]).
-export([slot/1]).
+-export([hash_clash/1]).
-export([match1/1, match2/1, match_object/1, match_object2/1]).
-export([dups/1, misc1/1, safe_fixtable/1, info/1, tab2list/1]).
-export([info_binary_stress/1]).
@@ -132,7 +133,7 @@ suite() ->
all() ->
[{group, new}, {group, insert}, {group, lookup},
- {group, delete}, firstnext, firstnext_concurrent, slot,
+ {group, delete}, firstnext, firstnext_concurrent, slot, hash_clash,
{group, match}, t_match_spec_run,
{group, lookup_element}, {group, misc}, {group, files},
{group, heavy}, {group, insert_list}, ordered, ordered_match,
@@ -4563,6 +4564,16 @@ slot_loop(Tab,SlotNo,EltsSoFar) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+hash_clash(Config) when is_list(Config) ->
+ %% ensure that erlang:phash2 and ets:slot use different hash seed
+ Tab = ets:new(tab, [set]),
+ Buckets = erlang:element(1, ets:info(Tab, stats)),
+ Phash = erlang:phash2(<<"123">>, Buckets),
+ true = ets:insert(Tab, {<<"123">>, "extra"}),
+ [] = ets:slot(Tab, Phash).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
match1(Config) when is_list(Config) ->
repeat_for_opts_all_set_table_types(fun match1_do/1).
--
2.26.2