File 4624-erts-Change-the-default-hash-implementation-to-use-f.patch of Package erlang

From 5ad0c86aa11af73950804cadc284b2242684a231 Mon Sep 17 00:00:00 2001
From: Lukas Larsson <lukas@erlang.org>
Date: Fri, 9 Aug 2019 09:04:46 +0200
Subject: [PATCH 4/6] erts: Change the default hash implementation to use
 fibonacci hash

Fibonacci hashing is a variant of multiplicative hashing
that uses a constant based on the golden ratio.
---
 erts/emulator/beam/export.c   |  2 +-
 erts/emulator/beam/hash.c     | 96 +++++++++++++++++--------------------------
 erts/emulator/beam/hash.h     | 46 +++++++++++++++++----
 erts/emulator/beam/register.c |  4 +-
 4 files changed, 78 insertions(+), 70 deletions(-)

diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c
index 946ffeffb8..c17fb436e2 100644
--- a/erts/emulator/beam/export.c
+++ b/erts/emulator/beam/export.c
@@ -218,7 +218,7 @@ erts_find_export_entry(Eterm m, Eterm f, unsigned int a, ErtsCodeIndex code_ix)
     int ix;
     HashBucket* b;
 
-    ix = hval % export_tables[code_ix].htable.size;
+    ix = hash_get_slot(&export_tables[code_ix].htable, hval);
     b = export_tables[code_ix].htable.bucket[ix];
 
     /*
diff --git a/erts/emulator/beam/hash.c b/erts/emulator/beam/hash.c
index cc816ac163..cdab53b3f2 100644
--- a/erts/emulator/beam/hash.c
+++ b/erts/emulator/beam/hash.c
@@ -29,38 +29,20 @@
 #include "sys.h"
 #include "hash.h"
 
-/*
-** List of sizes (all are primes)
-*/
-static const int h_size_table[] = {
-    2, 5, 11, 23, 47, 97, 197, 397, 797,  /* double upto here */
-    1201,   1597,
-    2411,   3203,
-    4813,   6421,
-    9643,   12853,
-    19289,  25717,
-    51437,
-    102877,
-    205759,
-    411527,
-    823117,
-    1646237,
-    3292489,
-    6584983,
-    13169977,
-    26339969,
-    52679969,
-    -1
-};
-
 /*
 ** Get info about hash
 **
 */
 
+#define MAX_SHIFT (ERTS_SIZEOF_TERM * 8)
+
+static int hash_get_slots(Hash *h) {
+    return UWORD_CONSTANT(1) << (MAX_SHIFT - h->shift);
+}
+
 void hash_get_info(HashInfo *hi, Hash *h)
 {
-    int size = h->size;
+    int size = hash_get_slots(h);
     int i;
     int max_depth = 0;
     int objects = 0;
@@ -84,7 +66,7 @@ void hash_get_info(HashInfo *hi, Hash *h)
     ASSERT(objects == h->nobjs);
 
     hi->name  = h->name;
-    hi->size  = h->size;
+    hi->size  = hash_get_slots(h);
     hi->used  = used;
     hi->objs  = h->nobjs;
     hi->depth = max_depth;
@@ -118,15 +100,15 @@ hash_table_sz(Hash *h)
   int i;
   for(i=0;h->name[i];i++);
   i++;
-  return sizeof(Hash) + h->size*sizeof(HashBucket*) + i;
+  return sizeof(Hash) + hash_get_slots(h)*sizeof(HashBucket*) + i;
 }
 
 
 static ERTS_INLINE void set_thresholds(Hash* h)
 {
-    h->grow_threshold = (8*h->size)/5;   /* grow at 160% load */
-    if (h->size_ix > h->min_size_ix)
-        h->shrink_threshold = h->size / 5;  /* shrink at 20% load */
+    h->grow_threshold = (8*hash_get_slots(h))/5;   /* grow at 160% load */
+    if (h->shift < h->max_shift)
+        h->shrink_threshold = hash_get_slots(h) / 5;  /* shrink at 20% load */
     else
         h->shrink_threshold = -1;  /* never shrink below initial size */
 }
@@ -138,29 +120,27 @@ static ERTS_INLINE void set_thresholds(Hash* h)
 Hash* hash_init(int type, Hash* h, char* name, int size, HashFunctions fun)
 {
     int sz;
-    int ix = 0;
+    int shift = 1;
 
     h->meta_alloc_type = type;
 
-    while (h_size_table[ix] != -1 && h_size_table[ix] < size)
-	ix++;
-    if (h_size_table[ix] == -1)
-	return NULL;
-
-    size = h_size_table[ix];
-    sz = size*sizeof(HashBucket*);
-
-    h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz);
+    while ((UWORD_CONSTANT(1) << shift) < size)
+        shift++;
 
-    memzero(h->bucket, sz);
     h->is_allocated = 0;
     h->name = name;
     h->fun = fun;
-    h->size = size;
-    h->size_ix = ix;
-    h->min_size_ix = ix;
+    h->shift = MAX_SHIFT - shift;
+    h->max_shift = h->shift;
     h->nobjs = 0;
     set_thresholds(h);
+
+    sz = hash_get_slots(h) * sizeof(HashBucket*);
+    h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz);
+    memzero(h->bucket, sz);
+
+    ASSERT(h->shift > 0 && h->shift < 64);
+
     return h;
 }
 
@@ -183,7 +163,7 @@ Hash* hash_new(int type, char* name, int size, HashFunctions fun)
 */
 void hash_delete(Hash* h)
 {
-    int old_size = h->size;
+    int old_size = hash_get_slots(h);
     int i;
 
     for (i = 0; i < old_size; i++) {
@@ -206,22 +186,20 @@ void hash_delete(Hash* h)
 static void rehash(Hash* h, int grow)
 {
     int sz;
-    int old_size = h->size;
+    int old_size = hash_get_slots(h);
     HashBucket** new_bucket;
     int i;
 
     if (grow) {
-	if ((h_size_table[h->size_ix+1]) == -1)
-	    return;
-	h->size_ix++;
+	h->shift--;
     }
     else {
-	if (h->size_ix == 0)
+	if (h->shift == h->max_shift)
 	    return;
-	h->size_ix--;
+	h->shift++;
     }
-    h->size = h_size_table[h->size_ix];
-    sz = h->size*sizeof(HashBucket*);
+
+    sz = hash_get_slots(h)*sizeof(HashBucket*);
 
     new_bucket = (HashBucket **) h->fun.meta_alloc(h->meta_alloc_type, sz);
     memzero(new_bucket, sz);
@@ -230,7 +208,7 @@ static void rehash(Hash* h, int grow)
 	HashBucket* b = h->bucket[i];
 	while (b != (HashBucket*) 0) {
 	    HashBucket* b_next = b->next;
-	    int ix = b->hvalue % h->size;
+	    Uint ix = hash_get_slot(h, b->hvalue);
 	    b->next = new_bucket[ix];
 	    new_bucket[ix] = b;
 	    b = b_next;
@@ -248,7 +226,7 @@ static void rehash(Hash* h, int grow)
 void* hash_get(Hash* h, void* tmpl)
 {
     HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
+    int ix = hash_get_slot(h, hval);
     HashBucket* b = h->bucket[ix];
 
     while(b != (HashBucket*) 0) {
@@ -265,7 +243,7 @@ void* hash_get(Hash* h, void* tmpl)
 void* hash_put(Hash* h, void* tmpl)
 {
     HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
+    Uint ix = hash_get_slot(h, hval);
     HashBucket* b = h->bucket[ix];
 
     while(b != (HashBucket*) 0) {
@@ -291,7 +269,7 @@ void* hash_put(Hash* h, void* tmpl)
 void* hash_erase(Hash* h, void* tmpl)
 {
     HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
+    Uint ix = hash_get_slot(h, hval);
     HashBucket* b = h->bucket[ix];
     HashBucket* prev = 0;
 
@@ -323,7 +301,7 @@ void *
 hash_remove(Hash *h, void *tmpl)
 {
     HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
+    Uint ix = hash_get_slot(h, hval);
     HashBucket *b = h->bucket[ix];
     HashBucket *prev = NULL;
 
@@ -347,7 +325,7 @@ void hash_foreach(Hash* h, HFOREACH_FUN func, void *func_arg2)
 {
     int i;
 
-    for (i = 0; i < h->size; i++) {
+    for (i = 0; i < hash_get_slots(h); i++) {
 	HashBucket* b = h->bucket[i];
 	while(b != (HashBucket*) 0) {
 	    (*func)((void *) b, func_arg2);
diff --git a/erts/emulator/beam/hash.h b/erts/emulator/beam/hash.h
index 38b401548a..cbd75f3025 100644
--- a/erts/emulator/beam/hash.h
+++ b/erts/emulator/beam/hash.h
@@ -18,16 +18,16 @@
  * %CopyrightEnd%
  */
 
-/*
-** General hash functions
-**
-*/
+/**
+ * General hash functions
+ *
+ **/
 #ifndef __HASH_H__
 #define __HASH_H__
 
 #include "sys.h"
 
-typedef unsigned long HashValue;
+typedef UWord HashValue;
 typedef struct hash Hash;
 
 typedef int (*HCMP_FUN)(void*, void*);
@@ -76,11 +76,10 @@ struct hash
     int is_allocated;    /* 0 iff hash structure is on stack or is static */
     int meta_alloc_type; /* argument to pass to meta_alloc and meta_free */
     char* name;          /* Table name (static string, for debugging) */
-    int size;		 /* Number of slots */
+    int shift;		 /* How much to shift the hash value */
+    int max_shift;       /* Never shift more than this value */
     int shrink_threshold;
     int grow_threshold;
-    int size_ix;         /* Size index in size table */
-    int min_size_ix;     /* Never shrink table smaller than this */
     int nobjs;		 /* Number of objects in table */
     HashBucket** bucket; /* Vector of bucket pointers (objects) */
 };
@@ -99,4 +98,35 @@ void* hash_erase(Hash*, void*);
 void* hash_remove(Hash*, void*);
 void  hash_foreach(Hash*, HFOREACH_FUN, void *);
 
+ERTS_GLB_INLINE Uint hash_get_slot(Hash *h, HashValue hv);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE Uint
+hash_get_slot(Hash *h, HashValue hv)
+{
+    /* This slot mapping function uses fibonacci hashing in order to
+     * protect itself against a very bad hash function. This is not
+     * a hash function, so the user of hash.h should still spend time
+     * to figure out a good hash function for its data.
+     *
+     * See https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+     * for some thoughts and ideas about fibonacci hashing.
+     */
+
+    /* This is not strictly part of the fibonacci hashing algorithm
+     * but it does help to spread the values of the mapping function better.
+     */
+    hv ^= hv >> h->shift;
+#ifdef ARCH_64
+    /* 2^64 / 1.61803398875 = 11400714819323198485.... */
+    return (UWORD_CONSTANT(11400714819323198485) * hv) >> h->shift;
+#else
+    /* 2^32 / 1.61803398875 = 2654435769.... */
+    return (UWORD_CONSTANT(2654435769) * hv) >> h->shift;
+#endif
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
 #endif
diff --git a/erts/emulator/beam/register.c b/erts/emulator/beam/register.c
index d69632d8c8..edf1e5b0cb 100644
--- a/erts/emulator/beam/register.c
+++ b/erts/emulator/beam/register.c
@@ -279,7 +279,7 @@ erts_whereis_name_to_id(Process *c_p, Eterm name)
         erts_proc_lock(c_p, ERTS_PROC_LOCK_MAIN);
 
     hval = REG_HASH(name);
-    ix = hval % process_reg.size;
+    ix = hash_get_slot(&process_reg, hval);
     b = process_reg.bucket[ix];
 
     /*
@@ -343,7 +343,7 @@ erts_whereis_name(Process *c_p,
      */
 
     hval = REG_HASH(name);
-    ix = hval % process_reg.size;
+    ix = hash_get_slot(&process_reg, hval);
     b = process_reg.bucket[ix];
 
     /*
-- 
2.16.4

openSUSE Build Service is sponsored by