File 2382-erts-Stop-overestimating-external-format-size.patch of Package erlang

From 49024e83a2377a15f651fa95d39c43e71834a54a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Fri, 3 Nov 2023 13:45:18 +0100
Subject: [PATCH 2/3] erts: Stop overestimating external format size

Debugging differences between the calculated and actual size is no
fun at all when certain kinds of data is overestimated, as a simple
`ASSERT(after_encode <= &before_encode[size])` check will often
succeed when it shouldn't (e.g. if part of one object is
overestimated while another part is underestimated).

This commit fixes the differences that I've noticed, and adds an
assertion that the encoded size should be exactly equal to the
calculated size so that no new differences can fly under the radar
from now on.
---
 erts/emulator/beam/erl_db_util.c | 10 +++-
 erts/emulator/beam/external.c    | 84 +++++++++++++++++++++++++-------
 2 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c
index 3524866647..160cf7b0f4 100644
--- a/erts/emulator/beam/erl_db_util.c
+++ b/erts/emulator/beam/erl_db_util.c
@@ -3384,8 +3384,14 @@ static void* copy_to_comp(int keypos, Eterm obj, DbTerm* dest,
 		tpl[i] = src[i];
 	    }
 	    else {
-		tpl[i] = ext2elem(tpl, top.cp);
-		top.cp = erts_encode_ext_ets(src[i], top.cp, &dest->first_oh);
+#ifdef DEBUG
+                Uint encoded_size = erts_encode_ext_size_ets(src[i]);
+                byte *orig_cp = top.cp;
+#endif
+                tpl[i] = ext2elem(tpl, top.cp);
+
+                top.cp = erts_encode_ext_ets(src[i], top.cp, &dest->first_oh);
+                ASSERT(top.cp == &orig_cp[encoded_size]);
 	    }
 	}
     }
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 19fec0f8df..66ef8266c3 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -5361,9 +5361,9 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
     }
 
 #define LIST_TAIL_OP ((0 << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER)
-#define TERM_ARRAY_OP(N) (((N) << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER)
-#define TERM_ARRAY_OP_DEC(OP) ((OP) - (1 << _TAG_PRIMARY_SIZE))
-
+#define HASHMAP_NODE_OP ((1 << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER)
+#define TERM_ARRAY_OP(N) (((N) << _HEADER_ARITY_OFFS) | TAG_PRIMARY_HEADER)
+#define TERM_ARRAY_OP_DEC(OP) ((OP) - (1 << _HEADER_ARITY_OFFS))
 
     for (;;) {
 	ASSERT(!is_header(obj));
@@ -5513,18 +5513,20 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
                     erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
 		}
 
-		ptr++;
-		WSTACK_RESERVE(s, node_sz*2);
-		while(node_sz--) {
-                    if (is_list(*ptr)) {
-			WSTACK_FAST_PUSH(s, CAR(list_val(*ptr)));
-			WSTACK_FAST_PUSH(s, CDR(list_val(*ptr)));
+                ptr++;
+                WSTACK_RESERVE(s, node_sz * 2);
+                while(node_sz--) {
+                    Eterm node = *ptr++;
+
+                    if (is_list(node) || is_tuple(node)) {
+                        WSTACK_FAST_PUSH(s, (UWord)node);
+                        WSTACK_FAST_PUSH(s, (UWord)HASHMAP_NODE_OP);
                     } else {
-			WSTACK_FAST_PUSH(s, *ptr);
-		    }
-		    ptr++;
-		}
-	    }
+                        ASSERT(is_map(node));
+                        WSTACK_FAST_PUSH(s, node);
+                    }
+                }
+            }
 	    break;
 	case FLOAT_DEF:
 	    if (dflags & DFLAG_NEW_FLOATS) {
@@ -5625,15 +5627,24 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
                 ErlFunThing *funp = (ErlFunThing *) fun_val(obj);
 
                 if (is_local_fun(funp)) {
-                    result += 20+1+1+4;	/* New ID + Tag */
-                    result += 4; /* Length field (number of free variables */
+                    ErlFunEntry *fe = funp->entry.fun;
+
+                    result += 1 /* tag */
+                            + 4 /* length field (size of free variables) */
+                            + 1 /* arity */
+                            + 16 /* uniq */
+                            + 4 /* index */
+                            + 4; /* free variables */
+                    result += encode_atom_size(acmp, fe->module, dflags);
+                    result += encode_small_size(acmp, make_small(fe->old_index), dflags);
+                    result += encode_small_size(acmp, make_small(fe->old_uniq), dflags);
                     result += encode_pid_size(acmp, erts_init_process_id, dflags);
-                    result += encode_atom_size(acmp, funp->entry.fun->module, dflags);
-                    result += 2 * (1+4);	/* Index, Uniq */
+
                     if (fun_num_free(funp) > 1) {
                         WSTACK_PUSH2(s, (UWord) (funp->env + 1),
                                     (UWord) TERM_ARRAY_OP(fun_num_free(funp)-1));
                     }
+
                     if (fun_num_free(funp) != 0) {
                         obj = funp->env[0];
                         continue; /* big loop */
@@ -5672,6 +5683,43 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
 		    obj = CAR(cons);
 		}
 		break;
+	    case HASHMAP_NODE_OP: {
+                Eterm *cons;
+
+                obj = (Eterm)WSTACK_POP(s);
+
+                if (is_tuple(obj)) {
+                    /* Collision node */
+                    Eterm *node_terms;
+                    Uint node_size;
+
+                    node_terms = tuple_val(obj);
+                    node_size = arityval(*node_terms);
+                    ASSERT(node_size >= 2);
+
+                    WSTACK_RESERVE(s, node_size * 2);
+                    for (Uint i = 1; i < node_size; i++) {
+                         ASSERT(is_list(node_terms[i]));
+                         WSTACK_FAST_PUSH(s, (UWord)node_terms[i]);
+                         WSTACK_FAST_PUSH(s, (UWord)HASHMAP_NODE_OP);
+                    }
+
+                    /* The last collision leaf must be handled below, or it
+                     * will be wrongly treated as a normal cons cell by the
+                     * main loop. */
+                    obj = node_terms[node_size];
+                    ASSERT(is_list(obj));
+                }
+
+                if (is_list(obj)) {
+                    /* Leaf node */
+                    cons = list_val(obj);
+
+                    WSTACK_PUSH(s, CDR(cons));
+                    obj = CAR(cons);
+                }
+                break;
+            }
 	    case TERM_ARRAY_OP(1):
 		obj = *(Eterm*)WSTACK_POP(s);
 		break;
-- 
2.35.3

openSUSE Build Service is sponsored by