File 1443-erts-Optimize-atom_to_binary-1.patch of Package erlang
From d1fca18d8c40d6851c53f90035cf3715e330e54e Mon Sep 17 00:00:00 2001
From: Isabell Huang <isabell@erlang.org>
Date: Thu, 11 Jul 2024 18:09:19 +0200
Subject: [PATCH 3/3] erts: Optimize atom_to_binary/1
When an atom is created, we now create a binary literal of it, so that atom_to_binary returns the pre-allocated binary literal instead of a newly converted one.
---
erts/emulator/beam/atom.c | 132 +++++++++------------
erts/emulator/beam/atom.h | 11 +-
erts/emulator/beam/bif.c | 4 +-
erts/emulator/beam/dist.c | 2 +-
erts/emulator/beam/erl_bif_ddll.c | 4 +-
erts/emulator/beam/erl_bif_info.c | 2 +-
erts/emulator/beam/erl_bif_re.c | 2 +-
erts/emulator/beam/erl_db_util.c | 4 +-
erts/emulator/beam/erl_nif.c | 10 +-
erts/emulator/beam/erl_printf_term.c | 2 +-
erts/emulator/beam/erl_process_dump.c | 2 +-
erts/emulator/beam/erl_unicode.c | 58 +++++----
erts/emulator/beam/erl_utils.h | 6 +-
erts/emulator/beam/external.c | 12 +-
erts/emulator/beam/global.h | 2 +-
erts/emulator/beam/jit/beam_jit_common.cpp | 2 +-
erts/emulator/test/bif_SUITE.erl | 6 +
erts/etc/unix/etp-commands.in | 33 +++++-
18 files changed, 155 insertions(+), 139 deletions(-)
diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c
index 036816df2f..390ca04435 100644
--- a/erts/emulator/beam/atom.c
+++ b/erts/emulator/beam/atom.c
@@ -28,6 +28,7 @@
#include "global.h"
#include "hash.h"
#include "atom.h"
+#include "erl_global_literals.h"
#define ATOM_SIZE 3000
@@ -48,19 +49,6 @@ static erts_rwmtx_t atom_table_lock;
static erts_atomic_t atom_put_ops;
#endif
-/* Functions for allocating space for the ext of atoms. We do not
- * use malloc for each atom to prevent excessive memory fragmentation
- */
-
-typedef struct _atom_text {
- struct _atom_text* next;
- unsigned char text[ATOM_TEXT_SIZE];
-} AtomText;
-
-static AtomText* text_list; /* List of text buffers */
-static byte *atom_text_pos;
-static byte *atom_text_end;
-static Uint reserved_atom_space; /* Total amount of atom text space */
static Uint atom_space; /* Amount of atom text space used */
/*
@@ -81,44 +69,8 @@ void atom_info(fmtfn_t to, void *to_arg)
atom_read_unlock();
}
-/*
- * Allocate an atom text segment.
- */
-static void
-more_atom_space(void)
-{
- AtomText* ptr;
-
- ptr = (AtomText*) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText));
- ptr->next = text_list;
- text_list = ptr;
- atom_text_pos = ptr->text;
- atom_text_end = atom_text_pos + ATOM_TEXT_SIZE;
- reserved_atom_space += sizeof(AtomText);
-
- VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE));
-}
-
-/*
- * Allocate string space within an atom text segment.
- */
-
-static byte*
-atom_text_alloc(int bytes)
-{
- byte *res;
-
- ASSERT(bytes <= MAX_ATOM_SZ_LIMIT);
- if (atom_text_pos + bytes >= atom_text_end) {
- more_atom_space();
- }
- res = atom_text_pos;
- atom_text_pos += bytes;
- atom_space += bytes;
- return res;
-}
/*
* Calculate atom hash value (using the hash algorithm
@@ -128,7 +80,7 @@ atom_text_alloc(int bytes)
static HashValue
atom_hash(Atom* obj)
{
- byte* p = obj->name;
+ byte* p = obj->u.name;
int len = obj->len;
HashValue h = 0, g;
byte v;
@@ -150,12 +102,23 @@ atom_hash(Atom* obj)
return h;
}
+const byte *erts_atom_get_name(const Atom *atom)
+{
+ byte *name;
+ Uint size;
+ Uint offset;
+ ERTS_GET_BITSTRING(atom->u.bin, name, offset, size);
+ ASSERT(offset == 0 && (size % 8) == 0);
+ (void) size;
+ (void) offset;
+ return name;
+}
static int
atom_cmp(Atom* tmpl, Atom* obj)
{
if (tmpl->len == obj->len &&
- sys_memcmp(tmpl->name, obj->name, tmpl->len) == 0)
+ sys_memcmp(tmpl->u.name, erts_atom_get_name(obj), tmpl->len) == 0)
return 0;
return 1;
}
@@ -164,13 +127,39 @@ atom_cmp(Atom* tmpl, Atom* obj)
static Atom*
atom_alloc(Atom* tmpl)
{
- Atom* obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));
+ Atom *obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));
+
+ {
+ Eterm *hp;
+ Uint heap_size = 0;
+ ErtsHeapFactory factory;
+ ErlOffHeap oh;
+ struct erl_off_heap_header **literal_ohp;
+
+ if (tmpl->len <= ERL_ONHEAP_BINARY_LIMIT) {
+ heap_size = heap_bits_size(NBITS(tmpl->len));
+ } else {
+ heap_size = ERL_REFC_BITS_SIZE;
+ }
- obj->name = atom_text_alloc(tmpl->len);
- sys_memcpy(obj->name, tmpl->name, tmpl->len);
+ hp = erts_global_literal_allocate(heap_size, &literal_ohp);
+ ERTS_INIT_OFF_HEAP(&oh);
+ oh.first = *literal_ohp;
+
+ erts_factory_static_init(&factory, hp, heap_size, &oh);
+ *literal_ohp = oh.first;
+ obj->u.bin = erts_hfact_new_binary_from_data(&factory,
+ 0,
+ tmpl->len,
+ tmpl->u.name);
+ erts_global_literal_register(&obj->u.bin, hp, heap_size);
+ }
+
obj->len = tmpl->len;
obj->latin1_chars = tmpl->latin1_chars;
obj->slot.index = -1;
+ atom_space += tmpl->len;
+
/*
* Precompute ordinal value of first 3 bytes + 7 bits.
@@ -186,7 +175,7 @@ atom_alloc(Atom* tmpl)
j = (tmpl->len < 4) ? tmpl->len : 4;
for(i = 0; i < j; ++i)
- c[i] = tmpl->name[i];
+ c[i] = tmpl->u.name[i];
for(; i < 4; ++i)
c[i] = '\0';
obj->ord0 = (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1);
@@ -293,7 +282,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
}
a.len = tlen;
- a.name = (byte *) text;
+ a.u.name = (byte *) text;
atom_read_lock();
aix = index_get(&erts_atom_table, (void*) &a);
atom_read_unlock();
@@ -333,7 +322,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
a.len = tlen;
a.latin1_chars = (Sint16) no_latin1_chars;
- a.name = (byte *) text;
+ a.u.name = (byte *) text;
atom_write_lock();
aix = index_put(&erts_atom_table, (void*) &a);
atom_write_unlock();
@@ -400,7 +389,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len);
- a.name = (byte*)name;
+ a.u.name = (byte*)name;
a.len = (Sint16)len;
break;
case ERTS_ATOM_ENC_7BIT_ASCII:
@@ -415,7 +404,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
}
a.len = (Sint16)len;
- a.name = (byte*)name;
+ a.u.name = (byte*)name;
break;
case ERTS_ATOM_ENC_UTF8:
if (len > MAX_ATOM_SZ_LIMIT) {
@@ -427,7 +416,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
* name will fail. */
a.len = (Sint16)len;
- a.name = (byte*)name;
+ a.u.name = (byte*)name;
break;
}
@@ -446,7 +435,7 @@ erts_atom_get_text_space_sizes(Uint *reserved, Uint *used)
if (lock)
atom_read_lock();
if (reserved)
- *reserved = reserved_atom_space;
+ *reserved = atom_space;
if (used)
*used = atom_space;
if (lock)
@@ -479,33 +468,28 @@ init_atom_table(void)
f.meta_free = (HMFREE_FUN) erts_free;
f.meta_print = (HMPRINT_FUN) erts_print;
- atom_text_pos = NULL;
- atom_text_end = NULL;
- reserved_atom_space = 0;
- atom_space = 0;
- text_list = NULL;
-
erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
"atom_tab", ATOM_SIZE, erts_atom_table_size, f);
- more_atom_space();
- /* Ordinary atoms */
+ /* Ordinary atoms. a is a template for creating an entry in the atom table */
for (i = 0; erl_atom_names[i] != 0; i++) {
int ix;
a.len = sys_strlen(erl_atom_names[i]);
a.latin1_chars = a.len;
- a.name = (byte*)erl_atom_names[i];
+ a.u.name = (byte*)erl_atom_names[i];
a.slot.index = i;
+
+
#ifdef DEBUG
/* Verify 7-bit ascii */
for (ix = 0; ix < a.len; ix++) {
- ASSERT((a.name[ix] & 0x80) == 0);
+ ASSERT((a.u.name[ix] & 0x80) == 0);
}
#endif
ix = index_put(&erts_atom_table, (void*) &a);
- atom_text_pos -= a.len;
- atom_space -= a.len;
- atom_tab(ix)->name = (byte*)erl_atom_names[i];
+ (void) ix;
+ /* Assert that the entry in the atom table is not a template */
+ ASSERT(erts_atom_get_name(atom_tab(ix)));
}
}
diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h
index 681bd4586f..ff05bc35d2 100644
--- a/erts/emulator/beam/atom.h
+++ b/erts/emulator/beam/atom.h
@@ -50,7 +50,10 @@ typedef struct atom {
Sint16 len; /* length of atom name (UTF-8 encoded) */
Sint16 latin1_chars; /* 0-255 if atom can be encoded in latin1; otherwise, -1 */
int ord0; /* ordinal value of first 3 bytes + 7 bits */
- byte* name; /* name of atom */
+ union{
+ byte* name; /* name of atom, used by templates */
+ Eterm bin; /* name of atom, used when atom is in table*/
+ } u;
} Atom;
extern IndexTable erts_atom_table;
@@ -59,6 +62,8 @@ ERTS_GLB_INLINE Atom* atom_tab(Uint i);
ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term);
ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1);
+const byte *erts_atom_get_name(const Atom *atom);
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE Atom*
atom_tab(Uint i)
@@ -73,7 +78,7 @@ ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term)
return 0;
a = atom_tab(atom_val(term));
return (len == (size_t) a->len
- && sys_memcmp((void *) a->name, (void *) text, len) == 0);
+ && sys_memcmp((void *) erts_atom_get_name(a), (void *) text, len) == 0);
}
ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1)
@@ -87,7 +92,7 @@ ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1)
return 0;
a = atom_tab(atom_val(term));
len = a->len;
- aname = a->name;
+ aname = erts_atom_get_name(a);
if (is_latin1) {
for (i = 0; i < len; s++) {
if (aname[i] < 0x80) {
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index 9d6fd75807..d48292c2ae 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -2929,10 +2929,10 @@ BIF_RETTYPE atom_to_list_1(BIF_ALIST_1)
BIF_RET(NIL); /* the empty atom */
ares =
- erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL);
ASSERT(ares == ERTS_UTF8_OK); (void)ares;
- res = erts_utf8_to_list(BIF_P, num_chars, ap->name, ap->len, ap->len,
+ res = erts_utf8_to_list(BIF_P, num_chars, erts_atom_get_name(ap), ap->len, ap->len,
&num_built, &num_eaten, NIL);
ASSERT(num_built == num_chars);
ASSERT(num_eaten == ap->len);
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index cc0535de5d..920fa81f70 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -838,7 +838,7 @@ int is_node_name_atom(Eterm a)
return 0;
i = atom_val(a);
ASSERT((i > 0) && (i < atom_table_size()) && (atom_tab(i) != NULL));
- return is_node_name((char*)atom_tab(i)->name, atom_tab(i)->len);
+ return is_node_name((char*)erts_atom_get_name(atom_tab(i)), atom_tab(i)->len);
}
static void
diff --git a/erts/emulator/beam/erl_bif_ddll.c b/erts/emulator/beam/erl_bif_ddll.c
index 9279b5cce7..15f37e3fd5 100644
--- a/erts/emulator/beam/erl_bif_ddll.c
+++ b/erts/emulator/beam/erl_bif_ddll.c
@@ -1725,7 +1725,7 @@ static int errdesc_to_code(Eterm errdesc, int *code /* out */)
for (i = 0; errcode_tab[i].atm != NULL; ++i) {
int len = sys_strlen(errcode_tab[i].atm);
if (len == ap->len &&
- !sys_strncmp(errcode_tab[i].atm,(char *) ap->name,len)) {
+ !sys_strncmp(errcode_tab[i].atm,(char *) erts_atom_get_name(ap),len)) {
*code = errcode_tab[i].code;
return 0;
}
@@ -1799,7 +1799,7 @@ static char *pick_list_or_atom(Eterm name_term)
goto error;
}
name = erts_alloc(ERTS_ALC_T_DDLL_TMP_BUF, ap->len + 1);
- sys_memcpy(name,ap->name,ap->len);
+ sys_memcpy(name,erts_atom_get_name(ap),ap->len);
name[ap->len] = '\0';
} else {
if (erts_iolist_size(name_term, &name_len)) {
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index c0536012f9..08cd96ee65 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -2703,7 +2703,7 @@ c_compiler_used(Eterm **hpp, Uint *szp)
static int is_snif_term(Eterm module_atom) {
int i;
Atom *a = atom_tab(atom_val(module_atom));
- char *aname = (char *) a->name;
+ char *aname = (char *) erts_atom_get_name(a);
/* if a->name has a '.' then the bif (snif) is bogus i.e a package */
for (i = 0; i < a->len; i++) {
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index 91554f65f7..38072f3fcd 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -1024,7 +1024,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
}
}
ASSERT(tmpb != NULL);
- sys_memcpy(tmpb,ap->name,ap->len);
+ sys_memcpy(tmpb,erts_atom_get_name(ap),ap->len);
tmpb[ap->len] = '\0';
} else {
ErlDrvSizeT slen;
diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c
index b7666a0eb2..43526995b1 100644
--- a/erts/emulator/beam/erl_db_util.c
+++ b/erts/emulator/beam/erl_db_util.c
@@ -3851,13 +3851,13 @@ bool db_eq_comp(DbTableCommon* tb, Eterm a, DbTerm* b)
int db_is_variable(Eterm obj)
{
- byte *b;
+ const byte *b;
int n;
int N;
if (is_not_atom(obj))
return -1;
- b = atom_tab(atom_val(obj))->name;
+ b = erts_atom_get_name(atom_tab(atom_val(obj)));
if ((n = atom_tab(atom_val(obj))->len) < 2)
return -1;
if (*b++ != '$')
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index d9c61182a3..2563d40ad5 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -1799,9 +1799,9 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len,
return 0;
}
if (ap->latin1_chars == ap->len) {
- sys_memcpy(buf, ap->name, ap->len);
+ sys_memcpy(buf, erts_atom_get_name(ap), ap->len);
} else {
- int dlen = erts_utf8_to_latin1((byte*)buf, ap->name, ap->len);
+ int dlen = erts_utf8_to_latin1((byte*)buf, erts_atom_get_name(ap), ap->len);
ASSERT(dlen == ap->latin1_chars); (void)dlen;
}
buf[ap->latin1_chars] = '\0';
@@ -1810,7 +1810,7 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len,
if (ap->len >= len) {
return 0;
}
- sys_memcpy(buf, ap->name, ap->len);
+ sys_memcpy(buf, erts_atom_get_name(ap), ap->len);
buf[ap->len] = '\0';
return ap->len + 1;
}
@@ -4480,8 +4480,8 @@ void erts_print_nif_taints(fmtfn_t to, void* to_arg)
t = (struct tainted_module_t*) erts_atomic_read_nob(&first_taint);
for ( ; t; t = t->next) {
- const Atom* atom = atom_tab(atom_val(t->module_atom));
- erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, atom->name);
+ Atom* atom = atom_tab(atom_val(t->module_atom));
+ erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, erts_atom_get_name(atom));
delim = ",";
}
erts_cbprintf(to,to_arg,"\n");
diff --git a/erts/emulator/beam/erl_printf_term.c b/erts/emulator/beam/erl_printf_term.c
index 9b7eeac22a..95f94c9bea 100644
--- a/erts/emulator/beam/erl_printf_term.c
+++ b/erts/emulator/beam/erl_printf_term.c
@@ -254,7 +254,7 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount)
return result;
}
- s = entry->name;
+ s = erts_atom_get_name(entry);
length = entry->len;
*dcount -= entry->len;
diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c
index 8648d72f90..89e7865bc7 100644
--- a/erts/emulator/beam/erl_process_dump.c
+++ b/erts/emulator/beam/erl_process_dump.c
@@ -323,7 +323,7 @@ dump_element(fmtfn_t to, void *to_arg, Eterm x)
erts_print(to, to_arg, "H" PTR_FMT, boxed_val(x));
} else if (is_immed(x)) {
if (is_atom(x)) {
- unsigned char* s = atom_tab(atom_val(x))->name;
+ const byte* s = erts_atom_get_name(atom_tab(atom_val(x)));
int len = atom_tab(atom_val(x))->len;
int i;
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index 970ca89bd9..9e2924946d 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1347,7 +1347,7 @@ static Eterm do_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz,
num_built, num_eaten,
tail);
}
-Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left,
+Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left,
Uint *num_built, Uint *num_eaten, Eterm tail)
{
return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail);
@@ -1366,7 +1366,7 @@ Uint erts_atom_to_string_length(Eterm atom)
const byte* err_pos;
Uint num_chars;
int ares =
- erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL);
ASSERT(ares == ERTS_UTF8_OK); (void)ares;
return num_chars;
@@ -1380,7 +1380,7 @@ Eterm erts_atom_to_string(Eterm **hpp, Eterm atom, Eterm tail)
ASSERT(is_atom(atom));
ap = atom_tab(atom_val(atom));
if (ap->latin1_chars >= 0)
- return buf_to_intlist(hpp, (char*)ap->name, ap->len, tail);
+ return buf_to_intlist(hpp, (char*)erts_atom_get_name(ap), ap->len, tail);
else {
Eterm res;
const byte* err_pos;
@@ -1389,10 +1389,10 @@ Eterm erts_atom_to_string(Eterm **hpp, Eterm atom, Eterm tail)
Eterm *hp_start = *hpp;
int ares =
#endif
- erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL);
ASSERT(ares == ERTS_UTF8_OK);
- res = erts_make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len,
+ res = erts_make_list_from_utf8_buf(hpp, num_chars, erts_atom_get_name(ap), ap->len,
&num_built, &num_eaten, tail);
ASSERT(num_built == num_chars);
@@ -1924,26 +1924,23 @@ BIF_RETTYPE atom_to_binary_2(BIF_ALIST_2)
ap = atom_tab(atom_val(BIF_ARG_1));
if (BIF_ARG_2 == am_latin1) {
- Eterm bin_term;
-
+ Eterm bin_term;
if (ap->latin1_chars < 0) {
goto error;
}
if (ap->latin1_chars == ap->len) {
- bin_term = erts_new_binary_from_data(BIF_P, ap->len, ap->name);
+ BIF_RET(ap->u.bin);
} else {
byte* bin_p;
int dbg_sz;
-
bin_term = erts_new_binary(BIF_P, ap->latin1_chars, &bin_p);
- dbg_sz = erts_utf8_to_latin1(bin_p, ap->name, ap->len);
+ dbg_sz = erts_utf8_to_latin1(bin_p, erts_atom_get_name(ap), ap->len);
ASSERT(dbg_sz == ap->latin1_chars); (void)dbg_sz;
+ BIF_RET(bin_term);
}
-
- BIF_RET(bin_term);
} else if (BIF_ARG_2 == am_utf8 || BIF_ARG_2 == am_unicode) {
- BIF_RET(erts_new_binary_from_data(BIF_P, ap->len, ap->name));
+ BIF_RET(ap->u.bin);
} else {
error:
BIF_ERROR(BIF_P, BADARG);
@@ -2233,12 +2230,13 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding)
need = 2* ap->latin1_chars;
}
else {
+ const byte * name = erts_atom_get_name(ap);
for (i = 0; i < ap->len; ) {
- if (ap->name[i] < 0x80) {
+ if (name[i] < 0x80) {
i++;
- } else if (ap->name[i] < 0xE0) {
+ } else if (name[i] < 0xE0) {
i += 2;
- } else if (ap->name[i] < 0xF0) {
+ } else if (name[i] < 0xF0) {
i += 3;
} else {
need = -1;
@@ -2256,7 +2254,7 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding)
* the middle of filenames
*/
if (need > 0) {
- byte *name = ap->name;
+ const byte *name = erts_atom_get_name(ap);
int len = ap->len;
for (i = 0; i < len; i++) {
if (name[i] == 0) {
@@ -2398,33 +2396,33 @@ void erts_native_filename_put(Eterm ioterm, int encoding, byte *p)
switch (encoding) {
case ERL_FILENAME_LATIN1:
for (i = 0; i < ap->len; i++) {
- if (ap->name[i] < 0x80) {
- *p++ = ap->name[i];
+ if (erts_atom_get_name(ap)[i] < 0x80) {
+ *p++ = erts_atom_get_name(ap)[i];
} else {
- ASSERT(ap->name[i] < 0xC4);
- *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F);
+ ASSERT(erts_atom_get_name(ap)[i] < 0xC4);
+ *p++ = ((erts_atom_get_name(ap)[i] & 3) << 6) | (erts_atom_get_name(ap)[i+1] & 0x3F);
i++;
}
}
break;
case ERL_FILENAME_UTF8_MAC:
case ERL_FILENAME_UTF8:
- sys_memcpy(p, ap->name, ap->len);
+ sys_memcpy(p, erts_atom_get_name(ap), ap->len);
break;
case ERL_FILENAME_WIN_WCHAR:
for (i = 0; i < ap->len; i++) {
/* Little endian */
- if (ap->name[i] < 0x80) {
- *p++ = ap->name[i];
+ if (erts_atom_get_name(ap)[i] < 0x80) {
+ *p++ = erts_atom_get_name(ap)[i];
*p++ = 0;
- } else if (ap->name[i] < 0xE0) {
- *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F);
- *p++ = ((ap->name[i] & 0x1C) >> 2);
+ } else if (erts_atom_get_name(ap)[i] < 0xE0) {
+ *p++ = ((erts_atom_get_name(ap)[i] & 3) << 6) | (erts_atom_get_name(ap)[i+1] & 0x3F);
+ *p++ = ((erts_atom_get_name(ap)[i] & 0x1C) >> 2);
i++;
} else {
- ASSERT(ap->name[i] < 0xF0);
- *p++ = ((ap->name[i+1] & 3) << 6) | (ap->name[i+2] & 0x3C);
- *p++ = ((ap->name[i] & 0xF) << 4) | ((ap->name[i+1] & 0x3C) >> 2);
+ ASSERT(erts_atom_get_name(ap)[i] < 0xF0);
+ *p++ = ((erts_atom_get_name(ap)[i+1] & 3) << 6) | (erts_atom_get_name(ap)[i+2] & 0x3C);
+ *p++ = ((erts_atom_get_name(ap)[i] & 0xF) << 4) | ((erts_atom_get_name(ap)[i+1] & 0x3C) >> 2);
i += 2;
}
}
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index e29555e0de..d6ebc1c41d 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -192,7 +192,7 @@ ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) {
Atom *aa = atom_tab(atom_val(a));
Atom *bb = atom_tab(atom_val(b));
- byte *name_a, *name_b;
+ const byte *name_a, *name_b;
int len_a, len_b, diff;
diff = aa->ord0 - bb->ord0;
@@ -201,8 +201,8 @@ ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) {
return diff;
}
- name_a = &aa->name[3];
- name_b = &bb->name[3];
+ name_a = &erts_atom_get_name(aa)[3];
+ name_b = &erts_atom_get_name(bb)[3];
len_a = aa->len-3;
len_b = bb->len-3;
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index cfa51387b8..d815235eac 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -591,7 +591,7 @@ Sint erts_encode_ext_dist_header_finalize(ErtsDistOutputBuf* ob,
a = atom_tab(atom_val(atom));
sz = a->len;
ep -= sz;
- sys_memcpy((void *) ep, (void *) a->name, sz);
+ sys_memcpy((void *) ep, (void *) erts_atom_get_name(a), sz);
if (long_atoms) {
ep -= 2;
put_int16(sz, ep);
@@ -2910,16 +2910,16 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint64 dflags)
put_int8(len, ep);
ep += 1;
}
- sys_memcpy((char *) ep, (char *) a->name, len);
+ sys_memcpy((char *) ep, (char *) erts_atom_get_name(a), len);
}
else {
if (a->latin1_chars <= 255 && (dflags & DFLAG_SMALL_ATOM_TAGS)) {
*ep++ = SMALL_ATOM_EXT;
if (len == a->latin1_chars) {
- sys_memcpy(ep+1, a->name, len);
+ sys_memcpy(ep+1, erts_atom_get_name(a), len);
}
else {
- len = erts_utf8_to_latin1(ep+1, a->name, len);
+ len = erts_utf8_to_latin1(ep+1, erts_atom_get_name(a), len);
ASSERT(len == a->latin1_chars);
}
put_int8(len, ep);
@@ -2928,10 +2928,10 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint64 dflags)
else {
*ep++ = ATOM_EXT;
if (len == a->latin1_chars) {
- sys_memcpy(ep+2, a->name, len);
+ sys_memcpy(ep+2, erts_atom_get_name(a), len);
}
else {
- len = erts_utf8_to_latin1(ep+2, a->name, len);
+ len = erts_utf8_to_latin1(ep+2, erts_atom_get_name(a), len);
ASSERT(len == a->latin1_chars);
}
put_int16(len, ep);
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 218aea136a..dce7e2956c 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -1491,7 +1491,7 @@ char *erts_convert_filename_to_wchar(const byte* bytes, Uint size,
ErtsAlcType_t alloc_type, Sint* used,
Uint extra_wchars);
Eterm erts_convert_native_to_filename(Process *p, size_t size, byte *bytes);
-Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left,
+Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left,
Uint *num_built, Uint *num_eaten, Eterm tail);
Eterm
erts_make_list_from_utf8_buf(Eterm **hpp, Uint num,
diff --git a/erts/emulator/beam/jit/beam_jit_common.cpp b/erts/emulator/beam/jit/beam_jit_common.cpp
index aac4239485..fde678606f 100644
--- a/erts/emulator/beam/jit/beam_jit_common.cpp
+++ b/erts/emulator/beam/jit/beam_jit_common.cpp
@@ -39,7 +39,7 @@ extern "C"
static std::string getAtom(Eterm atom) {
Atom *ap = atom_tab(atom_val(atom));
- return std::string((char *)ap->name, ap->len);
+ return std::string((char *)erts_atom_get_name(ap), ap->len);
}
BeamAssemblerCommon::BeamAssemblerCommon(BaseAssembler &assembler_)
diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl
index fd50e9e2bb..196bb70cdc 100644
--- a/erts/emulator/test/bif_SUITE.erl
+++ b/erts/emulator/test/bif_SUITE.erl
@@ -657,6 +657,12 @@ t_atom_to_binary(Config) when is_list(Config) ->
<<>> = atom_to_binary('', unicode),
<<127>> = atom_to_binary('\177', utf8),
<<"abcdef">> = atom_to_binary(abcdef, utf8),
+ <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwe">> =
+ atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwe, utf8),
+ <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwer">> =
+ atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwer, utf8),
+ <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerq">> =
+ atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerq, utf8),
HalfLongBin = atom_to_binary(HalfLongAtom, utf8),
HalfLongBin = atom_to_binary(HalfLongAtom),
LongAtomBin = atom_to_binary(LongAtom, utf8),
diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in
index 559b45262c..0b2e8e1e7a 100644
--- a/erts/etc/unix/etp-commands.in
+++ b/erts/etc/unix/etp-commands.in
@@ -612,7 +612,8 @@ define etp-atom-1
else
set $etp_atom_1_ap = (Atom*)erts_atom_table.seg_table[(Eterm)($arg0)>>16][((Eterm)($arg0)>>6)&0x3FF]
set $etp_atom_1_i = ($etp_atom_1_ap)->len
- set $etp_atom_1_p = ($etp_atom_1_ap)->name
+ etp-bitstring-data-1 ($etp_atom_1_ap)->u.bin
+ set $etp_atom_1_p = ($etp_bitstring_data)
set $etp_atom_1_quote = 1
# Check if atom has to be quoted
if ($etp_atom_1_i > 0)
@@ -642,7 +643,7 @@ define etp-atom-1
printf "'"
end
set $etp_atom_1_i = ($etp_atom_1_ap)->len
- set $etp_atom_1_p = ($etp_atom_1_ap)->name
+ set $etp_atom_1_p = ($etp_bitstring_data)
while $etp_atom_1_i > 0
etp-char-1 (*$etp_atom_1_p) '\''
set $etp_atom_1_p++
@@ -654,6 +655,25 @@ define etp-atom-1
end
end
+define etp-bitstring-data-1
+# Args: Eterm bitstring
+#
+# Non-reentrant
+#
+# Unbox and retrieve the binary data pointer from any bitstring
+ set $etp_bitstring_unboxed = ((Eterm*)(($arg0) & etp_ptr_mask))
+ set $etp_bitstring_subtag = ($etp_bitstring_unboxed[0] & etp_header_subtag_mask)
+ if ($etp_bitstring_subtag == etp_sub_bits_subtag)
+ set $etp_bitstring_ptr = (ErlSubBits *) $etp_bitstring_unboxed
+ set $etp_bitstring_size = ($etp_bitstring_ptr)->end - ($etp_bitstring_ptr)->start
+ set $etp_bitstring_data = (byte *)(($etp_bitstring_ptr)->base_flags & ~(UWord)3)
+ else
+ set $etp_bitstring_ptr = (ErlHeapBits *) $etp_bitstring_unboxed
+ set $etp_bitstring_size = ($etp_bitstring_ptr)->size
+ set $etp_bitstring_data = (byte *)&($etp_bitstring_ptr)->data
+ end
+end
+
define etp-string-to-atom
# Args: (char*) null-terminated
@@ -691,8 +711,10 @@ define etp-string-to-atom
# search hash bucket list
while $etp_p
set $etp_i = 0
+ etp-bitstring-data-1 ($etp_p)->u.bin
+ set $etp_atom_1_p = ($etp_bitstring_data)
while $etp_i < $etp_p->len && ($arg0)[$etp_i]
- if $etp_p->name[$etp_i] != ($arg0)[$etp_i]
+ if $etp_atom_1_p[$etp_i] != ($arg0)[$etp_i]
loop_break
end
set $etp_i++
@@ -2094,7 +2116,8 @@ define etp-term-dump-atom
# Args: atom term
set $etp_atom_1_ap = (Atom*)erts_atom_table.seg_table[(Eterm)($arg0)>>16][((Eterm)($arg0)>>6)&0x3FF]
set $etp_atom_1_i = ($etp_atom_1_ap)->len
- set $etp_atom_1_p = ($etp_atom_1_ap)->name
+ etp-bitstring-data-1 ($etp_atom_1_ap)->u.bin
+ set $etp_atom_1_p = ($etp_bitstring_data)
set $etp_atom_1_quote = 1
set $etp_atom_indent = 13
@@ -2142,7 +2165,7 @@ define etp-term-dump-atom
printf "'"
end
set $etp_atom_1_i = ($etp_atom_1_ap)->len
- set $etp_atom_1_p = ($etp_atom_1_ap)->name
+ set $etp_atom_1_p = ($etp_bitstring_data)
while $etp_atom_1_i > 0
etp-char-1 (*$etp_atom_1_p) '\''
set $etp_atom_1_p++
--
2.35.3