File CVE-2026-28364.patch of Package ocaml.42958
From b0a2614684a52acded784ec213f14ddfe085d146 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 12 Jan 2026 11:56:01 +0100
Subject: [PATCH] robustify intern.c
---
byterun/caml/misc.h | 9 ++
byterun/intern.c | 196 ++++++++++++++++++++++++++++++++------------
2 files changed, 151 insertions(+), 54 deletions(-)
Index: b/byterun/caml/misc.h
===================================================================
--- a/byterun/caml/misc.h
+++ b/byterun/caml/misc.h
@@ -119,6 +119,15 @@ CAMLextern void caml_fatal_error_arg2 (c
char *fmt2, char *arg2)
CAMLnoreturn_end;
+/* Branch prediction */
+#if defined(__GNUC__)
+#define CAMLlikely(e) __builtin_expect(!!(e), 1)
+#define CAMLunlikely(e) __builtin_expect(!!(e), 0)
+#else
+#define CAMLlikely(e) (e)
+#define CAMLunlikely(e) (e)
+#endif
+
/* Safe string operations */
CAMLextern char * caml_strdup(const char * s);
Index: b/byterun/intern.c
===================================================================
--- a/byterun/intern.c
+++ b/byterun/intern.c
@@ -38,6 +38,9 @@
static unsigned char * intern_src;
/* Reading pointer in block holding input data. */
+static unsigned char * intern_src_end;
+/* Pointer to the end of the readable data. */
+
static unsigned char * intern_input = NULL;
/* Pointer to beginning of block holding input data,
if non-NULL this pointer will be freed by the cleanup function. */
@@ -45,6 +48,9 @@ static unsigned char * intern_input = NU
static header_t * intern_dest;
/* Writing pointer in destination block */
+static header_t * intern_dest_end;
+/* Pointer to the end of the destination block */
+
static char * intern_extra_block = NULL;
/* If non-NULL, point to new heap chunk allocated with caml_alloc_for_heap. */
@@ -54,6 +60,9 @@ static asize_t obj_counter;
static value * intern_obj_table = NULL;
/* The pointers to objects already seen */
+static uintnat intern_num_objects;
+/* How many objects are expected (from the header) */
+
static unsigned int intern_color;
/* Color to assign to newly created headers */
@@ -72,31 +81,64 @@ CAMLnoreturn_start
static void intern_bad_code_pointer(unsigned char digest[16])
CAMLnoreturn_end;
+CAMLnoreturn_start
+static void intern_cleanup_failwith(const char * msg)
+CAMLnoreturn_end;
+
static void intern_free_stack(void);
+static inline void intern_check_read(uintnat len)
+{
+ if (CAMLunlikely(len > intern_src_end - intern_src)) {
+ intern_cleanup_failwith("input_value: invalid read");
+ }
+}
+
+static inline void intern_record_obj(value v)
+{
+ if (intern_obj_table != NULL) {
+ if (CAMLunlikely(obj_counter >= intern_num_objects)) {
+ intern_cleanup_failwith("input_value: too many objects");
+ }
+ intern_obj_table[obj_counter++] = v;
+ }
+}
+
static inline unsigned char read8u(void)
-{ return *intern_src++; }
+{
+ intern_check_read(1);
+ return *intern_src++;
+}
static inline signed char read8s(void)
-{ return *intern_src++; }
+{
+ intern_check_read(1);
+ return *intern_src++;
+}
static inline uint16_t read16u(void)
{
- uint16_t res = (intern_src[0] << 8) + intern_src[1];
+ uint16_t res;
+ intern_check_read(2);
+ res = (intern_src[0] << 8) + intern_src[1];
intern_src += 2;
return res;
}
static inline int16_t read16s(void)
{
- int16_t res = (intern_src[0] << 8) + intern_src[1];
+ int16_t res;
+ intern_check_read(2);
+ res = (intern_src[0] << 8) + intern_src[1];
intern_src += 2;
return res;
}
static inline uint32_t read32u(void)
{
- uint32_t res =
+ uint32_t res;
+ intern_check_read(4);
+ res =
((uint32_t)(intern_src[0]) << 24) + (intern_src[1] << 16)
+ (intern_src[2] << 8) + intern_src[3];
intern_src += 4;
@@ -105,7 +147,9 @@ static inline uint32_t read32u(void)
static inline int32_t read32s(void)
{
- int32_t res =
+ int32_t res;
+ intern_check_read(4);
+ res =
((uint32_t)(intern_src[0]) << 24) + (intern_src[1] << 16)
+ (intern_src[2] << 8) + intern_src[3];
intern_src += 4;
@@ -115,7 +159,9 @@ static inline int32_t read32s(void)
#ifdef ARCH_SIXTYFOUR
static uintnat read64u(void)
{
- uintnat res =
+ uintnat res;
+ intern_check_read(8);
+ res =
((uintnat) (intern_src[0]) << 56)
+ ((uintnat) (intern_src[1]) << 48)
+ ((uintnat) (intern_src[2]) << 40)
@@ -129,13 +175,14 @@ static uintnat read64u(void)
}
#endif
-static inline void readblock(void * dest, intnat len)
+static inline void readblock(void * dest, uintnat len)
{
+ intern_check_read(len);
memcpy(dest, intern_src, len);
intern_src += len;
}
-static void intern_init(void * src, void * input)
+static void intern_init(void * src, uintnat len, void * input)
{
/* This is asserted at the beginning of demarshaling primitives.
If it fails, it probably means that an exception was raised
@@ -143,6 +190,7 @@ static void intern_init(void * src, void
Assert (intern_input == NULL && intern_obj_table == NULL \
&& intern_extra_block == NULL && intern_block == 0);
intern_src = src;
+ intern_src_end = intern_src + len;
intern_input = input;
}
@@ -169,6 +217,12 @@ static void intern_cleanup(void)
intern_free_stack();
}
+static void intern_cleanup_failwith(const char * msg)
+{
+ intern_cleanup();
+ caml_failwith(msg);
+}
+
static void readfloat(double * dest, unsigned int code)
{
if (sizeof(double) != 8) {
@@ -310,6 +364,17 @@ static struct intern_item * intern_resiz
} \
} while(0)
+static inline value intern_alloc_obj(mlsize_t wosize, tag_t tag)
+{
+ value v = Val_hp(intern_dest);
+ if (CAMLunlikely(wosize >= intern_dest_end - intern_dest)) {
+ intern_cleanup_failwith("input_value: invalid allocation");
+ }
+ *intern_dest = Make_header_allocated_here(wosize, tag, intern_color);
+ intern_dest += 1 + wosize;
+ return v;
+}
+
static void intern_rec(value *dest)
{
unsigned int code;
@@ -342,9 +407,19 @@ static void intern_rec(value *dest)
/* Pop item and iterate */
sp--;
break;
- case OShift:
+ case OShift: {
/* Shift value by an offset */
- *dest += sp->arg;
+ value v = *dest;
+ intnat ofs = sp->arg;
+ if (Is_block(v)
+ && (uintnat) ofs % sizeof(value) == 0
+ && ofs >= 0 && ofs < Bosize_val(v)
+ && Tag_val(v + ofs) == Infix_tag
+ && Infix_offset_val(v + ofs) == ofs)
+ *dest = v + ofs;
+ else
+ intern_cleanup_failwith("input_value: bad infix offset");
+ }
/* Pop item and iterate */
sp--;
break;
@@ -363,13 +438,12 @@ static void intern_rec(value *dest)
if (size == 0) {
v = Atom(tag);
} else {
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header_allocated_here(size, tag, intern_color);
- intern_dest += 1 + size;
+ v = intern_alloc_obj(size, tag);
+ intern_record_obj(v);
/* For objects, we need to freshen the oid */
if (tag == Object_tag) {
- Assert(size >= 2);
+ if (CAMLunlikely(size < 2))
+ intern_cleanup_failwith("input_value: bad object block");
/* Request to read rest of the elements of the block */
ReadItems(&Field(v, 2), size - 2);
/* Request freshing OID */
@@ -392,11 +466,11 @@ static void intern_rec(value *dest)
/* Small string */
len = (code & 0x1F);
read_string:
+ if (CAMLunlikely(len > Bsize_wsize (Max_wosize) - 1))
+ intern_cleanup_failwith("input_value: string too large");
size = (len + sizeof(value)) / sizeof(value);
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header_allocated_here(size, String_tag, intern_color);
- intern_dest += 1 + size;
+ v = intern_alloc_obj(size, String_tag);
+ intern_record_obj(v);
Field(v, size - 1) = 0;
ofs_ind = Bsize_wsize(size) - 1;
Byte(v, ofs_ind) = ofs_ind - len;
@@ -424,10 +498,12 @@ static void intern_rec(value *dest)
case CODE_SHARED8:
ofs = read8u();
read_shared:
- Assert (ofs > 0);
- Assert (ofs <= obj_counter);
- Assert (intern_obj_table != NULL);
- v = intern_obj_table[obj_counter - ofs];
+ ofs = obj_counter - ofs;
+ /* If intern_obj_table is NULL, obj_counter is 0 and the check fails */
+ if (CAMLunlikely(ofs >= obj_counter)) {
+ intern_cleanup_failwith("input_value: invalid shared reference");
+ }
+ v = intern_obj_table[ofs];
break;
case CODE_SHARED16:
ofs = read16u();
@@ -444,12 +520,16 @@ static void intern_rec(value *dest)
header = (header_t) read32u();
tag = Tag_hd(header);
size = Wosize_hd(header);
+ if (CAMLunlikely(tag >= No_scan_tag || tag == Infix_tag))
+ intern_cleanup_failwith("input_value: invalid block32");
goto read_block;
#ifdef ARCH_SIXTYFOUR
case CODE_BLOCK64:
header = (header_t) read64u();
tag = Tag_hd(header);
size = Wosize_hd(header);
+ if (CAMLunlikely(tag >= No_scan_tag || tag == Infix_tag))
+ intern_cleanup_failwith("input_value: invalid block64");
goto read_block;
#endif
case CODE_STRING8:
@@ -465,23 +545,19 @@ static void intern_rec(value *dest)
#endif
case CODE_DOUBLE_LITTLE:
case CODE_DOUBLE_BIG:
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header_allocated_here(Double_wosize, Double_tag,
- intern_color);
- intern_dest += 1 + Double_wosize;
+ v = intern_alloc_obj(Double_wosize, Double_tag);
+ intern_record_obj(v);
readfloat((double *) v, code);
break;
case CODE_DOUBLE_ARRAY8_LITTLE:
case CODE_DOUBLE_ARRAY8_BIG:
len = read8u();
read_double_array:
+ if (len == 0)
+ intern_cleanup_failwith("input_value: invalid double_array");
size = len * Double_wosize;
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header_allocated_here(size, Double_array_tag,
- intern_color);
- intern_dest += 1 + size;
+ v = intern_alloc_obj(size, Double_array_tag);
+ intern_record_obj(v);
readfloats((double *) v, len, code);
break;
case CODE_DOUBLE_ARRAY32_LITTLE:
@@ -521,26 +597,28 @@ static void intern_rec(value *dest)
ReadItems(dest, 1);
continue; /* with next iteration of main loop, skipping *dest = v */
- case CODE_CUSTOM:
- ops = caml_find_custom_operations((char *) intern_src);
+ case CODE_CUSTOM: {
+ char * name = (char *) intern_src;
+ unsigned char * name_end = memchr(name, 0, intern_src_end - intern_src);
+ if (name_end == NULL) {
+ intern_cleanup_failwith
+ ("input_value: unterminated custom block identifier");
+ }
+ ops = caml_find_custom_operations(name);
if (ops == NULL) {
- intern_cleanup();
- caml_failwith("input_value: unknown custom block identifier");
+ intern_cleanup_failwith
+ ("input_value: unknown custom block identifier");
}
- while (*intern_src++ != 0) /*nothing*/; /*skip identifier*/
+ intern_src = name_end + 1; /*skip identifier*/
size = ops->deserialize((void *) (intern_dest + 2));
size = 1 + (size + sizeof(value) - 1) / sizeof(value);
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header_allocated_here(size, Custom_tag,
- intern_color);
+ v = intern_alloc_obj(size, Custom_tag);
+ intern_record_obj(v);
Custom_ops_val(v) = ops;
-
if (ops->finalize != NULL && Is_young(v)) {
/* Remember that the block has a finalizer. */
add_to_custom_table (&caml_custom_table, v, 0, 1);
}
-
- intern_dest += 1 + size;
break;
+ }
default:
intern_cleanup();
@@ -582,6 +659,7 @@ static void intern_alloc(mlsize_t whsize
intern_color =
outside_heap ? Caml_black : caml_allocation_color(intern_extra_block);
intern_dest = (header_t *) intern_extra_block;
+ intern_dest_end = intern_dest + whsize;
Assert (intern_block == 0);
} else {
/* this is a specialised version of caml_alloc from alloc.c */
@@ -602,15 +680,17 @@ static void intern_alloc(mlsize_t whsize
intern_color = Color_hd(intern_header);
Assert (intern_color == Caml_white || intern_color == Caml_black);
intern_dest = (header_t *) Hp_val(intern_block);
+ intern_dest_end = intern_dest + whsize;
Assert (intern_extra_block == NULL);
}
obj_counter = 0;
if (num_objects > 0) {
- intern_obj_table = (value *) malloc(num_objects * sizeof(value));
+ intern_obj_table = (value *) calloc(num_objects, sizeof(value));
if (intern_obj_table == NULL) {
intern_cleanup();
caml_raise_out_of_memory();
}
+ intern_num_objects = num_objects;
} else
Assert(intern_obj_table == NULL);
}
@@ -710,10 +790,12 @@ static value caml_input_val_core(struct
else if (r < 20)
caml_failwith("input_value: truncated object");
intern_src = (unsigned char *) header;
+ intern_src_end = (unsigned char *) header + 20;
if (read32u() == Intext_magic_number_big) {
/* Finish reading the header */
if (caml_really_getblock(chan, header + 20, 32 - 20) < 32 - 20)
caml_failwith("input_value: truncated object");
+ intern_src_end = (unsigned char *) header + 32;
}
intern_src = (unsigned char *) header;
caml_parse_header("input_value", &h);
@@ -728,7 +810,7 @@ static value caml_input_val_core(struct
caml_failwith("input_value: truncated object");
}
/* Initialize global state */
- intern_init(block, block);
+ intern_init(block, h.data_len, block);
intern_alloc(h.whsize, h.num_objects, outside_heap);
/* Fill it in */
intern_rec(&res);
@@ -782,13 +864,14 @@ CAMLexport value caml_input_val_from_str
struct marshal_header h;
/* Initialize global state */
- intern_init(&Byte_u(str, ofs), NULL);
+ intern_init(&Byte_u(str, ofs), caml_string_length(str) - ofs, NULL);
caml_parse_header("input_val_from_string", &h);
if (ofs + h.header_len + h.data_len > caml_string_length(str))
caml_failwith("input_val_from_string: bad length");
/* Allocate result */
intern_alloc(h.whsize, h.num_objects, 0);
intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */
+ intern_src_end = intern_src + h.data_len;
/* Fill it in */
intern_rec(&obj);
intern_add_to_heap(h.whsize);
@@ -819,10 +902,9 @@ CAMLexport value caml_input_value_from_m
{
struct marshal_header h;
- intern_init(data + ofs, data);
-
+ intern_init(data + ofs, 32, data);
caml_parse_header("input_value_from_malloc", &h);
-
+ intern_src_end = intern_src + h.data_len;
return input_val_from_block(&h);
}
@@ -831,11 +913,14 @@ CAMLexport value caml_input_value_from_b
{
struct marshal_header h;
+ if (len < 0)
+ caml_failwith("input_value_from_block: negative length");
/* Initialize global state */
- intern_init(data, NULL);
+ intern_init(data, len, NULL);
caml_parse_header("input_value_from_block", &h);
if (h.header_len + h.data_len > len)
caml_failwith("input_val_from_block: bad length");
+ intern_src_end = intern_src + h.data_len;
return input_val_from_block(&h);
}
@@ -853,6 +938,7 @@ CAMLprim value caml_marshal_data_size(va
uintnat data_len;
intern_src = &Byte_u(buff, Long_val(ofs));
+ intern_src_end = &Byte_u(buff, caml_string_length(buff));
magic = read32u();
switch(magic) {
case Intext_magic_number_small: