File CVE-2026-28364.patch of Package ocaml.42959
From b0a2614684a52acded784ec213f14ddfe085d146 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 12 Jan 2026 11:56:01 +0100
Subject: [PATCH] robustify intern.c
---
runtime/caml/misc.h | 9 ++
runtime/intern.c | 196 ++++++++++++++++++++++++++++++++------------
2 files changed, 151 insertions(+), 54 deletions(-)
diff --git a/runtime/caml/misc.h b/runtime/caml/misc.h
index c605f8711e81..6fefe922aefc 100644
--- a/runtime/caml/misc.h
+++ b/runtime/caml/misc.h
@@ -259,6 +259,15 @@ CAMLnoreturn_end;
#define Caml_has_builtin(x) 0
#endif
+/* Branch prediction */
+#if defined(__GNUC__)
+#define CAMLlikely(e) __builtin_expect(!!(e), 1)
+#define CAMLunlikely(e) __builtin_expect(!!(e), 0)
+#else
+#define CAMLlikely(e) (e)
+#define CAMLunlikely(e) (e)
+#endif
+
/* Integer arithmetic with overflow detection.
The functions return 0 if no overflow, 1 if overflow.
The result of the operation is always stored at [*res].
diff --git a/runtime/intern.c b/runtime/intern.c
index afd49f2641ea..7f4e9bfdbe80 100644
--- a/runtime/intern.c
+++ b/runtime/intern.c
@@ -41,6 +41,9 @@
static unsigned char * intern_src;
/* Reading pointer in block holding input data. */
+static unsigned char * intern_src_end;
+/* Pointer to the end of the readable data. */
+
static unsigned char * intern_input = NULL;
/* Pointer to beginning of block holding input data,
if non-NULL this pointer will be freed by the cleanup function. */
@@ -48,6 +51,9 @@ static unsigned char * intern_input = NULL;
static header_t * intern_dest;
/* Writing pointer in destination block */
+static header_t * intern_dest_end;
+/* Pointer to the end of the destination block */
+
static char * intern_extra_block = NULL;
/* If non-NULL, point to new heap chunk allocated with caml_alloc_for_heap. */
@@ -57,6 +63,9 @@ static asize_t obj_counter;
static value * intern_obj_table = NULL;
/* The pointers to objects already seen */
+static uintnat intern_num_objects;
+/* How many objects are expected (from the header) */
+
static color_t intern_color;
/* Color to assign to newly created headers */
@@ -75,31 +84,64 @@ CAMLnoreturn_start
static void intern_bad_code_pointer(unsigned char digest[16])
CAMLnoreturn_end;
+CAMLnoreturn_start
+static void intern_cleanup_failwith(const char * msg)
+CAMLnoreturn_end;
+
static void intern_free_stack(void);
+Caml_inline void intern_check_read(uintnat len)
+{
+ if (CAMLunlikely(len > intern_src_end - intern_src)) {
+ intern_cleanup_failwith("input_value: invalid read");
+ }
+}
+
+Caml_inline void intern_record_obj(value v)
+{
+ if (intern_obj_table != NULL) {
+ if (CAMLunlikely(obj_counter >= intern_num_objects)) {
+ intern_cleanup_failwith("input_value: too many objects");
+ }
+ intern_obj_table[obj_counter++] = v;
+ }
+}
+
Caml_inline unsigned char read8u(void)
-{ return *intern_src++; }
+{
+ intern_check_read(1);
+ return *intern_src++;
+}
Caml_inline signed char read8s(void)
-{ return *intern_src++; }
+{
+ intern_check_read(1);
+ return *intern_src++;
+}
Caml_inline uint16_t read16u(void)
{
- uint16_t res = (intern_src[0] << 8) + intern_src[1];
+ uint16_t res;
+ intern_check_read(2);
+ res = (intern_src[0] << 8) + intern_src[1];
intern_src += 2;
return res;
}
Caml_inline int16_t read16s(void)
{
- int16_t res = (intern_src[0] << 8) + intern_src[1];
+ int16_t res;
+ intern_check_read(2);
+ res = (intern_src[0] << 8) + intern_src[1];
intern_src += 2;
return res;
}
Caml_inline uint32_t read32u(void)
{
- uint32_t res =
+ uint32_t res;
+ intern_check_read(4);
+ res =
((uint32_t)(intern_src[0]) << 24) + (intern_src[1] << 16)
+ (intern_src[2] << 8) + intern_src[3];
intern_src += 4;
@@ -108,7 +150,9 @@ Caml_inline uint32_t read32u(void)
Caml_inline int32_t read32s(void)
{
- int32_t res =
+ int32_t res;
+ intern_check_read(4);
+ res =
((uint32_t)(intern_src[0]) << 24) + (intern_src[1] << 16)
+ (intern_src[2] << 8) + intern_src[3];
intern_src += 4;
@@ -118,7 +162,9 @@ Caml_inline int32_t read32s(void)
#ifdef ARCH_SIXTYFOUR
static uintnat read64u(void)
{
- uintnat res =
+ uintnat res;
+ intern_check_read(8);
+ res =
((uintnat) (intern_src[0]) << 56)
+ ((uintnat) (intern_src[1]) << 48)
+ ((uintnat) (intern_src[2]) << 40)
@@ -132,13 +178,14 @@ static uintnat read64u(void)
}
#endif
-Caml_inline void readblock(void * dest, intnat len)
+Caml_inline void readblock(void * dest, uintnat len)
{
+ intern_check_read(len);
memcpy(dest, intern_src, len);
intern_src += len;
}
-static void intern_init(void * src, void * input)
+static void intern_init(void * src, uintnat len, void * input)
{
/* This is asserted at the beginning of demarshaling primitives.
If it fails, it probably means that an exception was raised
@@ -146,6 +193,7 @@ static void intern_init(void * src, void * input)
CAMLassert (intern_input == NULL && intern_obj_table == NULL \
&& intern_extra_block == NULL && intern_block == 0);
intern_src = src;
+ intern_src_end = intern_src + len;
intern_input = input;
}
@@ -172,6 +220,12 @@ static void intern_cleanup(void)
intern_free_stack();
}
+static void intern_cleanup_failwith(const char * msg)
+{
+ intern_cleanup();
+ caml_failwith(msg);
+}
+
static void readfloat(double * dest, unsigned int code)
{
if (sizeof(double) != 8) {
@@ -317,6 +371,17 @@ static struct intern_item * intern_resize_stack(struct intern_item * sp)
} \
} while(0)
+Caml_inline value intern_alloc_obj(mlsize_t wosize, tag_t tag)
+{
+ value v = Val_hp(intern_dest);
+ if (CAMLunlikely(wosize >= intern_dest_end - intern_dest)) {
+ intern_cleanup_failwith("input_value: invalid allocation");
+ }
+ *intern_dest = Make_header(wosize, tag, intern_color);
+ intern_dest += 1 + wosize;
+ return v;
+}
+
static void intern_rec(value *dest)
{
unsigned int code;
@@ -349,9 +414,19 @@ static void intern_rec(value *dest)
/* Pop item and iterate */
sp--;
break;
- case OShift:
+ case OShift: {
/* Shift value by an offset */
- *dest += sp->arg;
+ value v = *dest;
+ intnat ofs = sp->arg;
+ if (Is_block(v)
+ && (uintnat) ofs % sizeof(value) == 0
+ && ofs >= 0 && ofs < Bosize_val(v)
+ && Tag_val(v + ofs) == Infix_tag
+ && Infix_offset_val(v + ofs) == ofs)
+ *dest = v + ofs;
+ else
+ intern_cleanup_failwith("input_value: bad infix offset");
+ }
/* Pop item and iterate */
sp--;
break;
@@ -370,13 +445,12 @@ static void intern_rec(value *dest)
if (size == 0) {
v = Atom(tag);
} else {
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header(size, tag, intern_color);
- intern_dest += 1 + size;
+ v = intern_alloc_obj(size, tag);
+ intern_record_obj(v);
/* For objects, we need to freshen the oid */
if (tag == Object_tag) {
- CAMLassert(size >= 2);
+ if (CAMLunlikely(size < 2))
+ intern_cleanup_failwith("input_value: bad object block");
/* Request to read rest of the elements of the block */
ReadItems(&Field(v, 2), size - 2);
/* Request freshing OID */
@@ -399,11 +473,11 @@ static void intern_rec(value *dest)
/* Small string */
len = (code & 0x1F);
read_string:
+ if (CAMLunlikely(len > Bsize_wsize (Max_wosize) - 1))
+ intern_cleanup_failwith("input_value: string too large");
size = (len + sizeof(value)) / sizeof(value);
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header(size, String_tag, intern_color);
- intern_dest += 1 + size;
+ v = intern_alloc_obj(size, String_tag);
+ intern_record_obj(v);
Field(v, size - 1) = 0;
ofs_ind = Bsize_wsize(size) - 1;
Byte(v, ofs_ind) = ofs_ind - len;
@@ -431,10 +505,12 @@ static void intern_rec(value *dest)
case CODE_SHARED8:
ofs = read8u();
read_shared:
- CAMLassert (ofs > 0);
- CAMLassert (ofs <= obj_counter);
- CAMLassert (intern_obj_table != NULL);
- v = intern_obj_table[obj_counter - ofs];
+ ofs = obj_counter - ofs;
+ /* If intern_obj_table is NULL, obj_counter is 0 and the check fails */
+ if (CAMLunlikely(ofs >= obj_counter)) {
+ intern_cleanup_failwith("input_value: invalid shared reference");
+ }
+ v = intern_obj_table[ofs];
break;
case CODE_SHARED16:
ofs = read16u();
@@ -451,12 +527,16 @@ static void intern_rec(value *dest)
header = (header_t) read32u();
tag = Tag_hd(header);
size = Wosize_hd(header);
+ if (CAMLunlikely(tag >= No_scan_tag || tag == Infix_tag))
+ intern_cleanup_failwith("input_value: invalid block32");
goto read_block;
#ifdef ARCH_SIXTYFOUR
case CODE_BLOCK64:
header = (header_t) read64u();
tag = Tag_hd(header);
size = Wosize_hd(header);
+ if (CAMLunlikely(tag >= No_scan_tag || tag == Infix_tag))
+ intern_cleanup_failwith("input_value: invalid block64");
goto read_block;
#endif
case CODE_STRING8:
@@ -472,23 +552,19 @@ static void intern_rec(value *dest)
#endif
case CODE_DOUBLE_LITTLE:
case CODE_DOUBLE_BIG:
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header(Double_wosize, Double_tag,
- intern_color);
- intern_dest += 1 + Double_wosize;
+ v = intern_alloc_obj(Double_wosize, Double_tag);
+ intern_record_obj(v);
readfloat((double *) v, code);
break;
case CODE_DOUBLE_ARRAY8_LITTLE:
case CODE_DOUBLE_ARRAY8_BIG:
len = read8u();
read_double_array:
+ if (len == 0)
+ intern_cleanup_failwith("input_value: invalid double_array");
size = len * Double_wosize;
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header(size, Double_array_tag,
- intern_color);
- intern_dest += 1 + size;
+ v = intern_alloc_obj(size, Double_array_tag);
+ intern_record_obj(v);
readfloats((double *) v, len, code);
break;
case CODE_DOUBLE_ARRAY32_LITTLE:
@@ -531,16 +607,22 @@ static void intern_rec(value *dest)
case CODE_CUSTOM:
case CODE_CUSTOM_LEN:
case CODE_CUSTOM_FIXED: {
- ops = caml_find_custom_operations((char *) intern_src);
+ char * name = (char *) intern_src;
+ unsigned char * name_end = memchr(name, 0, intern_src_end - intern_src);
+ if (name_end == NULL) {
+ intern_cleanup_failwith
+ ("input_value: unterminated custom block identifier");
+ }
+ ops = caml_find_custom_operations(name);
if (ops == NULL) {
- intern_cleanup();
- caml_failwith("input_value: unknown custom block identifier");
+ intern_cleanup_failwith
+ ("input_value: unknown custom block identifier");
}
if (code == CODE_CUSTOM_FIXED && ops->fixed_length == NULL) {
- intern_cleanup();
- caml_failwith("input_value: expected a fixed-size custom block");
+ intern_cleanup_failwith
+ ("input_value: expected a fixed-size custom block");
}
- while (*intern_src++ != 0) /*nothing*/; /*skip identifier*/
+ intern_src = name_end + 1; /*skip identifier*/
if (code == CODE_CUSTOM) {
/* deprecated */
size = ops->deserialize((void *) (intern_dest + 2));
@@ -550,6 +632,7 @@ static void intern_rec(value *dest)
if (code == CODE_CUSTOM_FIXED) {
expected_size = ops->fixed_length->bsize_64;
} else {
+ intern_check_read(4);
intern_src += 4;
expected_size = read64u();
}
@@ -558,6 +641,7 @@ static void intern_rec(value *dest)
expected_size = ops->fixed_length->bsize_32;
} else {
expected_size = read32u();
+ intern_check_read(8);
intern_src += 8;
}
#endif
@@ -569,18 +653,13 @@ static void intern_rec(value *dest)
}
}
size = 1 + (size + sizeof(value) - 1) / sizeof(value);
- v = Val_hp(intern_dest);
- if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
- *intern_dest = Make_header(size, Custom_tag,
- intern_color);
+ v = intern_alloc_obj(size, Custom_tag);
+ intern_record_obj(v);
Custom_ops_val(v) = ops;
-
if (ops->finalize != NULL && Is_young(v)) {
/* Remember that the block has a finalizer. */
add_to_custom_table (Caml_state->custom_table, v, 0, 1);
}
-
- intern_dest += 1 + size;
break;
}
default:
@@ -621,6 +700,7 @@ static void intern_alloc(mlsize_t whsize, mlsize_t num_objects)
}
intern_color = caml_allocation_color(intern_extra_block);
intern_dest = (header_t *) intern_extra_block;
+ intern_dest_end = intern_dest + whsize;
CAMLassert (intern_block == 0);
} else {
/* this is a specialised version of caml_alloc from alloc.c */
@@ -647,16 +727,18 @@ static void intern_alloc(mlsize_t whsize, mlsize_t num_objects)
intern_color = Color_hd(intern_header);
CAMLassert (intern_color == Caml_white || intern_color == Caml_black);
intern_dest = (header_t *) Hp_val(intern_block);
+ intern_dest_end = intern_dest + whsize;
CAMLassert (intern_extra_block == NULL);
}
obj_counter = 0;
if (num_objects > 0) {
intern_obj_table =
- (value *) caml_stat_alloc_noexc(num_objects * sizeof(value));
+ (value *) caml_stat_calloc_noexc(num_objects, sizeof(value));
if (intern_obj_table == NULL) {
intern_cleanup();
caml_raise_out_of_memory();
}
+ intern_num_objects = num_objects;
} else
CAMLassert(intern_obj_table == NULL);
}
@@ -783,10 +865,12 @@ value caml_input_val(struct channel *chan)
else if (r < 20)
caml_failwith("input_value: truncated object");
intern_src = (unsigned char *) header;
+ intern_src_end = (unsigned char *) header + 20;
if (read32u() == Intext_magic_number_big) {
/* Finish reading the header */
if (caml_really_getblock(chan, header + 20, 32 - 20) < 32 - 20)
caml_failwith("input_value: truncated object");
+ intern_src_end = (unsigned char *) header + 32;
}
intern_src = (unsigned char *) header;
caml_parse_header("input_value", &h);
@@ -801,7 +885,7 @@ value caml_input_val(struct channel *chan)
caml_failwith("input_value: truncated object");
}
/* Initialize global state */
- intern_init(block, block);
+ intern_init(block, h.data_len, block);
intern_alloc(h.whsize, h.num_objects);
/* Fill it in */
intern_rec(&res);
@@ -829,13 +913,14 @@ CAMLexport value caml_input_val_from_bytes(value str, intnat ofs)
struct marshal_header h;
/* Initialize global state */
- intern_init(&Byte_u(str, ofs), NULL);
+ intern_init(&Byte_u(str, ofs), caml_string_length(str) - ofs, NULL);
caml_parse_header("input_val_from_string", &h);
if (ofs + h.header_len + h.data_len > caml_string_length(str))
caml_failwith("input_val_from_string: bad length");
/* Allocate result */
intern_alloc(h.whsize, h.num_objects);
intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */
+ intern_src_end = intern_src + h.data_len;
/* Fill it in */
intern_rec(&obj);
CAMLreturn (intern_end(obj, h.whsize));
@@ -860,10 +945,9 @@ CAMLexport value caml_input_value_from_malloc(char * data, intnat ofs)
{
struct marshal_header h;
- intern_init(data + ofs, data);
-
+ intern_init(data + ofs, 32, data);
caml_parse_header("input_value_from_malloc", &h);
-
+ intern_src_end = intern_src + h.data_len;
return input_val_from_block(&h);
}
@@ -872,11 +956,14 @@ CAMLexport value caml_input_value_from_block(char * data, intnat len)
{
struct marshal_header h;
+ if (len < 0)
+ caml_failwith("input_value_from_block: negative length");
/* Initialize global state */
- intern_init(data, NULL);
+ intern_init(data, len, NULL);
caml_parse_header("input_value_from_block", &h);
if (h.header_len + h.data_len > len)
caml_failwith("input_val_from_block: bad length");
+ intern_src_end = intern_src + h.data_len;
return input_val_from_block(&h);
}
@@ -894,6 +981,7 @@ CAMLprim value caml_marshal_data_size(value buff, value ofs)
uintnat data_len;
intern_src = &Byte_u(buff, Long_val(ofs));
+ intern_src_end = &Byte_u(buff, caml_string_length(buff));
magic = read32u();
switch(magic) {
case Intext_magic_number_small: