File CVE-2026-28364.patch of Package ocaml.42958

From b0a2614684a52acded784ec213f14ddfe085d146 Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Mon, 12 Jan 2026 11:56:01 +0100
Subject: [PATCH] robustify intern.c

---
 byterun/caml/misc.h |   9 ++
 byterun/intern.c    | 196 ++++++++++++++++++++++++++++++++------------
 2 files changed, 151 insertions(+), 54 deletions(-)

Index: b/byterun/caml/misc.h
===================================================================
--- a/byterun/caml/misc.h
+++ b/byterun/caml/misc.h
@@ -119,6 +119,15 @@ CAMLextern void caml_fatal_error_arg2 (c
                                        char *fmt2, char *arg2)
 CAMLnoreturn_end;
 
+/* Branch prediction */
+#if defined(__GNUC__)
+#define CAMLlikely(e)   __builtin_expect(!!(e), 1)
+#define CAMLunlikely(e) __builtin_expect(!!(e), 0)
+#else
+#define CAMLlikely(e) (e)
+#define CAMLunlikely(e) (e)
+#endif
+
 /* Safe string operations */
 
 CAMLextern char * caml_strdup(const char * s);
Index: b/byterun/intern.c
===================================================================
--- a/byterun/intern.c
+++ b/byterun/intern.c
@@ -38,6 +38,9 @@
 static unsigned char * intern_src;
 /* Reading pointer in block holding input data. */
 
+static unsigned char * intern_src_end;
+/* Pointer to the end of the readable data. */
+
 static unsigned char * intern_input = NULL;
 /* Pointer to beginning of block holding input data,
    if non-NULL this pointer will be freed by the cleanup function. */
@@ -45,6 +48,9 @@ static unsigned char * intern_input = NU
 static header_t * intern_dest;
 /* Writing pointer in destination block */
 
+static header_t * intern_dest_end;
+/* Pointer to the end of the destination block */
+
 static char * intern_extra_block = NULL;
 /* If non-NULL, point to new heap chunk allocated with caml_alloc_for_heap. */
 
@@ -54,6 +60,9 @@ static asize_t obj_counter;
 static value * intern_obj_table = NULL;
 /* The pointers to objects already seen */
 
+static uintnat intern_num_objects;
+/* How many objects are expected (from the header) */
+
 static unsigned int intern_color;
 /* Color to assign to newly created headers */
 
@@ -72,31 +81,64 @@ CAMLnoreturn_start
 static void intern_bad_code_pointer(unsigned char digest[16])
 CAMLnoreturn_end;
 
+CAMLnoreturn_start
+static void intern_cleanup_failwith(const char * msg)
+CAMLnoreturn_end;
+
 static void intern_free_stack(void);
 
+static inline void intern_check_read(uintnat len)
+{
+  if (CAMLunlikely(len > intern_src_end - intern_src)) {
+    intern_cleanup_failwith("input_value: invalid read");
+  }
+}
+
+static inline void intern_record_obj(value v)
+{
+  if (intern_obj_table != NULL) {
+    if (CAMLunlikely(obj_counter >= intern_num_objects)) {
+      intern_cleanup_failwith("input_value: too many objects");
+    }
+    intern_obj_table[obj_counter++] = v;
+  }
+}
+
 static inline unsigned char read8u(void)
-{ return *intern_src++; }
+{
+  intern_check_read(1);
+  return *intern_src++;
+}
 
 static inline signed char read8s(void)
-{ return *intern_src++; }
+{
+  intern_check_read(1);
+  return *intern_src++;
+}
 
 static inline uint16_t read16u(void)
 {
-  uint16_t res = (intern_src[0] << 8) + intern_src[1];
+  uint16_t res;
+  intern_check_read(2);
+  res = (intern_src[0] << 8) + intern_src[1];
   intern_src += 2;
   return res;
 }
 
 static inline int16_t read16s(void)
 {
-  int16_t res = (intern_src[0] << 8) + intern_src[1];
+  int16_t res;
+  intern_check_read(2);
+  res = (intern_src[0] << 8) + intern_src[1];
   intern_src += 2;
   return res;
 }
 
 static inline uint32_t read32u(void)
 {
-  uint32_t res =
+  uint32_t res;
+  intern_check_read(4);
+  res =
     ((uint32_t)(intern_src[0]) << 24) + (intern_src[1] << 16)
     + (intern_src[2] << 8) + intern_src[3];
   intern_src += 4;
@@ -105,7 +147,9 @@ static inline uint32_t read32u(void)
 
 static inline int32_t read32s(void)
 {
-  int32_t res =
+  int32_t res;
+  intern_check_read(4);
+  res =
     ((uint32_t)(intern_src[0]) << 24) + (intern_src[1] << 16)
     + (intern_src[2] << 8) + intern_src[3];
   intern_src += 4;
@@ -115,7 +159,9 @@ static inline int32_t read32s(void)
 #ifdef ARCH_SIXTYFOUR
 static uintnat read64u(void)
 {
-  uintnat res =
+  uintnat res;
+  intern_check_read(8);
+  res =
     ((uintnat) (intern_src[0]) << 56)
     + ((uintnat) (intern_src[1]) << 48)
     + ((uintnat) (intern_src[2]) << 40)
@@ -129,13 +175,14 @@ static uintnat read64u(void)
 }
 #endif
 
-static inline void readblock(void * dest, intnat len)
+static inline void readblock(void * dest, uintnat len)
 {
+  intern_check_read(len);
   memcpy(dest, intern_src, len);
   intern_src += len;
 }
 
-static void intern_init(void * src, void * input)
+static void intern_init(void * src, uintnat len, void * input)
 {
   /* This is asserted at the beginning of demarshaling primitives.
      If it fails, it probably means that an exception was raised
@@ -143,6 +190,7 @@ static void intern_init(void * src, void
   Assert (intern_input == NULL && intern_obj_table == NULL \
      && intern_extra_block == NULL && intern_block == 0);
   intern_src = src;
+  intern_src_end = intern_src + len;
   intern_input = input;
 }
 
@@ -169,6 +217,12 @@ static void intern_cleanup(void)
   intern_free_stack();
 }
 
+static void intern_cleanup_failwith(const char * msg)
+{
+  intern_cleanup();
+  caml_failwith(msg);
+}
+
 static void readfloat(double * dest, unsigned int code)
 {
   if (sizeof(double) != 8) {
@@ -310,6 +364,17 @@ static struct intern_item * intern_resiz
     }                                                                   \
   } while(0)
 
+static inline value intern_alloc_obj(mlsize_t wosize, tag_t tag)
+{
+  value v = Val_hp(intern_dest);
+  if (CAMLunlikely(wosize >= intern_dest_end - intern_dest)) {
+    intern_cleanup_failwith("input_value: invalid allocation");
+  }
+  *intern_dest = Make_header_allocated_here(wosize, tag, intern_color);
+  intern_dest += 1 + wosize;
+  return v;
+}
+
 static void intern_rec(value *dest)
 {
   unsigned int code;
@@ -342,9 +407,19 @@ static void intern_rec(value *dest)
     /* Pop item and iterate */
     sp--;
     break;
-  case OShift:
+  case OShift: {
     /* Shift value by an offset */
-    *dest += sp->arg;
+    value v = *dest;
+    intnat ofs = sp->arg;
+    if (Is_block(v)
+        && (uintnat) ofs % sizeof(value) == 0
+        && ofs >= 0 && ofs < Bosize_val(v)
+        && Tag_val(v + ofs) == Infix_tag
+        && Infix_offset_val(v + ofs) == ofs)
+      *dest = v + ofs;
+    else
+      intern_cleanup_failwith("input_value: bad infix offset");
+    }
     /* Pop item and iterate */
     sp--;
     break;
@@ -363,13 +438,12 @@ static void intern_rec(value *dest)
       if (size == 0) {
         v = Atom(tag);
       } else {
-        v = Val_hp(intern_dest);
-        if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
-        *intern_dest = Make_header_allocated_here(size, tag, intern_color);
-        intern_dest += 1 + size;
+        v = intern_alloc_obj(size, tag);
+        intern_record_obj(v);
         /* For objects, we need to freshen the oid */
         if (tag == Object_tag) {
-          Assert(size >= 2);
+          if (CAMLunlikely(size < 2))
+            intern_cleanup_failwith("input_value: bad object block");
           /* Request to read rest of the elements of the block */
           ReadItems(&Field(v, 2), size - 2);
           /* Request freshing OID */
@@ -392,11 +466,11 @@ static void intern_rec(value *dest)
       /* Small string */
       len = (code & 0x1F);
     read_string:
+      if (CAMLunlikely(len > Bsize_wsize (Max_wosize) - 1))
+        intern_cleanup_failwith("input_value: string too large");
       size = (len + sizeof(value)) / sizeof(value);
-      v = Val_hp(intern_dest);
-      if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
-      *intern_dest = Make_header_allocated_here(size, String_tag, intern_color);
-      intern_dest += 1 + size;
+      v = intern_alloc_obj(size, String_tag);
+      intern_record_obj(v);
       Field(v, size - 1) = 0;
       ofs_ind = Bsize_wsize(size) - 1;
       Byte(v, ofs_ind) = ofs_ind - len;
@@ -424,10 +498,12 @@ static void intern_rec(value *dest)
       case CODE_SHARED8:
         ofs = read8u();
       read_shared:
-        Assert (ofs > 0);
-        Assert (ofs <= obj_counter);
-        Assert (intern_obj_table != NULL);
-        v = intern_obj_table[obj_counter - ofs];
+        ofs = obj_counter - ofs;
+        /* If intern_obj_table is NULL, obj_counter is 0 and the check fails */
+        if (CAMLunlikely(ofs >= obj_counter)) {
+          intern_cleanup_failwith("input_value: invalid shared reference");
+        }
+        v = intern_obj_table[ofs];
         break;
       case CODE_SHARED16:
         ofs = read16u();
@@ -444,12 +520,16 @@ static void intern_rec(value *dest)
         header = (header_t) read32u();
         tag = Tag_hd(header);
         size = Wosize_hd(header);
+        if (CAMLunlikely(tag >= No_scan_tag || tag == Infix_tag))
+          intern_cleanup_failwith("input_value: invalid block32");
         goto read_block;
 #ifdef ARCH_SIXTYFOUR
       case CODE_BLOCK64:
         header = (header_t) read64u();
         tag = Tag_hd(header);
         size = Wosize_hd(header);
+        if (CAMLunlikely(tag >= No_scan_tag || tag == Infix_tag))
+          intern_cleanup_failwith("input_value: invalid block64");
         goto read_block;
 #endif
       case CODE_STRING8:
@@ -465,23 +545,19 @@ static void intern_rec(value *dest)
 #endif
       case CODE_DOUBLE_LITTLE:
       case CODE_DOUBLE_BIG:
-        v = Val_hp(intern_dest);
-        if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
-        *intern_dest = Make_header_allocated_here(Double_wosize, Double_tag,
-                                                  intern_color);
-        intern_dest += 1 + Double_wosize;
+        v = intern_alloc_obj(Double_wosize, Double_tag);
+        intern_record_obj(v);
         readfloat((double *) v, code);
         break;
       case CODE_DOUBLE_ARRAY8_LITTLE:
       case CODE_DOUBLE_ARRAY8_BIG:
         len = read8u();
       read_double_array:
+        if (len == 0)
+          intern_cleanup_failwith("input_value: invalid double_array");
         size = len * Double_wosize;
-        v = Val_hp(intern_dest);
-        if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
-        *intern_dest = Make_header_allocated_here(size, Double_array_tag,
-                                                  intern_color);
-        intern_dest += 1 + size;
+        v = intern_alloc_obj(size, Double_array_tag);
+        intern_record_obj(v);
         readfloats((double *) v, len, code);
         break;
       case CODE_DOUBLE_ARRAY32_LITTLE:
@@ -521,26 +597,28 @@ static void intern_rec(value *dest)
         ReadItems(dest, 1);
         continue;  /* with next iteration of main loop, skipping *dest = v */
-      case CODE_CUSTOM:
-        ops = caml_find_custom_operations((char *) intern_src);
+      case CODE_CUSTOM: {
+        char * name = (char *) intern_src;
+        unsigned char * name_end = memchr(name, 0, intern_src_end - intern_src);
+        if (name_end == NULL) {
+          intern_cleanup_failwith
+            ("input_value: unterminated custom block identifier");
+        }
+        ops = caml_find_custom_operations(name);
         if (ops == NULL) {
-          intern_cleanup();
-          caml_failwith("input_value: unknown custom block identifier");
+          intern_cleanup_failwith
+            ("input_value: unknown custom block identifier");
         }
-        while (*intern_src++ != 0) /*nothing*/;  /*skip identifier*/
+        intern_src = name_end + 1; /*skip identifier*/
         size = ops->deserialize((void *) (intern_dest + 2));
         size = 1 + (size + sizeof(value) - 1) / sizeof(value);
-        v = Val_hp(intern_dest);
-        if (intern_obj_table != NULL) intern_obj_table[obj_counter++] = v;
-        *intern_dest = Make_header_allocated_here(size, Custom_tag,
-                                                  intern_color);
+        v = intern_alloc_obj(size, Custom_tag);
+        intern_record_obj(v);
         Custom_ops_val(v) = ops;
-
         if (ops->finalize != NULL && Is_young(v)) {
           /* Remember that the block has a finalizer. */
           add_to_custom_table (&caml_custom_table, v, 0, 1);
         }
-
-        intern_dest += 1 + size;
         break;
+      }
       default:
         intern_cleanup();
@@ -582,6 +659,7 @@ static void intern_alloc(mlsize_t whsize
     intern_color =
       outside_heap ? Caml_black : caml_allocation_color(intern_extra_block);
     intern_dest = (header_t *) intern_extra_block;
+    intern_dest_end = intern_dest + whsize;
     Assert (intern_block == 0);
   } else {
     /* this is a specialised version of caml_alloc from alloc.c */
@@ -602,15 +680,17 @@ static void intern_alloc(mlsize_t whsize
     intern_color = Color_hd(intern_header);
     Assert (intern_color == Caml_white || intern_color == Caml_black);
     intern_dest = (header_t *) Hp_val(intern_block);
+    intern_dest_end = intern_dest + whsize;
     Assert (intern_extra_block == NULL);
   }
   obj_counter = 0;
   if (num_objects > 0) {
-    intern_obj_table = (value *) malloc(num_objects * sizeof(value));
+    intern_obj_table = (value *) calloc(num_objects, sizeof(value));
     if (intern_obj_table == NULL) {
       intern_cleanup();
       caml_raise_out_of_memory();
     }
+    intern_num_objects = num_objects;
   } else
     Assert(intern_obj_table == NULL);
 }
@@ -710,10 +790,12 @@ static value caml_input_val_core(struct
   else if (r < 20)
     caml_failwith("input_value: truncated object");
   intern_src = (unsigned char *) header;
+  intern_src_end = (unsigned char *) header + 20;
   if (read32u() == Intext_magic_number_big) {
     /* Finish reading the header */
     if (caml_really_getblock(chan, header + 20, 32 - 20) < 32 - 20)
       caml_failwith("input_value: truncated object");
+    intern_src_end = (unsigned char *) header + 32;
   }
   intern_src = (unsigned char *) header;
   caml_parse_header("input_value", &h);
@@ -728,7 +810,7 @@ static value caml_input_val_core(struct
     caml_failwith("input_value: truncated object");
   }
   /* Initialize global state */
-  intern_init(block, block);
+  intern_init(block, h.data_len, block);
   intern_alloc(h.whsize, h.num_objects, outside_heap);
   /* Fill it in */
   intern_rec(&res);
@@ -782,13 +864,14 @@ CAMLexport value caml_input_val_from_str
   struct marshal_header h;
 
   /* Initialize global state */
-  intern_init(&Byte_u(str, ofs), NULL);
+  intern_init(&Byte_u(str, ofs), caml_string_length(str) - ofs, NULL);
   caml_parse_header("input_val_from_string", &h);
   if (ofs + h.header_len + h.data_len > caml_string_length(str))
     caml_failwith("input_val_from_string: bad length");
   /* Allocate result */
   intern_alloc(h.whsize, h.num_objects, 0);
   intern_src = &Byte_u(str, ofs + h.header_len); /* If a GC occurred */
+  intern_src_end = intern_src + h.data_len;
   /* Fill it in */
   intern_rec(&obj);
   intern_add_to_heap(h.whsize);
@@ -819,10 +902,9 @@ CAMLexport value caml_input_value_from_m
 {
   struct marshal_header h;
 
-  intern_init(data + ofs, data);
-
+  intern_init(data + ofs, 32, data);
   caml_parse_header("input_value_from_malloc", &h);
-
+  intern_src_end = intern_src + h.data_len;
   return input_val_from_block(&h);
 }
 
@@ -831,11 +913,14 @@ CAMLexport value caml_input_value_from_b
 {
   struct marshal_header h;
 
+  if (len < 0)
+    caml_failwith("input_value_from_block: negative length");
   /* Initialize global state */
-  intern_init(data, NULL);
+  intern_init(data, len, NULL);
   caml_parse_header("input_value_from_block", &h);
   if (h.header_len + h.data_len > len)
     caml_failwith("input_val_from_block: bad length");
+  intern_src_end = intern_src + h.data_len;
   return input_val_from_block(&h);
 }
 
@@ -853,6 +938,7 @@ CAMLprim value caml_marshal_data_size(va
   uintnat data_len;
 
   intern_src = &Byte_u(buff, Long_val(ofs));
+  intern_src_end = &Byte_u(buff, caml_string_length(buff));
   magic = read32u();
   switch(magic) {
   case Intext_magic_number_small:
openSUSE Build Service is sponsored by