File expat-CVE-2023-52425-1.patch of Package expat.32868

From 119ae277abaabd4d17b2e64300fec712ef403b28 Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Thu, 28 Sep 2023 18:26:19 +0200
Subject: [PATCH] Grow buffer based on current size

Until now, the buffer size to grow to has been calculated based on the
distance from the current parse position to the end of the buffer. This
means that the size of any already-parsed data was not considered,
leading to inconsistent buffer growth.

There was also a special case in XML_Parse() when XML_CONTEXT_BYTES was
zero, where the buffer size would be set to twice the incoming string
length. This patch replaces this with an XML_GetBuffer() call.

Growing the buffer based on its total size makes its growth consistent.

The commit includes a test that checks that we can reach the max buffer
size (usually INT_MAX/2 + 1) regardless of previously parsed content.

GitHub CI couldn't allocate the full 1GiB with MinGW/wine32, though it
works locally with the same compiler and wine version. As a workaround,
the test tries to malloc 1GiB, and reduces `maxbuf` to 512MiB in case
of failure.
---
 expat/lib/xmlparse.c      | 33 ++++++++++++++--------------
 expat/tests/basic_tests.c | 45 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 17 deletions(-)

Index: expat-2.4.4/lib/xmlparse.c
===================================================================
--- expat-2.4.4.orig/lib/xmlparse.c
+++ expat-2.4.4/lib/xmlparse.c
@@ -1936,23 +1936,22 @@ XML_Parse(XML_Parser parser, const char
                       &parser->m_position);
     nLeftOver = s + len - end;
     if (nLeftOver) {
-      if (parser->m_buffer == NULL
-          || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
-        /* avoid _signed_ integer overflow */
-        char *temp = NULL;
-        const int bytesToAllocate = (int)((unsigned)len * 2U);
-        if (bytesToAllocate > 0) {
-          temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
-        }
-        if (temp == NULL) {
-          parser->m_errorCode = XML_ERROR_NO_MEMORY;
-          parser->m_eventPtr = parser->m_eventEndPtr = NULL;
-          parser->m_processor = errorProcessor;
-          return XML_STATUS_ERROR;
-        }
-        parser->m_buffer = temp;
-        parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
+      // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
+      // (and XML_ERROR_FINISHED) from XML_GetBuffer.
+      const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
+      parser->m_parsingStatus.parsing = XML_PARSING;
+      void *const temp = XML_GetBuffer(parser, nLeftOver);
+      parser->m_parsingStatus.parsing = originalStatus;
+      if (temp == NULL) {
+        // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
+        parser->m_eventPtr = parser->m_eventEndPtr = NULL;
+        parser->m_processor = errorProcessor;
+        return XML_STATUS_ERROR;
       }
+      // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
+      // don't have any data to preserve, and can copy straight into the start
+      // of the buffer rather than the GetBuffer return pointer (which may be
+      // pointing further into the allocated buffer).
       memcpy(parser->m_buffer, end, nLeftOver);
     }
     parser->m_bufferPtr = parser->m_buffer;
@@ -2108,7 +2107,7 @@ XML_GetBuffer(XML_Parser parser, int len
     } else {
       char *newBuf;
       int bufferSize
-          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
+          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
       if (bufferSize == 0)
         bufferSize = INIT_BUFFER_SIZE;
       do {
Index: expat-2.4.4/tests/runtests.c
===================================================================
--- expat-2.4.4.orig/tests/runtests.c
+++ expat-2.4.4/tests/runtests.c
@@ -6663,6 +6663,51 @@ START_TEST(test_empty_element_abort) {
 }
 END_TEST
 
+START_TEST(test_buffer_can_grow_to_max) {
+  const char *const prefixes[] = {
+      "",
+      "<",
+      "<x a='",
+      "<doc><x a='",
+      "<document><x a='",
+      "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
+      "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
+      "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
+      "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
+      "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
+  const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
+  int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
+  if (sizeof(void *) < 8) {
+    // Looks like we have a 32-bit system. Can we make a big allocation?
+    void *big = malloc(maxbuf);
+    if (! big) {
+      // The big allocation failed. Let's be a little lenient.
+      maxbuf = maxbuf / 2;
+    }
+    free(big);
+  }
+
+  for (int i = 0; i < num_prefixes; ++i) {
+    // set_subtest("\"%s\"", prefixes[i]);
+    XML_Parser parser = XML_ParserCreate(NULL);
+    const int prefix_len = (int)strlen(prefixes[i]);
+    const enum XML_Status s
+        = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
+    if (s != XML_STATUS_OK)
+      xml_failure(parser);
+
+    // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
+    // subtracting the whole prefix is easiest, and close enough.
+    assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
+    // The limit should be consistent; no prefix should allow us to
+    // reach above the max buffer size.
+    assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
+    XML_ParserFree(parser);
+  }
+}
+END_TEST
+
+
 /*
  * Namespaces tests.
  */
@@ -12133,6 +12178,7 @@ make_suite(void) {
   tcase_add_test(tc_basic, test_bad_notation);
   tcase_add_test(tc_basic, test_default_doctype_handler);
   tcase_add_test(tc_basic, test_empty_element_abort);
+  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
 
   suite_add_tcase(s, tc_namespace);
   tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown);
Index: expat-2.4.4/tests/minicheck.h
===================================================================
--- expat-2.4.4.orig/tests/minicheck.h
+++ expat-2.4.4/tests/minicheck.h
@@ -64,7 +64,13 @@ extern "C" {
   }                                                                            \
   }
 
-#define fail(msg) _fail_unless(0, __FILE__, __LINE__, msg)
+#define fail(msg) _fail(__FILE__, __LINE__, msg)
+#define assert_true(cond)                                                    \
+  do {                                                                       \
+    if (! (cond)) {                                                          \
+      _fail(__FILE__, __LINE__, "check failed: " #cond);                     \
+    }                                                                        \
+  } while (0)
 
 typedef void (*tcase_setup_function)(void);
 typedef void (*tcase_teardown_function)(void);
@@ -104,6 +110,10 @@ void _check_set_test_info(char const *fu
  */
 
 void _fail_unless(int condition, const char *file, int line, const char *msg);
+#  if defined(__GNUC__)
+__attribute__((noreturn))
+#  endif
+void _fail(const char *file, int line, const char *msg);
 Suite *suite_create(const char *name);
 TCase *tcase_create(const char *name);
 void suite_add_tcase(Suite *suite, TCase *tc);
Index: expat-2.4.4/tests/minicheck.c
===================================================================
--- expat-2.4.4.orig/tests/minicheck.c
+++ expat-2.4.4/tests/minicheck.c
@@ -224,6 +224,21 @@ _fail_unless(int condition, const char *
   longjmp(env, 1);
 }
 
+void
+_fail(const char *file, int line, const char *msg) {
+  /* Always print the error message so it isn't lost.  In this case,
+     we have a failure, so there's no reason to be quiet about what
+     it is.
+  */
+  _check_current_filename = file;
+  _check_current_lineno = line;
+  if (msg != NULL) {
+    const int has_newline = (msg[strlen(msg) - 1] == '\n');
+    fprintf(stderr, "ERROR: %s%s", msg, has_newline ? "" : "\n");
+  }
+  longjmp(env, 1);
+}
+
 int
 srunner_ntests_failed(SRunner *runner) {
   assert(runner != NULL);
openSUSE Build Service is sponsored by