File expat-CVE-2023-52425-2.patch of Package expat.36379

Index: expat-2.4.4/Makefile.am
===================================================================
--- expat-2.4.4.orig/Makefile.am
+++ expat-2.4.4/Makefile.am
@@ -129,6 +129,11 @@ buildlib:
 run-benchmark:
 	$(MAKE) -C tests/benchmark
 	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/recset.xml 65535 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_attr.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_cdata.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_comment.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_tag.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_text.xml 4096 3
 
 .PHONY: download-xmlts-zip
 download-xmlts-zip:
Index: expat-2.4.4/doc/reference.html
===================================================================
--- expat-2.4.4.orig/doc/reference.html
+++ expat-2.4.4/doc/reference.html
@@ -150,10 +150,11 @@ interface.</p>
     </ul>
     </li>
     <li>
-      <a href="#billion-laughs">Billion Laughs Attack Protection</a>
+      <a href="#attack-protection">Attack Protection</a>
       <ul>
         <li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li>
         <li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li>
+        <li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li>
       </ul>
     </li>
     <li><a href="#miscellaneous">Miscellaneous Functions</a>
@@ -2090,11 +2091,7 @@ parse position may be before the beginni
 return NULL.</p>
 </div>
 
-<h3><a name="billion-laughs">Billion Laughs Attack Protection</a></h3>
-
-<p>The functions in this section configure the built-in
-  protection against various forms of
-  <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>.</p>
+<h3><a name="attack-protection">Attack Protection</a><a name="billion-laughs"></a></h3>
 
 <h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</h4>
 <pre class="fcndec">
@@ -2182,6 +2179,27 @@ XML_SetBillionLaughsAttackProtectionActi
   </p>
 </div>
 
+<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4>
+<pre class="fcndec">
+/* Added in Expat 2.6.0. */
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+</pre>
+<div class="fcndef">
+  <p>
+    Large tokens may require many parse calls before enough data is available for Expat to parse it in full.
+    If Expat retried parsing the token on every parse call, parsing could take quadratic time.
+    To avoid this, Expat only retries once a significant amount of new data is available.
+    This function allows disabling this behavior.
+  </p>
+  <p>
+    The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>.
+  </p>
+  <p>
+    Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error.
+  </p>
+</div>
+
 <h3><a name="miscellaneous">Miscellaneous functions</a></h3>
 
 <p>The functions in this section either obtain state information from
Index: expat-2.4.4/doc/xmlwf.xml
===================================================================
--- expat-2.4.4.orig/doc/xmlwf.xml
+++ expat-2.4.4/doc/xmlwf.xml
@@ -314,6 +314,16 @@ supports both.
       </varlistentry>
 
       <varlistentry>
+        <term><option>-q</option></term>
+        <listitem>
+          <para>
+            Disable reparse deferral, and allow quadratic parse runtime
+            on large tokens (default: reparse deferral enabled).
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
         <term><option>-r</option></term>
         <listitem>
 		<para>
Index: expat-2.4.4/lib/expat.h
===================================================================
--- expat-2.4.4.orig/lib/expat.h
+++ expat-2.4.4/lib/expat.h
@@ -15,6 +15,7 @@
    Copyright (c) 2016      Cristian Rodríguez <crrodriguez@opensuse.org>
    Copyright (c) 2016      Thomas Beutlich <tc@tbeu.de>
    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
+   Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
    Licensed under the MIT license:
 
    Permission is  hereby granted,  free of charge,  to any  person obtaining
@@ -1047,6 +1048,10 @@ XML_SetBillionLaughsAttackProtectionActi
     XML_Parser parser, unsigned long long activationThresholdBytes);
 #endif
 
+/* Added in Expat 2.6.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+
 /* Expat follows the semantic versioning convention.
    See http://semver.org.
 */
Index: expat-2.4.4/lib/internal.h
===================================================================
--- expat-2.4.4.orig/lib/internal.h
+++ expat-2.4.4/lib/internal.h
@@ -31,6 +31,7 @@
    Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
    Copyright (c) 2018      Yury Gribov <tetra2005@gmail.com>
    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
+   Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
    Licensed under the MIT license:
 
    Permission is  hereby granted,  free of charge,  to any  person obtaining
@@ -158,6 +159,9 @@ unsigned long long testingAccountingGetC
 const char *unsignedCharToPrintable(unsigned char c);
 #endif
 
+extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+extern unsigned int g_parseAttempts;             // used for testing only
+
 #ifdef __cplusplus
 }
 #endif
Index: expat-2.4.4/lib/libexpat.def
===================================================================
--- expat-2.4.4.orig/lib/libexpat.def
+++ expat-2.4.4/lib/libexpat.def
@@ -78,3 +78,5 @@ EXPORTS
 ; added with version 2.4.0
   XML_SetBillionLaughsAttackProtectionActivationThreshold @69
   XML_SetBillionLaughsAttackProtectionMaximumAmplification @70
+; added with version 2.6.0
+  XML_SetReparseDeferralEnabled @71
\ No newline at end of file
Index: expat-2.4.4/lib/libexpatw.def
===================================================================
--- expat-2.4.4.orig/lib/libexpatw.def
+++ expat-2.4.4/lib/libexpatw.def
@@ -78,3 +78,5 @@ EXPORTS
 ; added with version 2.4.0
   XML_SetBillionLaughsAttackProtectionActivationThreshold @69
   XML_SetBillionLaughsAttackProtectionMaximumAmplification @70
+; added with version 2.6.0
+  XML_SetReparseDeferralEnabled @71
\ No newline at end of file
Index: expat-2.4.4/lib/xmlparse.c
===================================================================
--- expat-2.4.4.orig/lib/xmlparse.c
+++ expat-2.4.4/lib/xmlparse.c
@@ -71,6 +71,7 @@
 #  endif
 #endif
 
+#include <stdbool.h>
 #include <stddef.h>
 #include <string.h> /* memset(), memcpy() */
 #include <assert.h>
@@ -194,6 +195,8 @@ typedef char ICHAR;
 /* Do safe (NULL-aware) pointer arithmetic */
 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
 
+#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
+
 #include "internal.h"
 #include "xmltok.h"
 #include "xmlrole.h"
@@ -600,6 +603,9 @@ static unsigned long getDebugLevel(const
        ? 0                                                                     \
        : ((*((pool)->ptr)++ = c), 1))
 
+XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
+unsigned int g_parseAttempts = 0;                    // used for testing only
+
 struct XML_ParserStruct {
   /* The first member must be m_userData so that the XML_GetUserData
      macro works. */
@@ -615,6 +621,9 @@ struct XML_ParserStruct {
   const char *m_bufferLim;
   XML_Index m_parseEndByteIndex;
   const char *m_parseEndPtr;
+  size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
+  XML_Bool m_reparseDeferralEnabled;
+  int m_lastBufferRequestSize;
   XML_Char *m_dataBuf;
   XML_Char *m_dataBufEnd;
   XML_StartElementHandler m_startElementHandler;
@@ -945,6 +954,47 @@ get_hash_secret_salt(XML_Parser parser)
   return parser->m_hash_secret_salt;
 }
 
+static enum XML_Error
+callProcessor(XML_Parser parser, const char *start, const char *end,
+              const char **endPtr) {
+  const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
+
+  if (parser->m_reparseDeferralEnabled
+      && ! parser->m_parsingStatus.finalBuffer) {
+    // Heuristic: don't try to parse a partial token again until the amount of
+    // available data has increased significantly.
+    const size_t had_before = parser->m_partialTokenBytesBefore;
+    // ...but *do* try anyway if we're close to causing a reallocation.
+    size_t available_buffer
+        = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
+#if XML_CONTEXT_BYTES > 0
+    available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
+#endif
+    available_buffer
+        += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
+    // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
+    const bool enough
+        = (have_now >= 2 * had_before)
+          || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
+
+    if (! enough) {
+      *endPtr = start; // callers may expect this to be set
+      return XML_ERROR_NONE;
+    }
+  }
+  g_parseAttempts += 1;
+  const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+  if (ret == XML_ERROR_NONE) {
+    // if we consumed nothing, remember what we had on this parse attempt.
+    if (*endPtr == start) {
+      parser->m_partialTokenBytesBefore = have_now;
+    } else {
+      parser->m_partialTokenBytesBefore = 0;
+    }
+  }
+  return ret;
+}
+
 static XML_Bool /* only valid for root parser */
 startParsing(XML_Parser parser) {
   /* hash functions must be initialized before setContext() is called */
@@ -1126,6 +1176,9 @@ parserInit(XML_Parser parser, const XML_
   parser->m_bufferEnd = parser->m_buffer;
   parser->m_parseEndByteIndex = 0;
   parser->m_parseEndPtr = NULL;
+  parser->m_partialTokenBytesBefore = 0;
+  parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
+  parser->m_lastBufferRequestSize = 0;
   parser->m_declElementType = NULL;
   parser->m_declAttributeId = NULL;
   parser->m_declEntity = NULL;
@@ -1295,6 +1348,7 @@ XML_ExternalEntityParserCreate(XML_Parse
      to worry which hash secrets each table has.
   */
   unsigned long oldhash_secret_salt;
+  XML_Bool oldReparseDeferralEnabled;
 
   /* Validate the oldParser parameter before we pull everything out of it */
   if (oldParser == NULL)
@@ -1339,6 +1393,7 @@ XML_ExternalEntityParserCreate(XML_Parse
      to worry which hash secrets each table has.
   */
   oldhash_secret_salt = parser->m_hash_secret_salt;
+  oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
 
 #ifdef XML_DTD
   if (! context)
@@ -1391,6 +1446,7 @@ XML_ExternalEntityParserCreate(XML_Parse
   parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
   parser->m_ns_triplets = oldns_triplets;
   parser->m_hash_secret_salt = oldhash_secret_salt;
+  parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
   parser->m_parentParser = oldParser;
 #ifdef XML_DTD
   parser->m_paramEntityParsing = oldParamEntityParsing;
@@ -1844,56 +1900,8 @@ XML_Parse(XML_Parser parser, const char
   default:
     parser->m_parsingStatus.parsing = XML_PARSING;
   }
-
-  if (len == 0) {
-    parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
-    if (! isFinal)
-      return XML_STATUS_OK;
-    parser->m_positionPtr = parser->m_bufferPtr;
-    parser->m_parseEndPtr = parser->m_bufferEnd;
-
-    /* If data are left over from last buffer, and we now know that these
-       data are the final chunk of input, then we have to check them again
-       to detect errors based on that fact.
-    */
-    parser->m_errorCode
-        = parser->m_processor(parser, parser->m_bufferPtr,
-                              parser->m_parseEndPtr, &parser->m_bufferPtr);
-
-    if (parser->m_errorCode == XML_ERROR_NONE) {
-      switch (parser->m_parsingStatus.parsing) {
-      case XML_SUSPENDED:
-        /* It is hard to be certain, but it seems that this case
-         * cannot occur.  This code is cleaning up a previous parse
-         * with no new data (since len == 0).  Changing the parsing
-         * state requires getting to execute a handler function, and
-         * there doesn't seem to be an opportunity for that while in
-         * this circumstance.
-         *
-         * Given the uncertainty, we retain the code but exclude it
-         * from coverage tests.
-         *
-         * LCOV_EXCL_START
-         */
-        XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
-                          parser->m_bufferPtr, &parser->m_position);
-        parser->m_positionPtr = parser->m_bufferPtr;
-        return XML_STATUS_SUSPENDED;
-        /* LCOV_EXCL_STOP */
-      case XML_INITIALIZED:
-      case XML_PARSING:
-        parser->m_parsingStatus.parsing = XML_FINISHED;
-        /* fall through */
-      default:
-        return XML_STATUS_OK;
-      }
-    }
-    parser->m_eventEndPtr = parser->m_eventPtr;
-    parser->m_processor = errorProcessor;
-    return XML_STATUS_ERROR;
-  }
 #ifndef XML_CONTEXT_BYTES
-  else if (parser->m_bufferPtr == parser->m_bufferEnd) {
+  if (parser->m_bufferPtr == parser->m_bufferEnd) {
     const char *end;
     int nLeftOver;
     enum XML_Status result;
@@ -1904,12 +1912,15 @@ XML_Parse(XML_Parser parser, const char
       parser->m_processor = errorProcessor;
       return XML_STATUS_ERROR;
     }
+    // though this isn't a buffer request, we assume that `len` is the app's
+    // preferred buffer fill size, and therefore save it here.
+    parser->m_lastBufferRequestSize = len;
     parser->m_parseEndByteIndex += len;
     parser->m_positionPtr = s;
     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
 
     parser->m_errorCode
-        = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
+        = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
 
     if (parser->m_errorCode != XML_ERROR_NONE) {
       parser->m_eventEndPtr = parser->m_eventPtr;
@@ -1942,6 +1953,9 @@ XML_Parse(XML_Parser parser, const char
       parser->m_parsingStatus.parsing = XML_PARSING;
       void *const temp = XML_GetBuffer(parser, nLeftOver);
       parser->m_parsingStatus.parsing = originalStatus;
+      // GetBuffer may have overwritten this, but we want to remember what the
+      // app requested, not how many bytes were left over after parsing.
+      parser->m_lastBufferRequestSize = len;
       if (temp == NULL) {
         // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
         parser->m_eventPtr = parser->m_eventEndPtr = NULL;
@@ -1963,15 +1977,14 @@ XML_Parse(XML_Parser parser, const char
     return result;
   }
 #endif /* not defined XML_CONTEXT_BYTES */
-  else {
-    void *buff = XML_GetBuffer(parser, len);
-    if (buff == NULL)
-      return XML_STATUS_ERROR;
-    else {
-      memcpy(buff, s, len);
-      return XML_ParseBuffer(parser, len, isFinal);
-    }
+  void *buff = XML_GetBuffer(parser, len);
+  if (buff == NULL)
+    return XML_STATUS_ERROR;
+  if (len > 0) {
+    assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
+    memcpy(buff, s, len);
   }
+  return XML_ParseBuffer(parser, len, isFinal);
 }
 
 enum XML_Status XMLCALL
@@ -2011,8 +2024,8 @@ XML_ParseBuffer(XML_Parser parser, int l
   parser->m_parseEndByteIndex += len;
   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
 
-  parser->m_errorCode = parser->m_processor(
-      parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
+  parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
+                                      &parser->m_bufferPtr);
 
   if (parser->m_errorCode != XML_ERROR_NONE) {
     parser->m_eventEndPtr = parser->m_eventPtr;
@@ -2057,7 +2070,11 @@ XML_GetBuffer(XML_Parser parser, int len
   default:;
   }
 
-  if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
+  // whether or not the request succeeds, `len` seems to be the app's preferred
+  // buffer fill size; remember it.
+  parser->m_lastBufferRequestSize = len;
+  if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
+      || parser->m_buffer == NULL) {
 #ifdef XML_CONTEXT_BYTES
     int keep;
 #endif /* defined XML_CONTEXT_BYTES */
@@ -2080,8 +2097,9 @@ XML_GetBuffer(XML_Parser parser, int len
     }
     neededSize += keep;
 #endif /* defined XML_CONTEXT_BYTES */
-    if (neededSize
-        <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
+    if (parser->m_buffer && parser->m_bufferPtr
+        && neededSize
+               <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
 #ifdef XML_CONTEXT_BYTES
       if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
         int offset
@@ -2095,14 +2113,12 @@ XML_GetBuffer(XML_Parser parser, int len
         parser->m_bufferPtr -= offset;
       }
 #else
-      if (parser->m_buffer && parser->m_bufferPtr) {
-        memmove(parser->m_buffer, parser->m_bufferPtr,
-                EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
-        parser->m_bufferEnd
-            = parser->m_buffer
-              + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
-        parser->m_bufferPtr = parser->m_buffer;
-      }
+      memmove(parser->m_buffer, parser->m_bufferPtr,
+              EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
+      parser->m_bufferEnd
+          = parser->m_buffer
+            + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
+      parser->m_bufferPtr = parser->m_buffer;
 #endif /* not defined XML_CONTEXT_BYTES */
     } else {
       char *newBuf;
@@ -2204,7 +2220,7 @@ XML_ResumeParser(XML_Parser parser) {
   }
   parser->m_parsingStatus.parsing = XML_PARSING;
 
-  parser->m_errorCode = parser->m_processor(
+  parser->m_errorCode = callProcessor(
       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
 
   if (parser->m_errorCode != XML_ERROR_NONE) {
@@ -2557,6 +2573,15 @@ XML_SetBillionLaughsAttackProtectionActi
 }
 #endif /* XML_DTD */
 
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
+  if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
+    parser->m_reparseDeferralEnabled = enabled;
+    return XML_TRUE;
+  }
+  return XML_FALSE;
+}
+
 /* Initially tag->rawName always points into the parse buffer;
    for those TAG instances opened while the current parse buffer was
    processed, and not yet closed, we need to store tag->rawName in a more
openSUSE Build Service is sponsored by