File expat-CVE-2023-52425-2.patch of Package expat.35320

Index: expat-2.4.4/Makefile.in
===================================================================
--- expat-2.4.4.orig/Makefile.in
+++ expat-2.4.4/Makefile.in
@@ -1088,6 +1088,11 @@ buildlib:
 
 run-benchmark: tests/benchmark/benchmark
 	tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/recset.xml 65535 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_attr.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_cdata.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_comment.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_tag.xml 4096 3
+	./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_text.xml 4096 3
 
 tests/xmlts.zip:
 	wget --output-document=tests/xmlts.zip \
Index: expat-2.4.4/doc/reference.html
===================================================================
--- expat-2.4.4.orig/doc/reference.html
+++ expat-2.4.4/doc/reference.html
@@ -118,6 +118,7 @@ interface.</p>
       <li><a href="#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a></li>
       <li><a href="#XML_GetCurrentByteCount">XML_GetCurrentByteCount</a></li>
       <li><a href="#XML_GetInputContext">XML_GetInputContext</a></li>
+      <li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li>
     </ul>
     </li>
     <li><a href="#miscellaneous">Miscellaneous Functions</a>
@@ -2004,6 +2005,27 @@ 
 return NULL.</p>
 </div>
 
+<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4>
+<pre class="fcndec">
+/* Added in Expat 2.6.0. */
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+</pre>
+<div class="fcndef">
+  <p>
+    A large token may require many parse calls before enough data is available for Expat to parse it in full.
+    If Expat retried parsing the token on every parse call, parsing could take quadratic time.
+    To avoid this, Expat only retries once a significant amount of new data is available.
+    This function allows disabling this behavior.
+  </p>
+  <p>
+    The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>.
+  </p>
+  <p>
+    Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error.
+  </p>
+</div>
+
 <h3><a name="miscellaneous">Miscellaneous functions</a></h3>
 
 <p>The functions in this section either obtain state information from
Index: expat-2.4.4/doc/xmlwf.sgml
===================================================================
--- expat-2.4.4.orig/doc/xmlwf.sgml
+++ expat-2.4.4/doc/xmlwf.sgml
@@ -314,6 +314,16 @@ supports both.
       </varlistentry>
 
       <varlistentry>
+        <term><option>-q</option></term>
+        <listitem>
+          <para>
+            Disable reparse deferral, and allow quadratic parse runtime
+            on large tokens (default: reparse deferral enabled).
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
         <term><option>-r</option></term>
         <listitem>
 		<para>
Index: expat-2.4.4/lib/expat.h
===================================================================
--- expat-2.4.4.orig/lib/expat.h
+++ expat-2.4.4/lib/expat.h
@@ -1045,6 +1045,9 @@ XML_SetBillionLaughsAttackProtectionActi
 XMLPARSEAPI(const XML_Feature *)
 XML_GetFeatureList(void);
 
+/* Added in Expat 2.6.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
 
 /* Expat follows the GNU/Linux convention of odd number minor version for
    beta/development releases and even number minor version for stable
Index: expat-2.4.4/lib/internal.h
===================================================================
--- expat-2.4.4.orig/lib/internal.h
+++ expat-2.4.4/lib/internal.h
@@ -71,3 +71,6 @@ 
 #define inline
 #endif
 #endif
+//extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+extern unsigned int g_parseAttempts;             // used for testing only
+
Index: expat-2.4.4/lib/libexpat.def
===================================================================
--- expat-2.4.4.orig/lib/libexpat.def
+++ expat-2.4.4/lib/libexpat.def
@@ -71,3 +71,5 @@ EXPORTS
   XML_StopParser @63
   XML_ResumeParser @64
   XML_GetParsingStatus @65
+; added with version 2.6.0
+  XML_SetReparseDeferralEnabled @71
\ No newline at end of file
Index: expat-2.4.4/lib/libexpatw.def
===================================================================
--- expat-2.4.4.orig/lib/libexpatw.def
+++ expat-2.4.4/lib/libexpatw.def
@@ -71,3 +71,5 @@ EXPORTS
   XML_StopParser @63
   XML_ResumeParser @64
   XML_GetParsingStatus @65
+; added with version 2.6.0
+  XML_SetReparseDeferralEnabled @66
\ No newline at end of file
Index: expat-2.4.4/lib/xmlparse.c
===================================================================
--- expat-2.4.4.orig/lib/xmlparse.c
+++ expat-2.4.4/lib/xmlparse.c
@@ -2,6 +2,7 @@
    See the file COPYING for copying permission.
 */
 
+#include <stdbool.h>
 #include <stddef.h>
 #include <string.h>                     /* memset(), memcpy() */
 #include <assert.h>
@@ -91,6 +92,11 @@ typedef char ICHAR;
 #endif /* HAVE_BCOPY */
 #endif /* HAVE_MEMMOVE */
 
+/* Do safe (NULL-aware) pointer arithmetic */
+#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
+
+#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
+
 #include "internal.h"
 #include "xmltok.h"
 #include "xmlrole.h"
@@ -466,6 +472,9 @@ static unsigned long getDebugLevel(const
    ? 0 \
    : ((*((pool)->ptr)++ = c), 1))
 
+XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
+unsigned int g_parseAttempts = 0;                    // used for testing only
+
 struct XML_ParserStruct {
   /* The first member must be userData so that the XML_GetUserData
      macro works. */
@@ -481,6 +490,9 @@ struct XML_ParserStruct {
   const char *m_bufferLim;
   XML_Index m_parseEndByteIndex;
   const char *m_parseEndPtr;
+  size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
+  XML_Bool m_reparseDeferralEnabled;
+  int m_lastBufferRequestSize;
   XML_Char *m_dataBuf;
   XML_Char *m_dataBufEnd;
   XML_StartElementHandler m_startElementHandler;
@@ -734,6 +746,47 @@ get_hash_secret_salt(XML_Parser parser)
   }
 }
 
+static enum XML_Error /* m_processor wrapper that may defer reparsing */
+callProcessor(XML_Parser parser, const char *start, const char *end,
+              const char **endPtr) {
+  const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); // bytes on offer
+
+  if (parser->m_reparseDeferralEnabled
+      && ! parser->m_parsingStatus.finalBuffer) {
+    // Heuristic: don't try to parse a partial token again until at least twice
+    // as many bytes are available as on the last attempt that consumed nothing.
+    const size_t had_before = parser->m_partialTokenBytesBefore;
+    // ...but *do* try anyway if we're close to causing a reallocation.
+    size_t available_buffer
+        = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
+#if XML_CONTEXT_BYTES > 0
+    available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
+#endif
+    available_buffer
+        += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
+    // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
+    const bool enough
+        = (have_now >= 2 * had_before)
+          || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
+
+    if (! enough) {
+      *endPtr = start; // nothing consumed; callers may expect this to be set
+      return XML_ERROR_NONE;
+    }
+  }
+  g_parseAttempts += 1; // counts actual processor invocations (testing only)
+  const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+  if (ret == XML_ERROR_NONE) {
+    // if we consumed nothing, remember what we had on this parse attempt.
+    if (*endPtr == start) {
+      parser->m_partialTokenBytesBefore = have_now;
+    } else {
+      parser->m_partialTokenBytesBefore = 0; // progress: size test passes next
+    }
+  }
+  return ret;
+}
+
 static XML_Bool  /* only valid for root parser */
 startParsing(XML_Parser parser)
 {
@@ -920,6 +973,9 @@ parserInit(XML_Parser parser, const XML_
   bufferEnd = buffer;
   parseEndByteIndex = 0;
   parseEndPtr = NULL;
+  parser->m_partialTokenBytesBefore = 0;
+  parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
+  parser->m_lastBufferRequestSize = 0;
   declElementType = NULL;
   declAttributeId = NULL;
   declEntity = NULL;
@@ -1083,6 +1139,7 @@ XML_ExternalEntityParserCreate(XML_Parse
      to worry which hash secrets each table has.
   */
   unsigned long oldhash_secret_salt = hash_secret_salt;
+  XML_Bool oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
 
 #ifdef XML_DTD
   if (!context)
@@ -1136,6 +1193,7 @@ XML_ExternalEntityParserCreate(XML_Parse
   defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
   ns_triplets = oldns_triplets;
   hash_secret_salt = oldhash_secret_salt;
+  parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
   parentParser = oldParser;
 #ifdef XML_DTD
   paramEntityParsing = oldParamEntityParsing;
@@ -1568,48 +1626,19 @@ XML_Parse(XML_Parser parser, const char
   default:
     ps_parsing = XML_PARSING;
   }
-
-  if (len == 0) {
-    ps_finalBuffer = (XML_Bool)isFinal;
-    if (!isFinal)
-      return XML_STATUS_OK;
-    positionPtr = bufferPtr;
-    parseEndPtr = bufferEnd;
-
-    /* If data are left over from last buffer, and we now know that these
-       data are the final chunk of input, then we have to check them again
-       to detect errors based on that fact.
-    */
-    errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
-
-    if (errorCode == XML_ERROR_NONE) {
-      switch (ps_parsing) {
-      case XML_SUSPENDED:
-        XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
-        positionPtr = bufferPtr;
-        return XML_STATUS_SUSPENDED;
-      case XML_INITIALIZED:
-      case XML_PARSING:
-        ps_parsing = XML_FINISHED;
-        /* fall through */
-      default:
-        return XML_STATUS_OK;
-      }
-    }
-    eventEndPtr = eventPtr;
-    processor = errorProcessor;
-    return XML_STATUS_ERROR;
-  }
 #ifndef XML_CONTEXT_BYTES
-  else if (bufferPtr == bufferEnd) {
+  if (bufferPtr == bufferEnd) {
     const char *end;
     int nLeftOver;
     enum XML_Error result;
+    // though this isn't a buffer request, we assume that `len` is the app's
+    // preferred buffer fill size, and therefore save it here.
+    parser->m_lastBufferRequestSize = len;
     parseEndByteIndex += len;
     positionPtr = s;
     ps_finalBuffer = (XML_Bool)isFinal;
 
-    errorCode = processor(parser, s, parseEndPtr = s + len, &end);
+    errorCode = callProcessor(parser, s, parseEndPtr = s + len, &end);
 
     if (errorCode != XML_ERROR_NONE) {
       eventEndPtr = eventPtr;
@@ -1643,6 +1671,9 @@ XML_Parse(XML_Parser parser, const char
       parser->m_parsingStatus.parsing = XML_PARSING;
       void *const temp = XML_GetBuffer(parser, nLeftOver);
       parser->m_parsingStatus.parsing = originalStatus;
+      // GetBuffer may have overwritten this, but we want to remember what the
+      // app requested, not how many bytes were left over after parsing.
+      parser->m_lastBufferRequestSize = len;
       if (temp == NULL) {
         // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
         parser->m_eventPtr = parser->m_eventEndPtr = NULL;
@@ -1669,15 +1700,14 @@ XML_Parse(XML_Parser parser, const char
     return result;
   }
 #endif  /* not defined XML_CONTEXT_BYTES */
-  else {
-    void *buff = XML_GetBuffer(parser, len);
-    if (buff == NULL)
-      return XML_STATUS_ERROR;
-    else {
-      memcpy(buff, s, len);
-      return XML_ParseBuffer(parser, len, isFinal);
-    }
+  void *buff = XML_GetBuffer(parser, len);
+  if (buff == NULL)
+    return XML_STATUS_ERROR;
+  if (len > 0) {
+    assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
+    memcpy(buff, s, len);
   }
+  return XML_ParseBuffer(parser, len, isFinal);
 }
 
 enum XML_Status XMLCALL
@@ -1709,7 +1739,7 @@ XML_ParseBuffer(XML_Parser parser, int l
   parseEndByteIndex += len;
   ps_finalBuffer = (XML_Bool)isFinal;
 
-  errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
+  errorCode = callProcessor(parser, start, parseEndPtr, &bufferPtr);
 
   if (errorCode != XML_ERROR_NONE) {
     eventEndPtr = eventPtr;
@@ -1753,7 +1783,10 @@ XML_GetBuffer(XML_Parser parser, int len
   default: ;
   }
 
-  if (len > bufferLim - bufferEnd) {
+  // whether or not the request succeeds, `len` seems to be the app's preferred
+  // buffer fill size; remember it.
+  parser->m_lastBufferRequestSize = len;
+  if (len > (bufferLim - bufferEnd) || parser->m_buffer == NULL) {
     /* Do not invoke signed arithmetic overflow: */
     int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
     if (neededSize < 0) {
@@ -1774,7 +1807,8 @@ XML_GetBuffer(XML_Parser parser, int len
     }
     neededSize += keep;
 #endif  /* defined XML_CONTEXT_BYTES */
-    if (neededSize  <= bufferLim - buffer) {
+    if (parser->m_buffer && parser->m_bufferPtr
+        && neededSize <= bufferLim - buffer) {
 #ifdef XML_CONTEXT_BYTES
       if (keep < bufferPtr - buffer) {
         int offset = (int)(bufferPtr - buffer) - keep;
@@ -1877,7 +1911,7 @@ XML_ResumeParser(XML_Parser parser) {
   }
   ps_parsing = XML_PARSING;
 
-  errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
+  errorCode = callProcessor(parser, bufferPtr, parseEndPtr, &bufferPtr);
 
   if (errorCode != XML_ERROR_NONE) {
     eventEndPtr = eventPtr;
@@ -2125,6 +2156,15 @@ XML_SetBillionLaughsAttackProtectionActi
   return features;
 }
 
+/* Sets the reparse-deferral flag; XML_FALSE on NULL parser or non-boolean. */
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
+  if (parser == NULL || (enabled != XML_TRUE && enabled != XML_FALSE))
+    return XML_FALSE;
+  parser->m_reparseDeferralEnabled = enabled;
+  return XML_TRUE;
+}
+
 /* Initially tag->rawName always points into the parse buffer;
    for those TAG instances opened while the current parse buffer was
    processed, and not yet closed, we need to store tag->rawName in a more
openSUSE Build Service is sponsored by