File expat-CVE-2023-52425-2.patch of Package expat.35320
Index: expat-2.4.4/Makefile.in
===================================================================
--- expat-2.4.4.orig/Makefile.in
+++ expat-2.4.4/Makefile.in
@@ -1088,6 +1088,11 @@ buildlib:
run-benchmark: tests/benchmark/benchmark
tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/recset.xml 65535 3
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_attr.xml 4096 3
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_cdata.xml 4096 3
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_comment.xml 4096 3
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_tag.xml 4096 3
+ ./run.sh tests/benchmark/benchmark@EXEEXT@ -n $(top_srcdir)/../testdata/largefiles/aaaaaa_text.xml 4096 3
tests/xmlts.zip:
wget --output-document=tests/xmlts.zip \
Index: expat-2.4.4/doc/reference.html
===================================================================
--- expat-2.4.4.orig/doc/reference.html
+++ expat-2.4.4/doc/reference.html
@@ -118,6 +118,7 @@ interface.</p>
<li><a href="#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a></li>
<li><a href="#XML_GetCurrentByteCount">XML_GetCurrentByteCount</a></li>
<li><a href="#XML_GetInputContext">XML_GetInputContext</a></li>
+ <li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li>
</ul>
</li>
<li><a href="#miscellaneous">Miscellaneous Functions</a>
@@ -2004,6 +2005,27 @@
return NULL.</p>
</div>
+<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4>
+<pre class="fcndec">
+/* Added in Expat 2.6.0. */
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+</pre>
+<div class="fcndef">
+ <p>
+ Large tokens may require many parse calls before enough data is available for Expat to parse it in full.
+ If Expat retried parsing the token on every parse call, parsing could take quadratic time.
+ To avoid this, Expat only retries once a significant amount of new data is available.
+ This function allows disabling this behavior.
+ </p>
+ <p>
+ The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>.
+ </p>
+ <p>
+ Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error.
+ </p>
+</div>
+
<h3><a name="miscellaneous">Miscellaneous functions</a></h3>
<p>The functions in this section either obtain state information from
Index: expat-2.4.4/doc/xmlwf.sgml
===================================================================
--- expat-2.4.4.orig/doc/xmlwf.sgml
+++ expat-2.4.4/doc/xmlwf.sgml
@@ -314,6 +314,16 @@ supports both.
</varlistentry>
<varlistentry>
+ <term><option>-q</option></term>
+ <listitem>
+ <para>
+ Disable reparse deferral, and allow quadratic parse runtime
+ on large tokens (default: reparse deferral enabled).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-r</option></term>
<listitem>
<para>
Index: expat-2.4.4/lib/expat.h
===================================================================
--- expat-2.4.4.orig/lib/expat.h
+++ expat-2.4.4/lib/expat.h
@@ -1045,6 +1045,9 @@ XML_SetBillionLaughsAttackProtectionActi
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
+/* Added in Expat 2.6.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
/* Expat follows the GNU/Linux convention of odd number minor version for
beta/development releases and even number minor version for stable
Index: expat-2.4.4/lib/internal.h
===================================================================
--- expat-2.4.4.orig/lib/internal.h
+++ expat-2.4.4/lib/internal.h
@@ -71,3 +71,6 @@
#define inline
#endif
#endif
+//extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+extern unsigned int g_parseAttempts; // used for testing only
+
Index: expat-2.4.4/lib/libexpat.def
===================================================================
--- expat-2.4.4.orig/lib/libexpat.def
+++ expat-2.4.4/lib/libexpat.def
@@ -71,3 +71,5 @@ EXPORTS
XML_StopParser @63
XML_ResumeParser @64
XML_GetParsingStatus @65
+; added with version 2.6.0
+ XML_SetReparseDeferralEnabled @71
\ No newline at end of file
Index: expat-2.4.4/lib/libexpatw.def
===================================================================
--- expat-2.4.4.orig/lib/libexpatw.def
+++ expat-2.4.4/lib/libexpatw.def
@@ -71,3 +71,5 @@ EXPORTS
XML_StopParser @63
XML_ResumeParser @64
XML_GetParsingStatus @65
+; added with version 2.6.0
+ XML_SetReparseDeferralEnabled @66
\ No newline at end of file
Index: expat-2.4.4/lib/xmlparse.c
===================================================================
--- expat-2.4.4.orig/lib/xmlparse.c
+++ expat-2.4.4/lib/xmlparse.c
@@ -2,6 +2,7 @@
See the file COPYING for copying permission.
*/
+#include <stdbool.h>
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
@@ -91,6 +92,11 @@ typedef char ICHAR;
#endif /* HAVE_BCOPY */
#endif /* HAVE_MEMMOVE */
+/* Do safe (NULL-aware) pointer arithmetic */
+#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
+
+#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
+
#include "internal.h"
#include "xmltok.h"
#include "xmlrole.h"
@@ -466,6 +472,9 @@ static unsigned long getDebugLevel(const
? 0 \
: ((*((pool)->ptr)++ = c), 1))
+XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
+unsigned int g_parseAttempts = 0; // used for testing only
+
struct XML_ParserStruct {
/* The first member must be userData so that the XML_GetUserData
macro works. */
@@ -481,6 +490,9 @@ struct XML_ParserStruct {
const char *m_bufferLim;
XML_Index m_parseEndByteIndex;
const char *m_parseEndPtr;
+ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
+ XML_Bool m_reparseDeferralEnabled;
+ int m_lastBufferRequestSize;
XML_Char *m_dataBuf;
XML_Char *m_dataBufEnd;
XML_StartElementHandler m_startElementHandler;
@@ -734,6 +746,47 @@ get_hash_secret_salt(XML_Parser parser)
}
}
+static enum XML_Error
+callProcessor(XML_Parser parser, const char *start, const char *end,
+ const char **endPtr) {
+ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
+
+ if (parser->m_reparseDeferralEnabled
+ && ! parser->m_parsingStatus.finalBuffer) {
+ // Heuristic: don't try to parse a partial token again until the amount of
+ // available data has increased significantly.
+ const size_t had_before = parser->m_partialTokenBytesBefore;
+ // ...but *do* try anyway if we're close to causing a reallocation.
+ size_t available_buffer
+ = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
+#if XML_CONTEXT_BYTES > 0
+ available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
+#endif
+ available_buffer
+ += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
+ // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
+ const bool enough
+ = (have_now >= 2 * had_before)
+ || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
+
+ if (! enough) {
+ *endPtr = start; // callers may expect this to be set
+ return XML_ERROR_NONE;
+ }
+ }
+ g_parseAttempts += 1;
+ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+ if (ret == XML_ERROR_NONE) {
+ // if we consumed nothing, remember what we had on this parse attempt.
+ if (*endPtr == start) {
+ parser->m_partialTokenBytesBefore = have_now;
+ } else {
+ parser->m_partialTokenBytesBefore = 0;
+ }
+ }
+ return ret;
+}
+
static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser)
{
@@ -920,6 +973,9 @@ parserInit(XML_Parser parser, const XML_
bufferEnd = buffer;
parseEndByteIndex = 0;
parseEndPtr = NULL;
+ parser->m_partialTokenBytesBefore = 0;
+ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
+ parser->m_lastBufferRequestSize = 0;
declElementType = NULL;
declAttributeId = NULL;
declEntity = NULL;
@@ -1083,6 +1139,7 @@ XML_ExternalEntityParserCreate(XML_Parse
to worry which hash secrets each table has.
*/
unsigned long oldhash_secret_salt = hash_secret_salt;
+ XML_Bool oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
#ifdef XML_DTD
if (!context)
@@ -1136,6 +1193,7 @@ XML_ExternalEntityParserCreate(XML_Parse
defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
ns_triplets = oldns_triplets;
hash_secret_salt = oldhash_secret_salt;
+ parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
parentParser = oldParser;
#ifdef XML_DTD
paramEntityParsing = oldParamEntityParsing;
@@ -1568,48 +1626,19 @@ XML_Parse(XML_Parser parser, const char
default:
ps_parsing = XML_PARSING;
}
-
- if (len == 0) {
- ps_finalBuffer = (XML_Bool)isFinal;
- if (!isFinal)
- return XML_STATUS_OK;
- positionPtr = bufferPtr;
- parseEndPtr = bufferEnd;
-
- /* If data are left over from last buffer, and we now know that these
- data are the final chunk of input, then we have to check them again
- to detect errors based on that fact.
- */
- errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
-
- if (errorCode == XML_ERROR_NONE) {
- switch (ps_parsing) {
- case XML_SUSPENDED:
- XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
- positionPtr = bufferPtr;
- return XML_STATUS_SUSPENDED;
- case XML_INITIALIZED:
- case XML_PARSING:
- ps_parsing = XML_FINISHED;
- /* fall through */
- default:
- return XML_STATUS_OK;
- }
- }
- eventEndPtr = eventPtr;
- processor = errorProcessor;
- return XML_STATUS_ERROR;
- }
#ifndef XML_CONTEXT_BYTES
- else if (bufferPtr == bufferEnd) {
+ if (bufferPtr == bufferEnd) {
const char *end;
int nLeftOver;
enum XML_Error result;
+ // though this isn't a buffer request, we assume that `len` is the app's
+ // preferred buffer fill size, and therefore save it here.
+ parser->m_lastBufferRequestSize = len;
parseEndByteIndex += len;
positionPtr = s;
ps_finalBuffer = (XML_Bool)isFinal;
- errorCode = processor(parser, s, parseEndPtr = s + len, &end);
+ errorCode = callProcessor((parser, s, parseEndPtr = s + len, &end);
if (errorCode != XML_ERROR_NONE) {
eventEndPtr = eventPtr;
@@ -1643,6 +1671,9 @@ XML_Parse(XML_Parser parser, const char
parser->m_parsingStatus.parsing = XML_PARSING;
void *const temp = XML_GetBuffer(parser, nLeftOver);
parser->m_parsingStatus.parsing = originalStatus;
+ // GetBuffer may have overwritten this, but we want to remember what the
+ // app requested, not how many bytes were left over after parsing.
+ parser->m_lastBufferRequestSize = len;
if (temp == NULL) {
// NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
@@ -1669,15 +1700,14 @@ XML_Parse(XML_Parser parser, const char
return result;
}
#endif /* not defined XML_CONTEXT_BYTES */
- else {
- void *buff = XML_GetBuffer(parser, len);
- if (buff == NULL)
- return XML_STATUS_ERROR;
- else {
- memcpy(buff, s, len);
- return XML_ParseBuffer(parser, len, isFinal);
- }
+ void *buff = XML_GetBuffer(parser, len);
+ if (buff == NULL)
+ return XML_STATUS_ERROR;
+ if (len > 0) {
+ assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
+ memcpy(buff, s, len);
}
+ return XML_ParseBuffer(parser, len, isFinal);
}
enum XML_Status XMLCALL
@@ -1709,7 +1739,7 @@ XML_ParseBuffer(XML_Parser parser, int l
parseEndByteIndex += len;
ps_finalBuffer = (XML_Bool)isFinal;
- errorCode = processor(parser, start, parseEndPtr, &bufferPtr);
+ errorCode = callProcessor(parser, start, parseEndPtr, &bufferPtr);
if (errorCode != XML_ERROR_NONE) {
eventEndPtr = eventPtr;
@@ -1753,7 +1783,10 @@ XML_GetBuffer(XML_Parser parser, int len
default: ;
}
- if (len > bufferLim - bufferEnd) {
+ // whether or not the request succeeds, `len` seems to be the app's preferred
+ // buffer fill size; remember it.
+ parser->m_lastBufferRequestSize = len;
+ if (len > (bufferLim - bufferEnd) || parser->m_buffer == NULL) {
/* Do not invoke signed arithmetic overflow: */
int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr));
if (neededSize < 0) {
@@ -1774,7 +1807,8 @@ XML_GetBuffer(XML_Parser parser, int len
}
neededSize += keep;
#endif /* defined XML_CONTEXT_BYTES */
- if (neededSize <= bufferLim - buffer) {
+ if (parser->m_buffer && parser->m_bufferPtr
+ && neededSize <= bufferLim - buffer) {
#ifdef XML_CONTEXT_BYTES
if (keep < bufferPtr - buffer) {
int offset = (int)(bufferPtr - buffer) - keep;
@@ -1877,7 +1911,7 @@ XML_ResumeParser(XML_Parser parser) {
}
ps_parsing = XML_PARSING;
- errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr);
+ errorCode = callProcessor(parser, bufferPtr, parseEndPtr, &bufferPtr);
if (errorCode != XML_ERROR_NONE) {
eventEndPtr = eventPtr;
@@ -2125,6 +2156,15 @@ XML_SetBillionLaughsAttackProtectionActi
return features;
}
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
+ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
+ parser->m_reparseDeferralEnabled = enabled;
+ return XML_TRUE;
+ }
+ return XML_FALSE;
+}
+
/* Initially tag->rawName always points into the parse buffer;
for those TAG instances opened while the current parse buffer was
processed, and not yet closed, we need to store tag->rawName in a more