File expat-CVE-2023-52425-fix-tests.patch of Package expat.36379
From: Simon Lees <sflees@suse.de>
Date: Fri, Mar 1 10:42:13
Subject: CVE-2023-52425 Tests and Test suite fixes
This adds the tests for CVE-2023-52425 to the test suite and adjusts the
existing tests so that they keep passing with the related internal
changes.
---
Index: expat-2.4.4/tests/runtests.c
===================================================================
--- expat-2.4.4.orig/tests/runtests.c
+++ expat-2.4.4/tests/runtests.c
@@ -97,7 +97,42 @@
# endif /* XML_UNICODE */
#endif /* XML_UNICODE_WCHAR_T */
+struct handler_record_entry { // one recorded handler invocation
+ const char *name; // name of the handler function that was called
+ int arg; // integer argument captured from that call
+};
+struct handler_record_list { // fixed-capacity log of handler invocations
+ int count; // number of entries[] slots filled so far
+ struct handler_record_entry entries[50]; // arbitrary big-enough max count
+};
+
+extern const struct handler_record_entry *
+_handler_record_get(const struct handler_record_list *storage, int index,
+ const char *file, int line); // file/line are supplied by the macro below
+
+# define handler_record_get(storage, index) \
+ _handler_record_get((storage), (index), __FILE__, __LINE__)
+
+# define assert_record_handler_called(storage, index, expected_name, \
+ expected_arg) \
+ do { /* asserts that call #index was to expected_name with expected_arg */ \
+ const struct handler_record_entry *rec_entry_ \
+ = handler_record_get((storage), (index)); \
+ assert_true(strcmp(rec_entry_->name, (expected_name)) == 0); \
+ assert_true(rec_entry_->arg == (expected_arg)); \
+ } while (0)
+
+const struct handler_record_entry * // returns the entry recorded at `index`
+_handler_record_get(const struct handler_record_list *storage, int index,
+ const char *file, int line) { // file/line: call site passed on to _fail
+ if (index < 0 || storage->count <= index) { // reject both out-of-range ends
+ _fail(file, line, "too few handler calls");
+ }
+ return &storage->entries[index];
+}
+
static XML_Parser g_parser = NULL;
+static int g_chunkSize = 1; // chunk size used by _XML_Parse_SINGLE_BYTES; <= 0 means no chunking
static void
tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) {
@@ -143,22 +178,22 @@ _xml_failure(XML_Parser parser, const ch
static enum XML_Status
_XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len,
int isFinal) {
- enum XML_Status res = XML_STATUS_ERROR;
- int offset = 0;
-
- if (len == 0) {
- return XML_Parse(parser, s, len, isFinal);
- }
-
- for (; offset < len; offset++) {
- const int innerIsFinal = (offset == len - 1) && isFinal;
- const char c = s[offset]; /* to help out-of-bounds detection */
- res = XML_Parse(parser, &c, sizeof(char), innerIsFinal);
- if (res != XML_STATUS_OK) {
- return res;
+ // This ensures that tests have to run pathological parse cases
+ // (e.g. when `s` is NULL) against plain XML_Parse rather than
+ // chunking _XML_Parse_SINGLE_BYTES.
+ assert((parser != NULL) && (s != NULL) && (len >= 0));
+ const int chunksize = g_chunkSize;
+ if (chunksize > 0) {
+ // feed non-final chunks of `chunksize` bytes while more than `chunksize` bytes remain
+ for (; len > chunksize; len -= chunksize, s += chunksize) {
+ enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE);
+ if (res != XML_STATUS_OK) {
+ return res;
+ }
}
}
- return res;
+ // parse the rest: at most `chunksize` bytes, or all of it if chunksize <= 0
+ return XML_Parse(parser, s, len, isFinal);
}
#define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__)
@@ -1055,7 +1090,7 @@ START_TEST(test_line_number_after_parse)
"\n</tag>";
XML_Size lineno;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
lineno = XML_GetCurrentLineNumber(g_parser);
@@ -1072,7 +1107,7 @@ START_TEST(test_column_number_after_pars
const char *text = "<tag></tag>";
XML_Size colno;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
colno = XML_GetCurrentColumnNumber(g_parser);
@@ -1140,7 +1175,7 @@ START_TEST(test_line_number_after_error)
" <b>\n"
" </a>"; /* missing </b> */
XML_Size lineno;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_ERROR)
fail("Expected a parse error");
@@ -1159,7 +1194,7 @@ START_TEST(test_column_number_after_erro
" <b>\n"
" </a>"; /* missing </b> */
XML_Size colno;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_ERROR)
fail("Expected a parse error");
@@ -2537,34 +2572,41 @@ START_TEST(test_memory_allocation) {
}
END_TEST
-static void XMLCALL
+static void // appends one (funcname, arg) entry to the record list
+record_call(struct handler_record_list *const rec, const char *funcname,
+ const int arg) {
+ const int capacity = (int)(sizeof(rec->entries) / sizeof(rec->entries[0]));
+ assert_true(rec->count < capacity); // the list must still have a free slot
+ struct handler_record_entry *const slot = &rec->entries[rec->count++];
+ slot->arg = arg;
+ slot->name = funcname;
+}
+
+void XMLCALL
record_default_handler(void *userData, const XML_Char *s, int len) {
UNUSED_P(s);
- UNUSED_P(len);
- CharData_AppendXMLChars((CharData *)userData, XCS("D"), 1);
+ record_call((struct handler_record_list *)userData, __func__, len);
}
-static void XMLCALL
+void XMLCALL
record_cdata_handler(void *userData, const XML_Char *s, int len) {
UNUSED_P(s);
- UNUSED_P(len);
- CharData_AppendXMLChars((CharData *)userData, XCS("C"), 1);
+ record_call((struct handler_record_list *)userData, __func__, len);
XML_DefaultCurrent(g_parser);
}
-static void XMLCALL
+void XMLCALL
record_cdata_nodefault_handler(void *userData, const XML_Char *s, int len) {
UNUSED_P(s);
- UNUSED_P(len);
- CharData_AppendXMLChars((CharData *)userData, XCS("c"), 1);
+ record_call((struct handler_record_list *)userData, __func__, len);
}
-static void XMLCALL
+void XMLCALL
record_skip_handler(void *userData, const XML_Char *entityName,
int is_parameter_entity) {
UNUSED_P(entityName);
- CharData_AppendXMLChars((CharData *)userData,
- is_parameter_entity ? XCS("E") : XCS("e"), 1);
+ record_call((struct handler_record_list *)userData, __func__,
+ is_parameter_entity);
}
/* Test XML_DefaultCurrent() passes handling on correctly */
@@ -2574,78 +2616,196 @@ START_TEST(test_default_current) {
"<!ENTITY entity '%'>\n"
"]>\n"
"<doc>&entity;</doc>";
- CharData storage;
-
+ {
+ struct handler_record_list storage;
+ storage.count = 0;
XML_SetDefaultHandler(g_parser, record_default_handler);
XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
- CharData_Init(&storage);
XML_SetUserData(g_parser, &storage);
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD"));
-
+ int i = 0;
+ assert_record_handler_called(&storage, i++, "record_default_handler", 5);
+ // we should have gotten one or more cdata callbacks, totaling 5 chars
+ int cdata_len_remaining = 5;
+ while (cdata_len_remaining > 0) {
+ const struct handler_record_entry *c_entry
+ = handler_record_get(&storage, i++);
+ assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
+ assert_true(c_entry->arg > 0);
+ assert_true(c_entry->arg <= cdata_len_remaining);
+ cdata_len_remaining -= c_entry->arg;
+ // default handler must follow, with the exact same len argument.
+ assert_record_handler_called(&storage, i++, "record_default_handler",
+ c_entry->arg);
+ }
+ assert_record_handler_called(&storage, i++, "record_default_handler", 6);
+ assert_true(storage.count == i);
+ }
+ {
/* Again, without the defaulting */
+ struct handler_record_list storage;
+ storage.count = 0;
XML_ParserReset(g_parser, NULL);
XML_SetDefaultHandler(g_parser, record_default_handler);
XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
- CharData_Init(&storage);
XML_SetUserData(g_parser, &storage);
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DcccccD"));
-
+ int i = 0;
+ assert_record_handler_called(&storage, i++, "record_default_handler", 5);
+ // we should have gotten one or more cdata callbacks, totaling 5 chars
+ int cdata_len_remaining = 5;
+ while (cdata_len_remaining > 0) {
+ const struct handler_record_entry *c_entry
+ = handler_record_get(&storage, i++);
+ assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
+ assert_true(c_entry->arg > 0);
+ assert_true(c_entry->arg <= cdata_len_remaining);
+ cdata_len_remaining -= c_entry->arg;
+ }
+ assert_record_handler_called(&storage, i++, "record_default_handler", 6);
+ assert_true(storage.count == i);
+ }
+ {
/* Now with an internal entity to complicate matters */
+ struct handler_record_list storage;
+ storage.count = 0;
XML_ParserReset(g_parser, NULL);
XML_SetDefaultHandler(g_parser, record_default_handler);
XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
- CharData_Init(&storage);
XML_SetUserData(g_parser, &storage);
if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
/* The default handler suppresses the entity */
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDDD"));
-
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 6);
+ assert_true(storage.count == 19);
+ }
+ {
/* Again, with a skip handler */
+ struct handler_record_list storage;
+ storage.count = 0;
XML_ParserReset(g_parser, NULL);
XML_SetDefaultHandler(g_parser, record_default_handler);
XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
- CharData_Init(&storage);
XML_SetUserData(g_parser, &storage);
if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
/* The default handler suppresses the entity */
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDeD"));
-
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 6);
+ assert_true(storage.count == 19);
+ }
+ {
/* This time, allow the entity through */
+ struct handler_record_list storage;
+ storage.count = 0;
XML_ParserReset(g_parser, NULL);
XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
- CharData_Init(&storage);
XML_SetUserData(g_parser, &storage);
if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDCDD"));
-
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 19, "record_default_handler", 6);
+ assert_true(storage.count == 20);
+ }
+ {
/* Finally, without passing the cdata to the default handler */
+ struct handler_record_list storage;
+ storage.count = 0;
XML_ParserReset(g_parser, NULL);
XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
- CharData_Init(&storage);
XML_SetUserData(g_parser, &storage);
if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDcD"));
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
+ 1);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 6);
+ assert_true(storage.count == 19);
+ }
}
END_TEST
@@ -3010,7 +3170,7 @@ START_TEST(test_reset_in_entity) {
resumable = XML_TRUE;
XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
XML_GetParsingStatus(g_parser, &status);
@@ -3703,8 +3863,6 @@ START_TEST(test_user_parameters) {
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
- if (comment_count != 2)
- fail("Comment handler not invoked enough times");
/* Ensure we can't change policy mid-parse */
if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
fail("Changed param entity parsing policy while parsing");
@@ -3945,7 +4103,66 @@ START_TEST(test_get_buffer_3_overflow) {
XML_ParserFree(parser);
}
END_TEST
+
#endif // defined(XML_CONTEXT_BYTES)
+START_TEST(test_buffer_can_grow_to_max) { // XML_GetBuffer must reach maxbuf but refuse maxbuf + 1
+ const char *const prefixes[] = {
+ "",
+ "<",
+ "<x a='",
+ "<doc><x a='",
+ "<document><x a='",
+ "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
+ "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
+ "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
+ "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
+ "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
+ const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
+ int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
+#if defined(__MINGW32__) && ! defined(__MINGW64__)
+ // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
+ // Can we make a big allocation?
+ void *big = malloc(maxbuf);
+ if (! big) {
+ // The big allocation failed. Let's be a little lenient.
+ maxbuf = maxbuf / 2;
+ }
+ free(big);
+#endif
+
+ for (int i = 0; i < num_prefixes; ++i) { // each prefix gets a fresh parser
+ //set_subtest("\"%s\"", prefixes[i]);
+ XML_Parser parser = XML_ParserCreate(NULL); // NOTE(review): not checked for NULL before use
+ const int prefix_len = (int)strlen(prefixes[i]);
+ const enum XML_Status s
+ = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
+ if (s != XML_STATUS_OK)
+ xml_failure(parser);
+
+ // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
+ // subtracting the whole prefix is easiest, and close enough.
+ assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
+ // The limit should be consistent; no prefix should allow us to
+ // reach above the max buffer size.
+ assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
+START_TEST(test_getbuffer_allocates_on_zero_len) { // XML_GetBuffer(parser, 0) must not return NULL
+ for (int first_len = 1; first_len >= 0; first_len--) { // runs with first_len 1, then 0
+ //set_subtest("with len=%d first", first_len);
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+ assert_true(XML_GetBuffer(parser, first_len) != NULL);
+ assert_true(XML_GetBuffer(parser, 0) != NULL);
+ if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) // zero-length, non-final parse must succeed
+ xml_failure(parser);
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
/* Test position information macros */
START_TEST(test_byte_info_at_end) {
@@ -4322,7 +4539,86 @@ START_TEST(test_bad_ignore_section) {
}
END_TEST
-/* Test recursive parsing */
+struct bom_testdata { // user data shared with external_bom_checker
+ const char *external; // text fed to the nested parser for '004-2.ent'
+ int split; // byte offset at which `external` is split into two parse calls
+ XML_Bool nested_callback_happened; // set once the '004-2.ent' branch has run
+};
+
+static int XMLCALL // external entity ref handler used by test_external_bom_consumed
+external_bom_checker(XML_Parser parser, const XML_Char *context,
+ const XML_Char *base, const XML_Char *systemId,
+ const XML_Char *publicId) {
+ const char *text; // assigned by one of the systemId branches below
+ UNUSED_P(base);
+ UNUSED_P(systemId); // NOTE(review): systemId is in fact used below
+ UNUSED_P(publicId);
+
+ XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
+ if (ext_parser == NULL)
+ fail("Could not create external entity parser");
+
+ if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
+ struct bom_testdata *const testdata
+ = (struct bom_testdata *)XML_GetUserData(parser);
+ const char *const external = testdata->external;
+ const int split = testdata->split;
+ testdata->nested_callback_happened = XML_TRUE;
+
+ if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) // first `split` bytes, non-final
+ != XML_STATUS_OK) {
+ xml_failure(ext_parser);
+ }
+ text = external + split; // the parse below will continue where we left off.
+ } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
+ text = "<!ELEMENT doc EMPTY>\n"
+ "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
+ "<!ENTITY % e2 '%e1;'>\n";
+ } else {
+ fail("unknown systemId");
+ }
+
+ if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) // final parse of `text`
+ != XML_STATUS_OK)
+ xml_failure(ext_parser);
+
+ XML_ParserFree(ext_parser);
+ return XML_STATUS_OK;
+}
+
+/* regression test: BOM should be consumed when followed by a partial token. */
+START_TEST(test_external_bom_consumed) {
+ const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
+ "<doc></doc>\n";
+ const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; // UTF-8 BOM, then a declaration
+ const int len = (int)strlen(external);
+ for (int split = 0; split <= len; ++split) { // try every split offset, including 0 and len
+ //set_subtest("split at byte %d", split);
+
+ struct bom_testdata testdata;
+ testdata.external = external;
+ testdata.split = split;
+ testdata.nested_callback_happened = XML_FALSE;
+
+ XML_Parser parser = XML_ParserCreate(NULL);
+ if (parser == NULL) {
+ fail("Couldn't create parser");
+ }
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ XML_SetExternalEntityRefHandler(parser, external_bom_checker);
+ XML_SetUserData(parser, &testdata); // read back by external_bom_checker
+ if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(parser);
+ if (! testdata.nested_callback_happened) { // the '004-2.ent' branch must have run
+ fail("ref handler not called");
+ }
+ XML_ParserFree(parser);
+ }
+}
+END_TEST
+
+/* Test recursive parsing. NOTE: marked as failing by sflees@suse.de; the error message referred to was not recorded here. */
static int XMLCALL
external_entity_valuer(XML_Parser parser, const XML_Char *context,
const XML_Char *base, const XML_Char *systemId,
@@ -4343,7 +4639,10 @@ external_entity_valuer(XML_Parser parser
if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
if (_XML_Parse_SINGLE_BYTES(ext_parser, text1, (int)strlen(text1), XML_TRUE)
== XML_STATUS_ERROR)
+ {
xml_failure(ext_parser);
+ }
+
} else if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser);
enum XML_Status status;
@@ -4353,7 +4652,9 @@ external_entity_valuer(XML_Parser parser
(int)strlen(fault->parse_text), XML_TRUE);
if (fault->error == XML_ERROR_NONE) {
if (status == XML_STATUS_ERROR)
+ {
xml_failure(ext_parser);
+ }
} else {
if (status != XML_STATUS_ERROR)
fail(fault->fail_text);
@@ -4361,7 +4662,10 @@ external_entity_valuer(XML_Parser parser
if (error != fault->error
&& (fault->error != XML_ERROR_XML_DECL
|| error != XML_ERROR_TEXT_DECL))
+ {
xml_failure(ext_parser);
+ }
+
}
}
@@ -6663,50 +6967,697 @@ START_TEST(test_empty_element_abort) {
}
END_TEST
-START_TEST(test_buffer_can_grow_to_max) {
- const char *const prefixes[] = {
- "",
- "<",
- "<x a='",
- "<doc><x a='",
- "<document><x a='",
- "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
- "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
- "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
- "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
- "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
- const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
- int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
- if (sizeof(void *) < 8) {
- // Looks like we have a 32-bit system. Can we make a big allocation?
- void *big = malloc(maxbuf);
- if (! big) {
- // The big allocation failed. Let's be a little lenient.
- maxbuf = maxbuf / 2;
+int XMLCALL // external entity ref handler: feeds a fixed DTD text to a child parser
+external_entity_unfinished_attlist(XML_Parser parser, const XML_Char *context,
+ const XML_Char *base,
+ const XML_Char *systemId,
+ const XML_Char *publicId) {
+ const char *text = "<!ELEMENT barf ANY>\n"
+ "<!ATTLIST barf my_attr (blah|%blah;a|foo) #REQUIRED>\n"
+ "<!--COMMENT-->\n";
+ XML_Parser ext_parser;
+
+ UNUSED_P(base);
+ UNUSED_P(publicId);
+ if (systemId == NULL) // no systemId: report success without parsing anything
+ return XML_STATUS_OK;
+
+ ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
+ if (ext_parser == NULL)
+ fail("Could not create external entity parser");
+
+ if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(ext_parser);
+
+ XML_ParserFree(ext_parser); // the child parser is freed after the parse
+ return XML_STATUS_OK;
+}
+
+/* Regression test for GH issue #612: unfinished m_declAttributeType
+ * allocation in ->m_tempPool can corrupt following allocation.
+ */
+START_TEST(test_pool_integrity_with_unfinished_attr) {
+ const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
+ "<!DOCTYPE foo [\n"
+ "<!ELEMENT foo ANY>\n"
+ "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
+ "%entp;\n"
+ "]>\n"
+ "<a></a>\n";
+ const XML_Char *expected = XCS("COMMENT"); // compared against `storage` after the parse
+ CharData storage;
+
+ CharData_Init(&storage);
+ XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
+ XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
+ XML_SetCommentHandler(g_parser, accumulate_comment); // presumably appends comment text to `storage`
+ XML_SetUserData(g_parser, &storage);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+ CharData_CheckXMLChars(&storage, expected);
+}
+END_TEST
+
+typedef struct { // user data for accumulate_and_suspend_comment_handler
+ XML_Parser parser; // parser that the comment handler stops
+ CharData *storage; // passed to accumulate_comment together with the comment text
+} ParserPlusStorage;
+
+void XMLCALL // comment handler: records the comment, then stops the parser resumably
+accumulate_and_suspend_comment_handler(void *userData, const XML_Char *data) {
+ ParserPlusStorage *const ctx = (ParserPlusStorage *)userData;
+ accumulate_comment(ctx->storage, data);
+ XML_StopParser(ctx->parser, XML_TRUE);
+}
+
+START_TEST(test_nested_entity_suspend) { // suspends in every comment of nested entities, then resumes
+ const char *const text = "<!DOCTYPE a [\n"
+ " <!ENTITY e1 '<!--e1-->'>\n"
+ " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
+ " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
+ "]>\n"
+ "<a><!--start-->&e3;<!--end--></a>";
+ const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
+ XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
+ CharData storage;
+ CharData_Init(&storage);
+ XML_Parser parser = XML_ParserCreate(NULL); // NOTE(review): not checked for NULL before use
+ ParserPlusStorage parserPlusStorage = {parser, &storage};
+
+ XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+ XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
+ XML_SetUserData(parser, &parserPlusStorage);
+
+ enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
+ while (status == XML_STATUS_SUSPENDED) { // keep resuming until the parser stops suspending
+ status = XML_ResumeParser(parser);
+ }
+ if (status != XML_STATUS_OK)
+ xml_failure(parser);
+
+ CharData_CheckXMLChars(&storage, expected);
+ XML_ParserFree(parser);
+}
+END_TEST
+
+/* Regression test for quadratic parsing on large tokens */
+START_TEST(test_big_tokens_scale_linearly) {
+ const struct {
+ const char *pre;
+ const char *post;
+ } text[] = {
+ {"<a>", "</a>"}, // assumed good, used as baseline
+ {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
+ {"<c attr='", "'></c>"}, // big attribute, used to be O(N²)
+ {"<d><!-- ", " --></d>"}, // long comment, used to be O(N²)
+ {"<e><", "/></e>"}, // big elem name, used to be O(N²)
+ };
+ const int num_cases = sizeof(text) / sizeof(text[0]);
+ char aaaaaa[4096];
+ const int fillsize = (int)sizeof(aaaaaa);
+ const int fillcount = 100;
+ const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
+ const unsigned max_factor = 4;
+ const unsigned max_scanned = max_factor * approx_bytes;
+
+ memset(aaaaaa, 'a', fillsize);
+
+ if (! g_reparseDeferralEnabledDefault) {
+ return; // heuristic is disabled; we would get O(n^2) and fail.
+ }
+
+ for (int i = 0; i < num_cases; ++i) {
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+ enum XML_Status status;
+ //set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
+
+ // parse the start text
+ g_bytesScanned = 0;
+ status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
+ (int)strlen(text[i].pre), XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+
+ // parse lots of 'a', failing the test early if it takes too long
+ unsigned past_max_count = 0;
+ for (int f = 0; f < fillcount; ++f) {
+ status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ if (g_bytesScanned > max_scanned) {
+ // We're not done, and have already passed the limit -- the test will
+ // definitely fail. This block allows us to save time by failing early.
+ const unsigned pushed
+ = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
+ fprintf(
+ stderr,
+ "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+ f + 1, fillcount, pushed, g_bytesScanned,
+ g_bytesScanned / (double)pushed, max_scanned, max_factor);
+ past_max_count++;
+ // We are failing, but allow a few log prints first. If we don't reach
+ // a count of five, the test will fail after the loop instead.
+ assert_true(past_max_count < 5);
+ }
+ }
+
+ // parse the end text
+ status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
+ (int)strlen(text[i].post), XML_TRUE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+
+ assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
+ if (g_bytesScanned > max_scanned) {
+ fprintf(
+ stderr,
+ "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+ g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
+ max_factor);
+ fail("scanned too many bytes");
}
- free(big);
+
+ XML_ParserFree(parser);
}
+}
+END_TEST
+
+// Exercises XML_SetReparseDeferralEnabled() with the heuristic off (0) and
+// on (1). A start tag <x attr='eee...'> is pushed in many small XML_Parse()
+// calls; start_element_event_handler (defined elsewhere in this file) is
+// expected to record each started element in `storage`, which is then
+// compared against "d" / "dx" to see when <x> was actually parsed.
+// With deferral off, <x> must be reported as soon as its closing bytes have
+// been pushed; with deferral on, only after roughly as many extra bytes again.
+START_TEST(test_set_reparse_deferral) {
+ const char *const pre = "<d>";
+ const char *const start = "<x attr='";
+ const char *const end = "'></x>";
+ // filler for the attribute value; not NUL-terminated, always passed with an
+ // explicit length
+ char eeeeee[100];
+ const int fillsize = (int)sizeof(eeeeee);
+ memset(eeeeee, 'e', fillsize);
+
+ for (int enabled = 0; enabled <= 1; enabled += 1) {
+ //set_subtest("deferral=%d", enabled);
- for (int i = 0; i < num_prefixes; ++i) {
- // set_subtest("\"%s\"", prefixes[i]);
 XML_Parser parser = XML_ParserCreate(NULL);
- const int prefix_len = (int)strlen(prefixes[i]);
- const enum XML_Status s
- = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
- if (s != XML_STATUS_OK)
+ assert_true(parser != NULL);
+ assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
+ // pre-grow the buffer to avoid reparsing due to almost-fullness
+ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
+
+ CharData storage;
+ CharData_Init(&storage);
+ XML_SetUserData(parser, &storage);
+ XML_SetStartElementHandler(parser, start_element_event_handler);
+
+ enum XML_Status status;
+ // parse the start text
+ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
+ if (status != XML_STATUS_OK) {
 xml_failure(parser);
+ }
+ CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
+
+ // ..and the start of the token
+ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
+
+ // try to parse lots of 'e', but the token isn't finished
+ // (100 pushes of 100 bytes each)
+ for (int c = 0; c < 100; ++c) {
+ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ }
+ CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
+
+ // end the <x> token.
+ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+
+ if (enabled) {
+ // In general, we may need to push more data to trigger a reparse attempt,
+ // but in this test, the data is constructed to always require it.
+ CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
+ // 2x the token length should suffice; the +1 covers the start and end.
+ for (int c = 0; c < 101; ++c) {
+ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ }
+ }
+ CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
- // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
- // subtracting the whole prefix is easiest, and close enough.
- assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
- // The limit should be consistent; no prefix should allow us to
- // reach above the max buffer size.
- assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
 XML_ParserFree(parser);
 }
 }
 END_TEST
+// User data consumed by element_decl_counter().
+struct element_decl_data {
+ XML_Parser parser; // parser passed to XML_FreeContentModel() by the handler
+ int count; // incremented once per element declaration reported
+};
+
+// Element declaration handler. `userData` must point to a
+// struct element_decl_data: its counter is bumped by one and the content
+// model is released through the parser stored in it. `name` is ignored.
+static void
+element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
+  struct element_decl_data *const tally = (struct element_decl_data *)userData;
+  UNUSED_P(name);
+  tally->count++;
+  XML_FreeContentModel(tally->parser, model);
+}
+
+// External entity ref handler used by test_reparse_deferral_is_inherited.
+// Creates a sub-parser from `p`, feeds it a DTD fragment whose second
+// <!ELEMENT ...> declaration has a very long name pushed in 100-byte pieces,
+// and uses the number of element declarations reported so far to check that
+// the sub-parser defers reparsing exactly when the int that p's user data
+// points to is non-zero. `base`, `systemId` and `publicId` are ignored.
+// Frees the sub-parser and returns XML_STATUS_OK.
+static int
+external_inherited_parser(XML_Parser p, const XML_Char *context,
+ const XML_Char *base, const XML_Char *systemId,
+ const XML_Char *publicId) {
+ UNUSED_P(base);
+ UNUSED_P(systemId);
+ UNUSED_P(publicId);
+ const char *const pre = "<!ELEMENT document ANY>\n";
+ const char *const start = "<!ELEMENT ";
+ const char *const end = " ANY>\n";
+ const char *const post = "<!ELEMENT xyz ANY>\n";
+ // the outer test stored a pointer to its `enabled` loop variable as user data
+ const int enabled = *(int *)XML_GetUserData(p);
+ // fillers are not NUL-terminated; always passed with an explicit length
+ char eeeeee[100];
+ char spaces[100];
+ const int fillsize = (int)sizeof(eeeeee);
+ assert_true(fillsize == (int)sizeof(spaces));
+ memset(eeeeee, 'e', fillsize);
+ memset(spaces, ' ', fillsize);
+
+ XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
+ assert_true(parser != NULL);
+ // pre-grow the buffer to avoid reparsing due to almost-fullness
+ assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
+
+ struct element_decl_data testdata;
+ testdata.parser = parser;
+ testdata.count = 0;
+ XML_SetUserData(parser, &testdata);
+ XML_SetElementDeclHandler(parser, element_decl_counter);
+
+ enum XML_Status status;
+ // parse the initial text
+ status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ assert_true(testdata.count == 1); // first element should be done
+
+ // ..and the start of the big token
+ status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ assert_true(testdata.count == 1); // still just the first one
+
+ // try to parse lots of 'e', but the token isn't finished
+ for (int c = 0; c < 100; ++c) {
+ status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ }
+ assert_true(testdata.count == 1); // *still* just the first one
+
+ // end the big token.
+ status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+
+ if (enabled) {
+ // In general, we may need to push more data to trigger a reparse attempt,
+ // but in this test, the data is constructed to always require it.
+ assert_true(testdata.count == 1); // or the test is incorrect
+ // 2x the token length should suffice; the +1 covers the start and end.
+ // (the extra input here is whitespace, pushed after the finished
+ // declaration)
+ for (int c = 0; c < 101; ++c) {
+ status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ }
+ }
+ assert_true(testdata.count == 2); // the big token should be done
+
+ // parse the final text
+ status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
+
+ XML_ParserFree(parser);
+ return XML_STATUS_OK;
+}
+
+// Checks that a parser created for an external entity inherits the reparse
+// deferral setting of its parent. The document references an external DTD;
+// external_inherited_parser() then builds the sub-parser and performs the
+// actual checks, reading the expected setting through the user data pointer.
+START_TEST(test_reparse_deferral_is_inherited) {
+  const char *const doc
+      = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
+  const int doc_len = (int)strlen(doc);
+
+  // run once with deferral disabled (0) and once with it enabled (1)
+  int enabled = 0;
+  while (enabled <= 1) {
+    XML_Parser parser = XML_ParserCreate(NULL);
+    assert_true(parser != NULL);
+
+    // the handler dereferences this pointer to learn which behavior to expect
+    XML_SetUserData(parser, (void *)&enabled);
+    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
+    XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
+    assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
+
+    const enum XML_Status status = XML_Parse(parser, doc, doc_len, XML_TRUE);
+    if (status != XML_STATUS_OK) {
+      xml_failure(parser);
+    }
+
+    XML_ParserFree(parser);
+    enabled += 1;
+  }
+}
+END_TEST
+
+// XML_SetReparseDeferralEnabled() must return XML_FALSE for a NULL parser,
+// whatever value is passed as the `enabled` argument.
+START_TEST(test_set_reparse_deferral_on_null_parser) {
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
+ assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
+ // NOTE(review): the casts convert the int limits to XML_Bool; the values
+ // actually passed depend on how XML_Bool is defined -- confirm.
+ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
+ == XML_FALSE);
+ assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
+ == XML_FALSE);
+}
+END_TEST
+
+// Checks that switching reparse deferral off in the middle of a parse takes
+// effect on the next XML_Parse() call, even if that call adds no bytes:
+// with deferral on, a complete but not-yet-reparsed <x ...> start tag stays
+// unreported; after disabling deferral, an empty non-final XML_Parse() call
+// must report it. Element starts are tracked through `storage`.
+START_TEST(test_set_reparse_deferral_on_the_fly) {
+  const char *const opening = "<d><x attr='";
+  const char *const closing = "'></x>";
+  char filler[100]; // attribute value bytes; not NUL-terminated
+  const int filler_len = (int)sizeof(filler);
+  memset(filler, 'i', filler_len);
+
+  XML_Parser parser = XML_ParserCreate(NULL);
+  assert_true(parser != NULL);
+  assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
+
+  CharData storage;
+  CharData_Init(&storage);
+  XML_SetUserData(parser, &storage);
+  XML_SetStartElementHandler(parser, start_element_event_handler);
+
+  // <d> plus the beginning of <x>: only <d> can be reported
+  if (XML_Parse(parser, opening, (int)strlen(opening), XML_FALSE)
+      != XML_STATUS_OK) {
+    xml_failure(parser);
+  }
+  CharData_CheckXMLChars(&storage, XCS("d"));
+
+  // part of the attribute value: <x> is still an unfinished token
+  if (XML_Parse(parser, filler, filler_len, XML_FALSE) != XML_STATUS_OK) {
+    xml_failure(parser);
+  }
+  CharData_CheckXMLChars(&storage, XCS("d"));
+
+  // <x> is now complete, but the heuristic is expected to hold it back
+  if (XML_Parse(parser, closing, (int)strlen(closing), XML_FALSE)
+      != XML_STATUS_OK) {
+    xml_failure(parser);
+  }
+  CharData_CheckXMLChars(&storage, XCS("d"));
+
+  // Disable the heuristic and push *no* data. isFinal stays XML_FALSE on
+  // purpose, because XML_TRUE would force-bypass the heuristic anyway.
+  assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
+  if (XML_Parse(parser, "", 0, XML_FALSE) != XML_STATUS_OK) {
+    xml_failure(parser);
+  }
+  CharData_CheckXMLChars(&storage, XCS("dx"));
+
+  XML_ParserFree(parser);
+}
+END_TEST
+
+// XML_SetReparseDeferralEnabled() must accept only 0 and 1 as the `enabled`
+// argument: every other value is rejected with XML_FALSE, while 0 and 1 are
+// accepted with XML_TRUE.
+START_TEST(test_set_bad_reparse_option) {
+  XML_Parser parser = XML_ParserCreate(NULL);
+  // Without this guard a failed parser creation would go unnoticed by the
+  // rejection checks below, since a NULL parser makes the call return
+  // XML_FALSE as well (see test_set_reparse_deferral_on_null_parser).
+  assert_true(parser != NULL);
+  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
+  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
+  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
+  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
+  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
+  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
+  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
+  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
+  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
+  XML_ParserFree(parser);
+}
+END_TEST
+
+// Allocation statistics maintained by counting_realloc(): the sum of all
+// requested sizes, and the largest single request seen.
+static size_t g_totalAlloc = 0;
+static size_t g_biggestAlloc = 0;
+
+// realloc() wrapper that records the requested `size` in g_totalAlloc and
+// g_biggestAlloc before forwarding the request unchanged to realloc().
+static void *
+counting_realloc(void *ptr, size_t size) {
+  const int is_new_maximum = (size > g_biggestAlloc);
+  g_totalAlloc += size;
+  if (is_new_maximum) {
+    g_biggestAlloc = size;
+  }
+  return realloc(ptr, size);
+}
+
+// malloc()-shaped allocator that goes through counting_realloc() with a NULL
+// pointer, so fresh allocations are counted the same way as reallocations.
+static void *
+counting_malloc(size_t size) {
+  void *const fresh = counting_realloc(NULL, size);
+  return fresh;
+}
+
+// Checks that reparse deferral is bypassed when the pending bytes come close
+// to the size of the parser's buffer. For every combination of leading bytes,
+// big-token length and fill size, a document "[<a>xxx...]<b      ...>xxx..."
+// is pushed in fixed-size XML_Parse() calls through a parser that uses the
+// counting allocator. Once all bytes of the big <b ...> token have been
+// pushed, the largest allocation must be able to hold the token (plus
+// context), and further pushes must get the token parsed without another
+// large allocation.
+// Skipped when g_chunkSize != 0 or when deferral is disabled by default.
+START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
+  if (g_chunkSize != 0) {
+    // this test does not use SINGLE_BYTES, because it depends on very precise
+    // buffer fills.
+    return;
+  }
+  if (! g_reparseDeferralEnabledDefault) {
+    return; // this test is irrelevant when the deferral heuristic is disabled.
+  }
+
+  const int document_length = 65536;
+  char *const document = (char *)malloc(document_length);
+  // the buffer is written to right away below, so a failed allocation must
+  // be caught here (same guard as in test_varying_buffer_fills)
+  assert_true(document != NULL);
+
+  const XML_Memory_Handling_Suite memfuncs = {
+      counting_malloc,
+      counting_realloc,
+      free,
+  };
+
+  // each list is terminated by a negative value
+  const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
+  const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
+  const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
+
+  for (const int *leading = leading_list; *leading >= 0; leading++) {
+    for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
+      for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
+        //set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
+        //            *fillsize);
+        // start by checking that the test looks reasonably valid
+        assert_true(*leading + *bigtoken <= document_length);
+
+        // put 'x' everywhere; some will be overwritten by elements.
+        memset(document, 'x', document_length);
+        // maybe add an initial tag
+        if (*leading) {
+          assert_true(*leading >= 3); // or the test case is invalid
+          memcpy(document, "<a>", 3);
+        }
+        // add the large token
+        document[*leading + 0] = '<';
+        document[*leading + 1] = 'b';
+        memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
+        document[*leading + *bigtoken - 1] = '>';
+
+        // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
+        const int expected_elem_total = 1 + (*leading ? 1 : 0);
+
+        XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
+        assert_true(parser != NULL);
+
+        CharData storage;
+        CharData_Init(&storage);
+        XML_SetUserData(parser, &storage);
+        XML_SetStartElementHandler(parser, start_element_event_handler);
+
+        // only count allocations made while feeding the document
+        g_biggestAlloc = 0;
+        g_totalAlloc = 0;
+        int offset = 0;
+        // fill data until the big token is covered (but not necessarily parsed)
+        while (offset < *leading + *bigtoken) {
+          assert_true(offset + *fillsize <= document_length);
+          const enum XML_Status status
+              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
+          if (status != XML_STATUS_OK) {
+            xml_failure(parser);
+          }
+          offset += *fillsize;
+        }
+        // Now, check that we've had a buffer allocation that could fit the
+        // context bytes and our big token. In order to detect a special case,
+        // we need to know how many bytes of our big token were included in the
+        // first push that contained _any_ bytes of the big token:
+        const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
+        if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
+          // Special case: we aren't saving any context, and the whole big token
+          // was covered by a single fill, so Expat may have parsed directly
+          // from our input pointer, without allocating an internal buffer.
+        } else if (*leading < XML_CONTEXT_BYTES) {
+          assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
+        } else {
+          assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
+        }
+        // fill data until the big token is actually parsed
+        while (storage.count < expected_elem_total) {
+          const size_t alloc_before = g_totalAlloc;
+          assert_true(offset + *fillsize <= document_length);
+          const enum XML_Status status
+              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
+          if (status != XML_STATUS_OK) {
+            xml_failure(parser);
+          }
+          offset += *fillsize;
+          // since all the bytes of the big token are already in the buffer,
+          // the bufsize ceiling should make us finish its parsing without any
+          // further buffer allocations. We assume that there will be no other
+          // large allocations in this test.
+          assert_true(g_totalAlloc - alloc_before < 4096);
+        }
+        // test-the-test: was our alloc even called?
+        assert_true(g_totalAlloc > 0);
+        // test-the-test: there shouldn't be any extra start elements
+        assert_true(storage.count == expected_elem_total);
+
+        XML_ParserFree(parser);
+      }
+    }
+  }
+  free(document);
+}
+END_TEST
+
+// Pushes one very large start tag (7654321 bytes: "<t", spaces, ">") into a
+// fresh parser using several different sequences of XML_Parse() fill sizes,
+// and checks how many bytes the parser scanned in total (g_bytesScanned).
+// For every sequence the token must end up parsed, the count must be
+// non-zero, and it must not exceed the worst case in which every call
+// rescans all bytes pushed so far. When reparse deferral is enabled by
+// default, the count must additionally stay within the per-sequence limit
+// stored (negated) at the end of the sequence.
+// Skipped when g_chunkSize != 0.
+START_TEST(test_varying_buffer_fills) {
+ const int KiB = 1024;
+ const int MiB = 1024 * KiB;
+ const int document_length = 16 * MiB;
+ const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
+
+ if (g_chunkSize != 0) {
+ return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
+ }
+
+ char *const document = (char *)malloc(document_length);
+ assert_true(document != NULL);
+ memset(document, 'x', document_length);
+ document[0] = '<';
+ document[1] = 't';
+ memset(&document[2], ' ', big - 2); // a very spacy token
+ document[big - 1] = '>';
+
+ // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
+ // When reparse deferral is enabled, the final (negated) value is the expected
+ // maximum number of bytes scanned in parse attempts.
+ // Rows with fewer than 30 entries are zero-padded; the loop below stops at
+ // the first negative entry and never reaches the padding.
+ const int testcases[][30] = {
+ {8 * MiB, -8 * MiB},
+ {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
+ // zero-size fills shouldn't trigger the bypass
+ {4 * MiB, 0, 4 * MiB, -12 * MiB},
+ {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
+ {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
+ // try to hit the buffer ceiling only once (at the end)
+ {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
+ // try to hit the same buffer ceiling multiple times
+ {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
+
+ // try to hit every ceiling, by always landing 1K shy of the buffer size
+ {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
+ 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
+
+ // try to avoid every ceiling, by always landing 1B past the buffer size
+ // the normal 2x heuristic threshold still forces parse attempts.
+ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
+ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
+ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
+ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
+ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
+ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
+ 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7
+ -(10 * MiB + 682 * KiB + 7)},
+ // try to avoid every ceiling again, except on our last fill.
+ {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1
+ 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2
+ 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3
+ 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4
+ 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
+ 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6
+ 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
+ -(10 * MiB + 682 * KiB + 6)},
+
+ // try to hit ceilings on the way multiple times
+ {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
+ 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
+ 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer
+ 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer
+ // we'll make a parse attempt at every parse call
+ -(45 * MiB + 12)},
+ };
+ const int testcount = sizeof(testcases) / sizeof(testcases[0]);
+ for (int test_i = 0; test_i < testcount; test_i++) {
+ const int *fillsize = testcases[test_i];
+ //set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
+ // fillsize[2], fillsize[3]);
+ XML_Parser parser = XML_ParserCreate(NULL);
+ assert_true(parser != NULL);
+
+ CharData storage;
+ CharData_Init(&storage);
+ XML_SetUserData(parser, &storage);
+ XML_SetStartElementHandler(parser, start_element_event_handler);
+
+ g_bytesScanned = 0;
+ int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
+ int offset = 0;
+ while (*fillsize >= 0) {
+ assert_true(offset + *fillsize <= document_length); // or test is invalid
+ const enum XML_Status status
+ = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
+ if (status != XML_STATUS_OK) {
+ xml_failure(parser);
+ }
+ offset += *fillsize;
+ fillsize++;
+ assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
+ worstcase_bytes += offset; // we might've tried to parse all pending bytes
+ }
+ // here `fillsize` points at the negative terminator of the sequence
+ assert_true(storage.count == 1); // the big token should've been parsed
+ assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
+ if (g_reparseDeferralEnabledDefault) {
+ // heuristic is enabled; some XML_Parse calls may have deferred reparsing
+ const unsigned max_bytes_scanned = -*fillsize;
+ if (g_bytesScanned > max_bytes_scanned) {
+ fprintf(stderr,
+ "bytes scanned in parse attempts: actual=%u limit=%u \n",
+ g_bytesScanned, max_bytes_scanned);
+ fail("too many bytes scanned in parse attempts");
+ }
+ }
+ assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
+
+ XML_ParserFree(parser);
+ }
+ free(document);
+}
+END_TEST
/*
* Namespaces tests.
@@ -6780,13 +7731,13 @@ START_TEST(test_return_ns_triplet) {
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
- if (! triplet_start_flag)
- fail("triplet_start_checker not invoked");
/* Check that unsetting "return triplets" fails while still parsing */
XML_SetReturnNSTriplet(g_parser, XML_FALSE);
if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
+ if (! triplet_start_flag)
+ fail("triplet_start_checker not invoked");
if (! triplet_end_flag)
fail("triplet_end_checker not invoked");
if (dummy_handler_flags
@@ -11643,9 +12594,12 @@ START_TEST(test_accounting_precision) {
/* Processing instructions */
{"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>",
NULL, NULL, 0, filled_later},
+ {"<?pi0?><?pi1 ?><?pi2 ?><r/><?pi4?>", NULL, NULL, 0, filled_later},
+# ifdef XML_DTD
{"<?pi0?><?pi1 ?><?pi2 ?><!DOCTYPE r SYSTEM 'first.ent'><r/>",
"<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>",
0, filled_later},
+# endif /* XML_DTD */
/* CDATA */
{"<e><![CDATA[one two three]]></e>", NULL, NULL, 0, filled_later},
@@ -11660,6 +12614,7 @@ START_TEST(test_accounting_precision) {
NULL, NULL, sizeof(XML_Char) * strlen("111<![CDATA[2 <= 2]]>333"),
filled_later},
+# ifdef XML_DTD
/* Conditional sections */
{"<!DOCTYPE r [\n"
"<!ENTITY % draft 'INCLUDE'>\n"
@@ -11672,6 +12627,7 @@ START_TEST(test_accounting_precision) {
"<![%final;[<!--22-->]]>",
NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE")),
filled_later},
+# endif /* XML_DTD */
/* General entities */
{"<!DOCTYPE root [\n"
@@ -11697,8 +12653,14 @@ START_TEST(test_accounting_precision) {
" <!ENTITY five SYSTEM 'first.ent'>\n"
"]>\n"
"<r>&five;</r>",
- "12345", NULL, 0, filled_later},
+ "12345", NULL, 0},
+ {"<!DOCTYPE r [\n"
+ " <!ENTITY five SYSTEM 'first.ent'>\n"
+ "]>\n"
+ "<r>&five;</r>",
+ "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later},
+# ifdef XML_DTD
/* Parameter entities */
{"<!DOCTYPE r [\n"
"<!ENTITY % comment \"<!---->\">\n"
@@ -11784,24 +12746,16 @@ START_TEST(test_accounting_precision) {
"%e1;\n",
"\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */,
strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"), filled_later},
- {"<!DOCTYPE r [\n"
- " <!ENTITY five SYSTEM 'first.ent'>\n"
- "]>\n"
- "<r>&five;</r>",
- "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later},
+# endif /* XML_DTD */
};
const size_t countCases = sizeof(cases) / sizeof(cases[0]);
size_t u = 0;
for (; u < countCases; u++) {
- size_t v = 0;
- for (; v < 2; v++) {
- const XML_Bool singleBytesWanted = (v == 0) ? XML_FALSE : XML_TRUE;
const unsigned long long expectedCountBytesDirect
= strlen(cases[u].primaryText);
const unsigned long long expectedCountBytesIndirect
- = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText)
- : 0)
+ = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) : 0)
+ (cases[u].secondExternalText ? strlen(cases[u].secondExternalText)
: 0)
+ cases[u].expectedCountBytesIndirectExtra;
@@ -11812,14 +12766,10 @@ START_TEST(test_accounting_precision) {
XML_SetExternalEntityRefHandler(parser,
accounting_external_entity_ref_handler);
XML_SetUserData(parser, (void *)&cases[u]);
- cases[u].singleBytesWanted = singleBytesWanted;
}
- const XmlParseFunction xmlParseFunction
- = singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse;
-
enum XML_Status status
- = xmlParseFunction(parser, cases[u].primaryText,
+ = _XML_Parse_SINGLE_BYTES(parser, cases[u].primaryText,
(int)strlen(cases[u].primaryText), XML_TRUE);
if (status != XML_STATUS_OK) {
_xml_failure(parser, __FILE__, __LINE__);
@@ -11835,22 +12785,20 @@ START_TEST(test_accounting_precision) {
if (actualCountBytesDirect != expectedCountBytesDirect) {
fprintf(
stderr,
- "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL(
+ "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL(
"") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n",
- u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks",
- expectedCountBytesDirect, actualCountBytesDirect);
+ u + 1, countCases, expectedCountBytesDirect, actualCountBytesDirect);
fail("Count of direct bytes is off");
}
if (actualCountBytesIndirect != expectedCountBytesIndirect) {
fprintf(
stderr,
- "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL(
+ "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL(
"") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n",
- u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks",
- expectedCountBytesIndirect, actualCountBytesIndirect);
+ u + 1, countCases, expectedCountBytesIndirect,
+ actualCountBytesIndirect);
fail("Count of indirect bytes is off");
- }
}
}
}
@@ -12031,7 +12979,7 @@ make_suite(void) {
tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
tcase_add_test(tc_basic, test_memory_allocation);
- tcase_add_test(tc_basic, test_default_current);
+ tcase_add_test(tc_basic, test_default_current);
tcase_add_test(tc_basic, test_dtd_elements);
tcase_add_test(tc_basic, test_dtd_elements_nesting);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
@@ -12065,6 +13013,8 @@ make_suite(void) {
#if defined(XML_CONTEXT_BYTES)
tcase_add_test(tc_basic, test_get_buffer_3_overflow);
#endif
+ tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
+ tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
tcase_add_test(tc_basic, test_byte_info_at_end);
tcase_add_test(tc_basic, test_byte_info_at_error);
tcase_add_test(tc_basic, test_byte_info_at_cdata);
@@ -12075,6 +13025,7 @@ make_suite(void) {
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
+ tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
@@ -12178,7 +13129,17 @@ make_suite(void) {
tcase_add_test(tc_basic, test_bad_notation);
tcase_add_test(tc_basic, test_default_doctype_handler);
tcase_add_test(tc_basic, test_empty_element_abort);
- tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
+ tcase_add_test__ifdef_xml_dtd(tc_basic,
+ test_pool_integrity_with_unfinished_attr);
+ tcase_add_test(tc_basic, test_nested_entity_suspend);
+ tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
+ tcase_add_test(tc_basic, test_set_reparse_deferral);
+ tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
+ tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
+ tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
+ tcase_add_test(tc_basic, test_set_bad_reparse_option);
+ tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
+ tcase_add_test(tc_basic, test_varying_buffer_fills);
suite_add_tcase(s, tc_namespace);
tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown);
@@ -12320,7 +13281,6 @@ make_suite(void) {
tcase_add_test(tc_nsalloc, test_nsalloc_long_default_in_ext);
tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext);
tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element);
-
#if defined(XML_DTD)
suite_add_tcase(s, tc_accounting);
tcase_add_test(tc_accounting, test_accounting_precision);
Index: expat-2.4.4/lib/internal.h
===================================================================
--- expat-2.4.4.orig/lib/internal.h
+++ expat-2.4.4/lib/internal.h
@@ -160,7 +160,7 @@ const char *unsignedCharToPrintable(unsi
#endif
extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
-extern unsigned int g_parseAttempts; // used for testing only
+extern unsigned int g_bytesScanned; // used for testing only
#ifdef __cplusplus
}
Index: expat-2.4.4/lib/xmlparse.c
===================================================================
--- expat-2.4.4.orig/lib/xmlparse.c
+++ expat-2.4.4/lib/xmlparse.c
@@ -605,7 +605,7 @@ static unsigned long getDebugLevel(const
: ((*((pool)->ptr)++ = c), 1))
XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
-unsigned int g_parseAttempts = 0; // used for testing only
+unsigned int g_bytesScanned = 0; // used for testing only
struct XML_ParserStruct {
/* The first member must be m_userData so that the XML_GetUserData
@@ -983,7 +983,7 @@ callProcessor(XML_Parser parser, const c
return XML_ERROR_NONE;
}
}
- g_parseAttempts += 1;
+ g_bytesScanned += (unsigned)have_now;
const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
if (ret == XML_ERROR_NONE) {
// if we consumed nothing, remember what we had on this parse attempt.
@@ -1352,7 +1352,7 @@ XML_ExternalEntityParserCreate(XML_Parse
XML_Bool oldReparseDeferralEnabled;
/* Validate the oldParser parameter before we pull everything out of it */
- if (oldParser == NULL)
+ if (oldParser == NULL)
return NULL;
/* Stash the original parser contents on the stack */
@@ -1921,7 +1921,7 @@ XML_Parse(XML_Parser parser, const char
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
parser->m_errorCode
- = callProcessor((parser, s, parser->m_parseEndPtr = s + len, &end);
+ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
if (parser->m_errorCode != XML_ERROR_NONE) {
parser->m_eventEndPtr = parser->m_eventPtr;