File expat-CVE-2022-25236-relax-fix.patch of Package expat.23180

Index: expat-2.2.5/doc/reference.html
===================================================================
--- expat-2.2.5.orig/doc/reference.html
+++ expat-2.2.5/doc/reference.html
@@ -934,6 +934,14 @@ the local part will be concatenated with
 to support RDF processors. It is a programming error to use the null separator
 with <a href= "#XML_SetReturnNSTriplet">namespace triplets</a>.</div>
 
+<p><strong>Note:</strong>
+Expat does not validate namespace URIs (beyond encoding)
+against RFC 3986 today (and is not required to do so with regard to
+the XML 1.0 namespaces specification) but it may start doing that
+in future releases.  Before that, an application using Expat must
+be ready to receive namespace URIs containing non-URI characters.
+</p>
+
 <pre class="fcndec" id="XML_ParserCreate_MM">
 XML_Parser XMLCALL
 XML_ParserCreate_MM(const XML_Char *encoding,
Index: expat-2.2.5/lib/expat.h
===================================================================
--- expat-2.2.5.orig/lib/expat.h
+++ expat-2.2.5/lib/expat.h
@@ -244,6 +244,17 @@ XML_ParserCreate(const XML_Char *encodin
    and the local part will be concatenated without any separator.
    It is a programming error to use the separator '\0' with namespace
    triplets (see XML_SetReturnNSTriplet).
+   If a namespace separator is chosen that can be part of a URI or
+   part of an XML name, splitting an expanded name back into its
+   1, 2 or 3 original parts on application level in the element handler
+   may end up vulnerable, so these are advised against;  sane choices for
+   a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
+
+   Note that Expat does not validate namespace URIs (beyond encoding)
+   against RFC 3986 today (and is not required to do so with regard to
+   the XML 1.0 namespaces specification) but it may start doing that
+   in future releases.  Before that, an application using Expat must
+   be ready to receive namespace URIs containing non-URI characters.
 */
 XMLPARSEAPI(XML_Parser)
 XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
Index: expat-2.2.5/lib/xmlparse.c
===================================================================
--- expat-2.2.5.orig/lib/xmlparse.c
+++ expat-2.2.5/lib/xmlparse.c
@@ -3625,6 +3625,117 @@ storeAtts(XML_Parser parser, const ENCOD
   return XML_ERROR_NONE;
 }
 
+static XML_Bool
+is_rfc3986_uri_char(XML_Char candidate) {
+  // For the RFC 3986 ANBF grammar see
+  // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+
+  switch (candidate) {
+  // From rule "ALPHA" (uppercase half)
+  case 'A':
+  case 'B':
+  case 'C':
+  case 'D':
+  case 'E':
+  case 'F':
+  case 'G':
+  case 'H':
+  case 'I':
+  case 'J':
+  case 'K':
+  case 'L':
+  case 'M':
+  case 'N':
+  case 'O':
+  case 'P':
+  case 'Q':
+  case 'R':
+  case 'S':
+  case 'T':
+  case 'U':
+  case 'V':
+  case 'W':
+  case 'X':
+  case 'Y':
+  case 'Z':
+
+  // From rule "ALPHA" (lowercase half)
+  case 'a':
+  case 'b':
+  case 'c':
+  case 'd':
+  case 'e':
+  case 'f':
+  case 'g':
+  case 'h':
+  case 'i':
+  case 'j':
+  case 'k':
+  case 'l':
+  case 'm':
+  case 'n':
+  case 'o':
+  case 'p':
+  case 'q':
+  case 'r':
+  case 's':
+  case 't':
+  case 'u':
+  case 'v':
+  case 'w':
+  case 'x':
+  case 'y':
+  case 'z':
+
+  // From rule "DIGIT"
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+
+  // From rule "pct-encoded"
+  case '%':
+
+  // From rule "unreserved"
+  case '-':
+  case '.':
+  case '_':
+  case '~':
+
+  // From rule "gen-delims"
+  case ':':
+  case '/':
+  case '?':
+  case '#':
+  case '[':
+  case ']':
+  case '@':
+
+  // From rule "sub-delims"
+  case '!':
+  case '$':
+  case '&':
+  case '\'':
+  case '(':
+  case ')':
+  case '*':
+  case '+':
+  case ',':
+  case ';':
+  case '=':
+    return XML_TRUE;
+
+  default:
+    return XML_FALSE;
+  }
+}
+
 /* addBinding() overwrites the value of prefix->binding without checking.
    Therefore one must keep track of the old value outside of addBinding().
 */
@@ -3686,14 +3797,26 @@ addBinding(XML_Parser parser, PREFIX *pr
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
       isXMLNS = XML_FALSE;
     
-    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
-    //       we have to at least make sure that the XML processor on top of
-    //       Expat (that is splitting tag names by namespace separator into
-    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
-    //       by an attacker putting additional namespace separator characters
-    //       into namespace declarations.  That would be ambiguous and not to
-    //       be expected.
-    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+    // NOTE: While Expat does not validate namespace URIs against RFC 3986
+    //       today (and is not REQUIRED to do so with regard to the XML 1.0
+    //       namespaces specification) we have to at least make sure, that
+    //       the application on top of Expat (that is likely splitting expanded
+    //       element names ("qualified names") of form
+    //       "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
+    //       in its element handler code) cannot be confused by an attacker
+    //       putting additional namespace separator characters into namespace
+    //       declarations.  That would be ambiguous and not to be expected.
+    //
+    //       While the HTML API docs of function XML_ParserCreateNS have been
+    //       advising against use of a namespace separator character that can
+    //       appear in a URI for >20 years now, some widespread applications
+    //       are using URI characters (':' (colon) in particular) for a
+    //       namespace separator, in practice.  To keep these applications
+    //       functional, we only reject namespaces URIs containing the
+    //       application-chosen namespace separator if the chosen separator
+    //       is a non-URI character with regard to RFC 3986.
+    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
+        && ! is_rfc3986_uri_char(uri[len])) {
       return XML_ERROR_SYNTAX;
     }
   }
Index: expat-2.2.5/tests/runtests.c
===================================================================
--- expat-2.2.5.orig/tests/runtests.c
+++ expat-2.2.5/tests/runtests.c
@@ -8135,16 +8135,18 @@ START_TEST(test_ns_separator_in_uri) {
   struct test_case {
     enum XML_Status expectedStatus;
     const char *doc;
+    XML_Char namesep;
   };
   struct test_case cases[] = {
-      {XML_STATUS_OK, "<doc xmlns='one_two' />"},
-      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
+      {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
+      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
+      {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
   };
 
   size_t i = 0;
   size_t failCount = 0;
   for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
-    XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
+    XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
     XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
     if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
                   /*isFinal*/ XML_TRUE)
openSUSE Build Service is sponsored by