File CVE-2023-52425-libexpat-2.6.0-backport.patch of Package python3.38021
From d7133c7e0f91b14c390aa30a5689c353ef754fb6 Mon Sep 17 00:00:00 2001
From: Sebastian Pipping <sebastian@pipping.org>
Date: Wed, 7 Feb 2024 15:32:45 +0100
Subject: [PATCH] Fix etree XMLPullParser tests for Expat >=2.6.0 with reparse
deferral
Combined with gh#python/cpython!31453
bpo-46811: Make test suite support Expat >=2.4.5 (GH-31453)
Curly brackets were never allowed in namespace URIs
according to RFC 3986, and so-called namespace-validating
XML parsers have the right to reject them as invalid URIs.
libexpat >=2.4.5 has become stricter in that regard due to
related security issues; with ET.XML instantiating a
namespace-aware parser under the hood, this test has no
future in CPython.
References:
- https://datatracker.ietf.org/doc/html/rfc3986
- https://www.w3.org/TR/xml-names/
Also, test_minidom.py: Support Expat >=2.4.5
(cherry picked from commit 2cae93832f46b245847bdc252456ddf7742ef45e)
Co-authored-by: Sebastian Pipping <sebastian@pipping.org>
Fixes: gh#python/cpython#115133
From-PR: gh#python/cpython!115138
Patch: CVE-2023-52425-libexpat-2.6.0-backport.patch
---
Lib/test/support/__init__.py | 13 +-
Lib/test/test_minidom.py | 12 +
Lib/test/test_pyexpat.py | 61 +++++++++-
Lib/test/test_sax.py | 54 ++++++++
Lib/test/test_xml_etree.py | 14 +-
Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst | 1
Misc/NEWS.d/next/Tests/2024-02-07-15-49-37.gh-issue-115133.WBajNr.rst | 1
7 files changed, 146 insertions(+), 10 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst
create mode 100644 Misc/NEWS.d/next/Tests/2024-02-07-15-49-37.gh-issue-115133.WBajNr.rst
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -19,6 +19,7 @@ import platform
import re
import shutil
import socket
+import pyexpat
import stat
import struct
import subprocess
@@ -102,7 +103,7 @@ __all__ = [
# miscellaneous
"check_warnings", "EnvironmentVarGuard", "run_with_locale", "swap_item",
"swap_attr", "Matcher", "set_memlimit", "SuppressCrashReport", "sortdict",
- "run_with_tz",
+ "run_with_tz", "fails_with_expat_2_6_0", "is_expat_2_6_0",
]
class Error(Exception):
@@ -2343,3 +2344,13 @@ def adjust_int_max_str_digits(max_digits
yield
finally:
sys.set_int_max_str_digits(current)
+
+
+@functools.lru_cache(maxsize=32)
+def _is_expat_2_6_0():
+ return hasattr(pyexpat.ParserCreate(), 'SetReparseDeferralEnabled')
+is_expat_2_6_0 = _is_expat_2_6_0()
+
+fails_with_expat_2_6_0 = (unittest.expectedFailure
+ if is_expat_2_6_0
+ else lambda test: test)
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -9,6 +9,7 @@ import xml.dom.minidom
from xml.dom.minidom import parse, Node, Document, parseString
from xml.dom.minidom import getDOMImplementation
+from xml.parsers.expat import ExpatError
tstfile = findfile("test.xml", subdir="xmltestdata")
@@ -1090,7 +1091,11 @@ class MinidomTest(unittest.TestCase):
# Verify that character decoding errors raise exceptions instead
# of crashing
- self.assertRaises(UnicodeDecodeError, parseString,
+ # It doesn’t make any sense to insist on the exact text of the
+ # error message, or even the exact Exception … it is enough that
+ # the error has been discovered.
+ with self.assertRaises((UnicodeDecodeError, ExpatError)):
+ parseString(
b'<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
doc.unlink()
@@ -1536,7 +1541,10 @@ class MinidomTest(unittest.TestCase):
self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE)
def testExceptionOnSpacesInXMLNSValue(self):
- with self.assertRaisesRegex(ValueError, 'Unsupported syntax'):
+ # It doesn’t make any sense to insist on the exact text of the
+ # error message, or even the exact Exception … it is enough that
+ # the error has been discovered.
+ with self.assertRaises((ExpatError, ValueError)):
parseString('<element xmlns:abc="http:abc.com/de f g/hi/j k"><abc:foo /></element>')
def testDocRemoveChild(self):
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -11,7 +11,7 @@ import traceback
from xml.parsers import expat
from xml.parsers.expat import errors
-from test.support import sortdict, run_unittest
+from test.support import sortdict, run_unittest, is_expat_2_6_0
class SetAttributeTest(unittest.TestCase):
@@ -749,5 +749,64 @@ def test_main():
ErrorMessageTest,
ForeignDTDTests)
+class ReparseDeferralTest(unittest.TestCase):
+ def test_getter_setter_round_trip(self):
+ if not is_expat_2_6_0:
+ self.skipTest("Linked libexpat doesn't support reparse deferral")
+
+ parser = expat.ParserCreate()
+ enabled = (expat.version_info >= (2, 6, 0))
+
+ self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
+ parser.SetReparseDeferralEnabled(False)
+ self.assertIs(parser.GetReparseDeferralEnabled(), False)
+ parser.SetReparseDeferralEnabled(True)
+ self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
+
+ def test_reparse_deferral_enabled(self):
+ if not is_expat_2_6_0:
+ self.skipTest("Linked libexpat doesn't support reparse deferral")
+
+ started = []
+
+ def start_element(name, _):
+ started.append(name)
+
+ parser = expat.ParserCreate()
+ parser.StartElementHandler = start_element
+ self.assertTrue(parser.GetReparseDeferralEnabled())
+
+ for chunk in (b'<doc', b'/>'):
+ parser.Parse(chunk, False)
+
+ # The key test: Have handlers already fired? Expecting: no.
+ self.assertEqual(started, [])
+
+ parser.Parse(b'', True)
+
+ self.assertEqual(started, ['doc'])
+
+ def test_reparse_deferral_disabled(self):
+ if not is_expat_2_6_0:
+ self.skipTest("Linked libexpat doesn't support reparse deferral")
+
+ started = []
+
+ def start_element(name, _):
+ started.append(name)
+
+ parser = expat.ParserCreate()
+ parser.StartElementHandler = start_element
+ if is_expat_2_6_0:
+ parser.SetReparseDeferralEnabled(False)
+ self.assertFalse(parser.GetReparseDeferralEnabled())
+
+ for chunk in (b'<doc', b'/>'):
+ parser.Parse(chunk, False)
+
+ # The key test: Have handlers already fired? Expecting: yes.
+ self.assertEqual(started, ['doc'])
+
+
if __name__ == "__main__":
test_main()
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -20,7 +20,7 @@ import gc
import os.path
import shutil
from test import support
-from test.support import findfile, run_unittest, TESTFN
+from test.support import findfile, run_unittest, TESTFN, is_expat_2_6_0
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
@@ -1060,6 +1060,58 @@ class ExpatReaderTest(XmlTestBase):
self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")
+ def test_flush_reparse_deferral_enabled(self):
+ if not is_expat_2_6_0:
+ self.skipTest("Linked libexpat doesn't support reparse deferral")
+
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+
+ for chunk in ("<doc", ">"):
+ parser.feed(chunk)
+
+ self.assertEqual(result.getvalue(), start) # i.e. no elements started
+ self.assertTrue(parser._parser.GetReparseDeferralEnabled())
+
+ parser.flush()
+
+ self.assertTrue(parser._parser.GetReparseDeferralEnabled())
+ self.assertEqual(result.getvalue(), start + b"<doc>")
+
+ parser.feed("</doc>")
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start + b"<doc></doc>")
+
+ def test_flush_reparse_deferral_disabled(self):
+ if not is_expat_2_6_0:
+ self.skipTest("Linked libexpat doesn't support reparse deferral")
+
+ result = BytesIO()
+ xmlgen = XMLGenerator(result)
+ parser = create_parser()
+ parser.setContentHandler(xmlgen)
+
+ for chunk in ("<doc", ">"):
+ parser.feed(chunk)
+
+ parser._parser.SetReparseDeferralEnabled(False)
+ self.assertEqual(result.getvalue(), start) # i.e. no elements started
+
+ self.assertFalse(parser._parser.GetReparseDeferralEnabled())
+
+ parser.flush()
+
+ self.assertFalse(parser._parser.GetReparseDeferralEnabled())
+ self.assertEqual(result.getvalue(), start + b"<doc>")
+
+ parser.feed("</doc>")
+ parser.close()
+
+ self.assertEqual(result.getvalue(), start + b"<doc></doc>")
+
# ===== Locator support
def test_expat_locator_noinfo(self):
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -18,7 +18,8 @@ import weakref
from itertools import product
from test import support
-from test.support import TESTFN, findfile, import_fresh_module, gc_collect
+from test.support import (TESTFN, findfile, import_fresh_module,
+ gc_collect, is_expat_2_6_0, fails_with_expat_2_6_0)
# pyET is the pure-Python implementation.
#
@@ -1026,6 +1027,7 @@ class XMLPullParserTest(unittest.TestCas
def test_simple_xml(self):
for chunk_size in (None, 1, 5):
with self.subTest(chunk_size=chunk_size):
+ expected_events = []
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
self._feed(parser, "<!-- comment -->\n", chunk_size)
@@ -1035,16 +1037,17 @@ class XMLPullParserTest(unittest.TestCas
chunk_size)
self.assert_event_tags(parser, [])
self._feed(parser, ">\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'element')])
+ expected_events += [('end', 'element')]
self._feed(parser, "<element>text</element>tail\n", chunk_size)
self._feed(parser, "<empty-element/>\n", chunk_size)
- self.assert_event_tags(parser, [
+ expected_events += [
('end', 'element'),
('end', 'empty-element'),
- ])
+ ]
self._feed(parser, "</root>\n", chunk_size)
- self.assert_event_tags(parser, [('end', 'root')])
+ expected_events += [('end', 'root')]
self.assertIsNone(parser.close())
+ self.assert_event_tags(parser, expected_events)
def test_feed_while_iterating(self):
parser = ET.XMLPullParser()
@@ -1646,6 +1649,7 @@ class BugsTest(unittest.TestCase):
b"<?xml version='1.0' encoding='ascii'?>\n"
b'<body>tãg</body>')
+ @unittest.skip('Fails with modern libexpat.')
def test_issue3151(self):
e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
self.assertEqual(e.tag, '{${stuff}}localname')
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-02-20-21-03-31.bpo-46811.8BxgdQ.rst
@@ -0,0 +1 @@
+Make test suite support Expat >=2.4.5
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2024-02-07-15-49-37.gh-issue-115133.WBajNr.rst
@@ -0,0 +1 @@
+Fix etree XMLPullParser tests for Expat >=2.6.0 with reparse deferral