File CVE-2024-27351.patch of Package python-Django.34904
Index: Django-2.0.7/tests/utils_tests/test_text.py
===================================================================
--- Django-2.0.7.orig/tests/utils_tests/test_text.py
+++ Django-2.0.7/tests/utils_tests/test_text.py
@@ -178,6 +178,32 @@ class TestUtilsText(SimpleTestCase):
truncator = text.Truncator(re_tag_catastrophic_test)
self.assertEqual(re_tag_catastrophic_test, truncator.words(500, html=True))
+ # Only open brackets.
+ test = "<" * text.Truncator.MAX_LENGTH_HTML
+ truncator = text.Truncator(test)
+ self.assertEqual(truncator.words(1, html=True), test)
+
+ # Tags with special chars in attrs.
+ truncator = text.Truncator(
+ """<i style="margin: 5%; font: *;">Hello, my dear lady!</i>"""
+ )
+ self.assertEqual(
+ """<i style="margin: 5%; font: *;">Hello, my dear…</i>""",
+ truncator.words(3, html=True),
+ )
+
+ # Tags with special non-latin chars in attrs.
+ truncator = text.Truncator("""<p data-x="א">Hello, my dear lady!</p>""")
+ self.assertEqual(
+ """<p data-x="א">Hello, my dear…</p>""",
+ truncator.words(3, html=True),
+ )
+
+ # Misplaced brackets.
+ truncator = text.Truncator("hello >< world")
+ self.assertEqual(truncator.words(1, html=True), "hello…")
+ self.assertEqual(truncator.words(2, html=True), "hello >< world")
+
def test_wrap(self):
digits = '1234 67 9'
self.assertEqual(text.wrap(digits, 100), '1234 67 9')
Index: Django-2.0.7/django/utils/text.py
===================================================================
--- Django-2.0.7.orig/django/utils/text.py
+++ Django-2.0.7/django/utils/text.py
@@ -17,8 +17,64 @@ def capfirst(x):
return x and str(x)[0].upper() + str(x)[1:]
-# Set up regular expressions
-re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.S)
+# ----- Begin security-related performance workaround -----
+
+# re_words used to be a single compiled pattern, defined here as
+#
+#     re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.S)
+#
+# But it was shown that this regex, in the way we use it here, has
+# catastrophic edge-case performance, namely when it is applied to text
+# containing only open brackets, "<<<...". The class below provides the
+# services and correct answers for our use cases, but handles these edge
+# cases much faster.
+re_notag = re.compile(r"([^<>\s]+)", re.S)  # one word: a run without "<", ">" or whitespace
+re_prt = re.compile(r"<|([^<>\s]+)", re.S)  # a single "<" (group 1 unset) or one word (group 1)
+
+
+class WordsRegex:
+    """Stand-in for the old compiled ``re_words``; offers only search()."""
+    @staticmethod
+    def search(text, pos):
+        """Return a match for the next tag or word at/after ``pos``, or None."""
+        # Look for "<" or a non-tag word.
+        partial = re_prt.search(text, pos)
+        # Use group(): Match.__getitem__ only exists on Python 3.6+.
+        if partial is None or partial.group(1) is not None:
+            return partial
+        # "<" was found, look for a closing ">".
+        end = text.find(">", partial.end(0))
+        if end < 0:
+            # ">" cannot be found, look for a word.
+            return re_notag.search(text, pos + 1)
+        # "<" followed by a ">" was found -- fake a match.
+        return FakeMatch(text[partial.start(0):end + 1], end + 1)
+
+
+class FakeMatch:
+    """Bare-bones match object for one "<...>" span; its group 1 is None."""
+    __slots__ = ["_text", "_end"]
+
+    def __init__(self, text, end):
+        self._text = text  # returned as group 0
+        self._end = end  # returned by end()
+
+    def __getitem__(self, group):
+        assert group == 0 or group == 1, "This specific object takes only group in {0,1}"
+        return None if group == 1 else self._text
+
+    def group(self, group):
+        return self[group]
+
+    def end(self, group=0):
+        assert group == 0, "This specific object takes only group=0"
+        return self._end
+
+
+# ----- End security-related performance workaround -----
+
+# Set up regular expressions.
+re_words = WordsRegex  # not a compiled pattern: only search(text, pos) is provided
re_chars = re.compile(r'<.*?>|(.)', re.S)
re_tag = re.compile(r'<(/)?(\S+?)(?:(\s*/)|\s.*?)?>', re.S)
re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines