File CVE-2025-64460.patch of Package python-Django.41924

From 4d2b8803bebcdefd2b76e9e8fc528d5fddea93f0 Mon Sep 17 00:00:00 2001
From: Shai Berger <shai@platonix.com>
Date: Sat, 11 Oct 2025 21:42:56 +0300
Subject: [PATCH] [4.2.x] Fixed CVE-2025-64460 -- Corrected quadratic inner
 text accumulation in XML serializer.

Previously, `getInnerText()` recursively used `list.extend()` on strings,
which added each character from child nodes as a separate list element.
On deeply nested XML content, this caused the overall deserialization
work to grow quadratically with input size, potentially allowing
disproportionate CPU consumption for crafted XML.

The fix separates collection of inner texts from joining them, so that
each subtree is joined only once, reducing the complexity to linear in
the size of the input. These changes also include a mitigation for an
xml.dom.minidom performance issue.

Thanks Seokchan Yoon (https://ch4n3.kr/) for the report.

Co-authored-by: Jacob Walls <jacobtylerwalls@gmail.com>
Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>

Backport of 50efb718b31333051bc2dcb06911b8fa1358c98c from main.
---
 django/core/serializers/xml_serializer.py | 39 +++++++++++++---
 docs/releases/4.2.27.txt                  | 10 +++++
 docs/topics/serialization.txt             |  2 +
 tests/serializers/test_xml.py             | 55 ++++++++++++++++++++++-
 4 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py
index 3f9955aa23c5..5db8c067c413 100644
--- a/django/core/serializers/xml_serializer.py
+++ b/django/core/serializers/xml_serializer.py
@@ -2,7 +2,8 @@
 XML serializer.
 """
 import json
-from xml.dom import pulldom
+from contextlib import contextmanager
+from xml.dom import minidom, pulldom
 from xml.sax import handler
 from xml.sax.expatreader import ExpatParser as _ExpatParser
 
@@ -14,6 +15,25 @@
 from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
 
 
+@contextmanager
+def fast_cache_clearing():
+    """Workaround for performance issues in minidom document checks.
+
+    Speeds up repeated DOM operations by skipping unnecessary full traversal
+    of the DOM tree.
+    """
+    module_helper_was_lambda = False
+    if original_fn := getattr(minidom, "_in_document", None):
+        module_helper_was_lambda = original_fn.__name__ == "<lambda>"
+        if not module_helper_was_lambda:
+            minidom._in_document = lambda node: bool(node.ownerDocument)
+    try:
+        yield
+    finally:
+        if original_fn and not module_helper_was_lambda:
+            minidom._in_document = original_fn
+
+
 class Serializer(base.Serializer):
     """Serialize a QuerySet to XML."""
 
@@ -208,7 +228,8 @@ def _make_parser(self):
     def __next__(self):
         for event, node in self.event_stream:
             if event == "START_ELEMENT" and node.nodeName == "object":
-                self.event_stream.expandNode(node)
+                with fast_cache_clearing():
+                    self.event_stream.expandNode(node)
                 return self._handle_object(node)
         raise StopIteration
 
@@ -392,19 +413,25 @@ def _get_model_from_node(self, node, attr):
 
 def getInnerText(node):
     """Get all the inner text of a DOM node (recursively)."""
+    inner_text_list = getInnerTextList(node)
+    return "".join(inner_text_list)
+
+
+def getInnerTextList(node):
+    """Return a list of the inner texts of a DOM node (recursively)."""
     # inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
-    inner_text = []
+    result = []
     for child in node.childNodes:
         if (
             child.nodeType == child.TEXT_NODE
             or child.nodeType == child.CDATA_SECTION_NODE
         ):
-            inner_text.append(child.data)
+            result.append(child.data)
         elif child.nodeType == child.ELEMENT_NODE:
-            inner_text.extend(getInnerText(child))
+            result.extend(getInnerTextList(child))
         else:
             pass
-    return "".join(inner_text)
+    return result
 
 
 # Below code based on Christian Heimes' defusedxml
diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt
index 0bb57642ab7e..dc403ca1d4ac 100644
--- a/docs/topics/serialization.txt
+++ b/docs/topics/serialization.txt
@@ -173,6 +173,8 @@ Identifier  Information
 .. _jsonl: https://jsonlines.org/
 .. _PyYAML: https://pyyaml.org/
 
+.. _serialization-formats-xml:
+
 XML
 ---
 
diff --git a/tests/serializers/test_xml.py b/tests/serializers/test_xml.py
index c9df2f2a5bc5..03462cfed54a 100644
--- a/tests/serializers/test_xml.py
+++ b/tests/serializers/test_xml.py
@@ -1,7 +1,10 @@
+import gc
+import time
 from xml.dom import minidom
 
 from django.core import serializers
-from django.core.serializers.xml_serializer import DTDForbidden
+from django.core.serializers.xml_serializer import Deserializer, DTDForbidden
+from django.db import models
 from django.test import TestCase, TransactionTestCase
 
 from .tests import SerializersTestBase, SerializersTransactionTestBase
@@ -90,6 +93,56 @@ def test_no_dtd(self):
         with self.assertRaises(DTDForbidden):
             next(serializers.deserialize("xml", xml))
 
+    def test_crafted_xml_performance(self):
+        """The time to process invalid inputs is not quadratic."""
+
+        def build_crafted_xml(depth, leaf_text_len):
+            nested_open = "<nested>" * depth
+            nested_close = "</nested>" * depth
+            leaf = "x" * leaf_text_len
+            field_content = f"{nested_open}{leaf}{nested_close}"
+            return f"""
+                <django-objects version="1.0">
+                   <object model="contenttypes.contenttype" pk="1">
+                      <field name="app_label">{field_content}</field>
+                      <field name="model">m</field>
+                   </object>
+                </django-objects>
+            """
+
+        def deserialize(crafted_xml):
+            iterator = Deserializer(crafted_xml)
+            gc.collect()
+
+            start_time = time.perf_counter()
+            result = list(iterator)
+            end_time = time.perf_counter()
+
+            self.assertEqual(len(result), 1)
+            self.assertIsInstance(result[0].object, models.Model)
+            return end_time - start_time
+
+        def assertFactor(label, params, factor=2):
+            factors = []
+            prev_time = None
+            for depth, length in params:
+                crafted_xml = build_crafted_xml(depth, length)
+                elapsed = deserialize(crafted_xml)
+                if prev_time is not None:
+                    factors.append(elapsed / prev_time)
+                prev_time = elapsed
+
+            with self.subTest(label):
+                # Assert based on the average factor to reduce test flakiness.
+                self.assertLessEqual(sum(factors) / len(factors), factor)
+
+        assertFactor(
+            "varying depth, varying length",
+            [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
+            2,
+        )
+        assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
+
 
 class XmlSerializerTransactionTestCase(
     SerializersTransactionTestBase, TransactionTestCase
openSUSE Build Service is sponsored by