File site-url.patch of Package weblate

From 9a1963be8bb95fc3bf25d3fb33d3cd1222f67336 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Tue, 11 Nov 2025 19:45:27 +0100
Subject: [PATCH] fix: make add_site_url filter more robust

- Do not assume libxml fixes HTML in a certain way; create the document
  structure manually.
- Make it gracefully handle text outside of the tags.

Fixes #16890
---
 weblate/accounts/templatetags/site_url.py   | 29 +++++++++++++--------
 weblate/accounts/tests/test_templatetags.py | 11 ++++++--
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/weblate/accounts/templatetags/site_url.py b/weblate/accounts/templatetags/site_url.py
index 29e2c9484eb6..e69aebe9d770 100644
--- a/weblate/accounts/templatetags/site_url.py
+++ b/weblate/accounts/templatetags/site_url.py
@@ -5,13 +5,18 @@
 """Convert any links in HTML to absolute."""
 
 from io import StringIO
+from typing import TYPE_CHECKING
 
 from django import template
+from django.utils.html import escape
 from django.utils.safestring import mark_safe
 from lxml import etree
 
 from weblate.utils.site import get_site_url
 
+if TYPE_CHECKING:
+    from django.utils.safestring import SafeString
+
 register = template.Library()
 
 
@@ -19,24 +24,26 @@
 def add_site_url(content):
     """Automatically add site URL to any relative links or images."""
     parser = etree.HTMLParser(collect_ids=False)
-    tree = etree.parse(StringIO(content), parser)
+    tree = etree.parse(StringIO(f"<html><body>{content}</body></html>"), parser)
     for link in tree.findall(".//a"):
         url = link.get("href")
-        if url.startswith("/"):
+        if url and url.startswith("/"):
             link.set("href", get_site_url(url))
     for link in tree.findall(".//img"):
         url = link.get("src")
-        if url.startswith("/"):
+        if url and url.startswith("/"):
             link.set("src", get_site_url(url))
 
     body = tree.find("body")
+    if body is None:
+        msg = "Failed parsing HTML!"
+        raise ValueError(msg)
+    parts: list[str | SafeString] = [
+        etree.tostring(child, pretty_print=True, method="html", encoding="unicode")
+        for child in body.iterchildren()
+    ]
+    if body.text:
+        parts.insert(0, escape(body.text))
     return mark_safe(  # noqa: S308
-        "".join(
-            [
-                etree.tostring(
-                    child, pretty_print=True, method="html", encoding="unicode"
-                )
-                for child in body.iterchildren()
-            ]
-        )
+        "".join(parts)
     )
diff --git a/weblate/accounts/tests/test_templatetags.py b/weblate/accounts/tests/test_templatetags.py
index 41ded8ff2765..ce3d6449d8e7 100644
--- a/weblate/accounts/tests/test_templatetags.py
+++ b/weblate/accounts/tests/test_templatetags.py
@@ -9,15 +9,21 @@
 
 
 class TemplateTagsTestCase(SimpleTestCase):
-    def test_simple(self) -> None:
+    def test_add_site_url_filter(self) -> None:
         template = Template("""
                 {% load site_url %}
                 <html><body>
                 {% filter add_site_url %}
+                <p>
                     text:
                     <a href="/foo"><span>Foo</span></a>
-
+                </p>
                 {% endfilter %}
+                <p>
+                {% filter add_site_url %}
+                    other&amp;
+                {% endfilter %}
+                </p>
                 </body>
                 </html>
             """)
@@ -33,6 +39,7 @@ def test_simple(self) -> None:
                         </span>
                     </a>
                 </p>
+                <p>other&amp;</p>
             </body>
             </html>
             """,
openSUSE Build Service is sponsored by