File site-url.patch of Package weblate
From 9a1963be8bb95fc3bf25d3fb33d3cd1222f67336 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com>
Date: Tue, 11 Nov 2025 19:45:27 +0100
Subject: [PATCH] fix: make add_site_url filter more robust
- Do not assume libxml fixes HTML in a certain way; create the document
structure manually.
- Make it gracefully handle text outside of the tags.
Fixes #16890
---
weblate/accounts/templatetags/site_url.py | 29 +++++++++++++--------
weblate/accounts/tests/test_templatetags.py | 11 ++++++--
2 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/weblate/accounts/templatetags/site_url.py b/weblate/accounts/templatetags/site_url.py
index 29e2c9484eb6..e69aebe9d770 100644
--- a/weblate/accounts/templatetags/site_url.py
+++ b/weblate/accounts/templatetags/site_url.py
@@ -5,13 +5,18 @@
"""Convert any links in HTML to absolute."""
from io import StringIO
+from typing import TYPE_CHECKING
from django import template
+from django.utils.html import escape
from django.utils.safestring import mark_safe
from lxml import etree
from weblate.utils.site import get_site_url
+if TYPE_CHECKING:
+ from django.utils.safestring import SafeString
+
register = template.Library()
@@ -19,24 +24,26 @@
def add_site_url(content):
"""Automatically add site URL to any relative links or images."""
parser = etree.HTMLParser(collect_ids=False)
- tree = etree.parse(StringIO(content), parser)
+ tree = etree.parse(StringIO(f"<html><body>{content}</body></html>"), parser)
for link in tree.findall(".//a"):
url = link.get("href")
- if url.startswith("/"):
+ if url and url.startswith("/"):
link.set("href", get_site_url(url))
for link in tree.findall(".//img"):
url = link.get("src")
- if url.startswith("/"):
+ if url and url.startswith("/"):
link.set("src", get_site_url(url))
body = tree.find("body")
+ if body is None:
+ msg = "Failed parsing HTML!"
+ raise ValueError(msg)
+ parts: list[str | SafeString] = [
+ etree.tostring(child, pretty_print=True, method="html", encoding="unicode")
+ for child in body.iterchildren()
+ ]
+ if body.text:
+ parts.insert(0, escape(body.text))
return mark_safe( # noqa: S308
- "".join(
- [
- etree.tostring(
- child, pretty_print=True, method="html", encoding="unicode"
- )
- for child in body.iterchildren()
- ]
- )
+ "".join(parts)
)
diff --git a/weblate/accounts/tests/test_templatetags.py b/weblate/accounts/tests/test_templatetags.py
index 41ded8ff2765..ce3d6449d8e7 100644
--- a/weblate/accounts/tests/test_templatetags.py
+++ b/weblate/accounts/tests/test_templatetags.py
@@ -9,15 +9,21 @@
class TemplateTagsTestCase(SimpleTestCase):
- def test_simple(self) -> None:
+ def test_add_site_url_filter(self) -> None:
template = Template("""
{% load site_url %}
<html><body>
{% filter add_site_url %}
+ <p>
text:
<a href="/foo"><span>Foo</span></a>
-
+ </p>
{% endfilter %}
+ <p>
+ {% filter add_site_url %}
+ other&
+ {% endfilter %}
+ </p>
</body>
</html>
""")
@@ -33,6 +39,7 @@ def test_simple(self) -> None:
</span>
</a>
</p>
+ <p>other&</p>
</body>
</html>
""",