File bleach-3.1.5-CVE-2021-23980.patch of Package python-bleach
From: Andreas Stieger <Andreas.Stieger@gmx.de>
Date: Thu, 28 Jan 2021 14:56:24 -0500
Based on the following upstream commits:
1334134d34397966a7f7cfebd38639e9ba2c680e (fix)
d398c89e54ced6b1039d3677689707456ba42dec (tests)
---
bleach/html5lib_shim.py | 4 ++++
bleach/sanitizer.py | 4 ++++
tests/test_clean.py | 47 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 55 insertions(+)
Index: bleach-3.1.5/bleach/html5lib_shim.py
===================================================================
--- bleach-3.1.5.orig/bleach/html5lib_shim.py
+++ bleach-3.1.5/bleach/html5lib_shim.py
@@ -25,6 +25,10 @@ from html5lib.filters.base import Filter
from html5lib.filters.sanitizer import allowed_protocols
from html5lib.filters.sanitizer import Filter as SanitizerFilter
from html5lib._inputstream import HTMLInputStream
+from html5lib.serializer import (
+ escape,
+ HTMLSerializer,
+) # noqa: E402 module level import not at top of file
from html5lib.serializer import HTMLSerializer
from html5lib._tokenizer import HTMLTokenizer
from html5lib._trie import Trie
Index: bleach-3.1.5/bleach/sanitizer.py
===================================================================
--- bleach-3.1.5.orig/bleach/sanitizer.py
+++ bleach-3.1.5/bleach/sanitizer.py
@@ -347,6 +347,10 @@ class BleachSanitizerFilter(html5lib_shi
elif token_type == 'Comment':
if not self.strip_html_comments:
+ # call xml.sax.saxutils.escape to escape &, <, and > in addition to " and '
+ token["data"] = html5lib_shim.escape(
+ token["data"], entities={'"': "&quot;", "'": "&#x27;"}
+ )
return token
else:
return None
Index: bleach-3.1.5/tests/test_clean.py
===================================================================
--- bleach-3.1.5.orig/tests/test_clean.py
+++ bleach-3.1.5/tests/test_clean.py
@@ -822,6 +822,291 @@ def test_namespace_rc_data_element_strip
assert clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
+@pytest.mark.parametrize(
+    "namespace_tag, end_tag, eject_tag, data, expected",
+    [
+        # eject with style
+        (
+            "math",
+            "p",
+            "style",
+            "<math></p><style><!--</style><img src/onerror=alert(1)>",
+            "<math><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
+        ),
+        (
+            "math",
+            "br",
+            "style",
+            "<math></br><style><!--</style><img src/onerror=alert(1)>",
+            "<math><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "style",
+            "<svg></p><style><!--</style><img src/onerror=alert(1)>",
+            "<svg><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "style",
+            "<svg></br><style><!--</style><img src/onerror=alert(1)>",
+            "<svg><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
+        ),
+        # eject with title
+        (
+            "math",
+            "p",
+            "title",
+            "<math></p><title><!--</title><img src/onerror=alert(1)>",
+            "<math><p></p><title><!--&lt;/title&gt;&lt;img src/onerror=alert(1)&gt;--></title></math>",
+        ),
+        (
+            "math",
+            "br",
+            "title",
+            "<math></br><title><!--</title><img src/onerror=alert(1)>",
+            "<math><br><title><!--&lt;/title&gt;&lt;img src/onerror=alert(1)&gt;--></title></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "title",
+            "<svg></p><title><!--</title><img src/onerror=alert(1)>",
+            "<svg><p></p><title><!--&lt;/title&gt;&lt;img src/onerror=alert(1)&gt;--></title></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "title",
+            "<svg></br><title><!--</title><img src/onerror=alert(1)>",
+            "<svg><br><title><!--&lt;/title&gt;&lt;img src/onerror=alert(1)&gt;--></title></svg>",
+        ),
+        # eject with noscript
+        (
+            "math",
+            "p",
+            "noscript",
+            "<math></p><noscript><!--</noscript><img src/onerror=alert(1)>",
+            "<math><p></p><noscript><!--&lt;/noscript&gt;&lt;img src/onerror=alert(1)&gt;--></noscript></math>",
+        ),
+        (
+            "math",
+            "br",
+            "noscript",
+            "<math></br><noscript><!--</noscript><img src/onerror=alert(1)>",
+            "<math><br><noscript><!--&lt;/noscript&gt;&lt;img src/onerror=alert(1)&gt;--></noscript></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "noscript",
+            "<svg></p><noscript><!--</noscript><img src/onerror=alert(1)>",
+            "<svg><p></p><noscript><!--&lt;/noscript&gt;&lt;img src/onerror=alert(1)&gt;--></noscript></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "noscript",
+            "<svg></br><noscript><!--</noscript><img src/onerror=alert(1)>",
+            "<svg><br><noscript><!--&lt;/noscript&gt;&lt;img src/onerror=alert(1)&gt;--></noscript></svg>",
+        ),
+        # eject with script
+        (
+            "math",
+            "p",
+            "script",
+            "<math></p><script><!--</script><img src/onerror=alert(1)>",
+            "<math><p></p><script><!--&lt;/script&gt;&lt;img src/onerror=alert(1)&gt;--></script></math>",
+        ),
+        (
+            "math",
+            "br",
+            "script",
+            "<math></br><script><!--</script><img src/onerror=alert(1)>",
+            "<math><br><script><!--&lt;/script&gt;&lt;img src/onerror=alert(1)&gt;--></script></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "script",
+            "<svg></p><script><!--</script><img src/onerror=alert(1)>",
+            "<svg><p></p><script><!--&lt;/script&gt;&lt;img src/onerror=alert(1)&gt;--></script></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "script",
+            "<svg></br><script><!--</script><img src/onerror=alert(1)>",
+            "<svg><br><script><!--&lt;/script&gt;&lt;img src/onerror=alert(1)&gt;--></script></svg>",
+        ),
+        # eject with noembed
+        (
+            "math",
+            "p",
+            "noembed",
+            "<math></p><noembed><!--</noembed><img src/onerror=alert(1)>",
+            "<math><p></p><noembed><!--&lt;/noembed&gt;&lt;img src/onerror=alert(1)&gt;--></noembed></math>",
+        ),
+        (
+            "math",
+            "br",
+            "noembed",
+            "<math></br><noembed><!--</noembed><img src/onerror=alert(1)>",
+            "<math><br><noembed><!--&lt;/noembed&gt;&lt;img src/onerror=alert(1)&gt;--></noembed></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "noembed",
+            "<svg></p><noembed><!--</noembed><img src/onerror=alert(1)>",
+            "<svg><p></p><noembed><!--&lt;/noembed&gt;&lt;img src/onerror=alert(1)&gt;--></noembed></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "noembed",
+            "<svg></br><noembed><!--</noembed><img src/onerror=alert(1)>",
+            "<svg><br><noembed><!--&lt;/noembed&gt;&lt;img src/onerror=alert(1)&gt;--></noembed></svg>",
+        ),
+        # eject with textarea
+        (
+            "math",
+            "p",
+            "textarea",
+            "<math></p><textarea><!--</textarea><img src/onerror=alert(1)>",
+            "<math><p></p><textarea><!--&lt;/textarea&gt;&lt;img src/onerror=alert(1)&gt;--></textarea></math>",
+        ),
+        (
+            "math",
+            "br",
+            "textarea",
+            "<math></br><textarea><!--</textarea><img src/onerror=alert(1)>",
+            "<math><br><textarea><!--&lt;/textarea&gt;&lt;img src/onerror=alert(1)&gt;--></textarea></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "textarea",
+            "<svg></p><textarea><!--</textarea><img src/onerror=alert(1)>",
+            "<svg><p></p><textarea><!--&lt;/textarea&gt;&lt;img src/onerror=alert(1)&gt;--></textarea></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "textarea",
+            "<svg></br><textarea><!--</textarea><img src/onerror=alert(1)>",
+            "<svg><br><textarea><!--&lt;/textarea&gt;&lt;img src/onerror=alert(1)&gt;--></textarea></svg>",
+        ),
+        # eject with noframes
+        (
+            "math",
+            "p",
+            "noframes",
+            "<math></p><noframes><!--</noframes><img src/onerror=alert(1)>",
+            "<math><p></p><noframes><!--&lt;/noframes&gt;&lt;img src/onerror=alert(1)&gt;--></noframes></math>",
+        ),
+        (
+            "math",
+            "br",
+            "noframes",
+            "<math></br><noframes><!--</noframes><img src/onerror=alert(1)>",
+            "<math><br><noframes><!--&lt;/noframes&gt;&lt;img src/onerror=alert(1)&gt;--></noframes></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "noframes",
+            "<svg></p><noframes><!--</noframes><img src/onerror=alert(1)>",
+            "<svg><p></p><noframes><!--&lt;/noframes&gt;&lt;img src/onerror=alert(1)&gt;--></noframes></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "noframes",
+            "<svg></br><noframes><!--</noframes><img src/onerror=alert(1)>",
+            "<svg><br><noframes><!--&lt;/noframes&gt;&lt;img src/onerror=alert(1)&gt;--></noframes></svg>",
+        ),
+        # eject with iframe
+        (
+            "math",
+            "p",
+            "iframe",
+            "<math></p><iframe><!--</iframe><img src/onerror=alert(1)>",
+            "<math><p></p><iframe><!--&lt;/iframe&gt;&lt;img src/onerror=alert(1)&gt;--></iframe></math>",
+        ),
+        (
+            "math",
+            "br",
+            "iframe",
+            "<math></br><iframe><!--</iframe><img src/onerror=alert(1)>",
+            "<math><br><iframe><!--&lt;/iframe&gt;&lt;img src/onerror=alert(1)&gt;--></iframe></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "iframe",
+            "<svg></p><iframe><!--</iframe><img src/onerror=alert(1)>",
+            "<svg><p></p><iframe><!--&lt;/iframe&gt;&lt;img src/onerror=alert(1)&gt;--></iframe></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "iframe",
+            "<svg></br><iframe><!--</iframe><img src/onerror=alert(1)>",
+            "<svg><br><iframe><!--&lt;/iframe&gt;&lt;img src/onerror=alert(1)&gt;--></iframe></svg>",
+        ),
+        # eject with xmp
+        (
+            "math",
+            "p",
+            "xmp",
+            "<math></p><xmp><!--</xmp><img src/onerror=alert(1)>",
+            "<math><p></p><xmp><!--&lt;/xmp&gt;&lt;img src/onerror=alert(1)&gt;--></xmp></math>",
+        ),
+        (
+            "math",
+            "br",
+            "xmp",
+            "<math></br><xmp><!--</xmp><img src/onerror=alert(1)>",
+            "<math><br><xmp><!--&lt;/xmp&gt;&lt;img src/onerror=alert(1)&gt;--></xmp></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "xmp",
+            "<svg></p><xmp><!--</xmp><img src/onerror=alert(1)>",
+            "<svg><p></p><xmp><!--&lt;/xmp&gt;&lt;img src/onerror=alert(1)&gt;--></xmp></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "xmp",
+            "<svg></br><xmp><!--</xmp><img src/onerror=alert(1)>",
+            "<svg><br><xmp><!--&lt;/xmp&gt;&lt;img src/onerror=alert(1)&gt;--></xmp></svg>",
+        ),
+    ],
+)
+def test_html_comments_escaped(namespace_tag, end_tag, eject_tag, data, expected):
+    # refs: bug 1689399 / GHSA-vv2x-vrpj-qqpq
+    #
+    # p and br can be just an end tag (e.g. </p> == <p></p>)
+    #
+    # In browsers:
+    #
+    # * img and other tags break out of the svg or math namespace (e.g. <svg><img></svg> == <svg></svg><img>)
+    # * style does not (e.g. <svg><style></svg> == <svg><style></style></svg>)
+    # * style and other tags without child elements do not (e.g. <svg><style></svg> == <svg><style></style></svg>)
+    # * the breaking tag ejects trailing elements (e.g. <svg><img><style></style></svg> == <svg></svg><img><style></style>)
+    #
+    # the ejected elements can trigger XSS, so the comment text must come back entity-escaped
+    assert (
+        clean(data, tags=[namespace_tag, end_tag, eject_tag], strip_comments=False)
+        == expected
+    )
+
+
def get_ids_and_tests():
"""Retrieves regression tests from data/ directory