File CVE-2020-27783-prevent-noscript.patch of Package python-lxml.26325

From 89e7aad6e7ff9ecd88678ff25f885988b184b26e Mon Sep 17 00:00:00 2001
From: Stefan Behnel <stefan_ml@behnel.de>
Date: Sun, 18 Oct 2020 10:06:46 +0200
Subject: [PATCH] Prevent combinations of <noscript> and <style> from sneaking
 JavaScript through the HTML cleaner.

---
 src/lxml/html/clean.py            |  3 +++
 src/lxml/html/tests/test_clean.py | 10 ++++++++++
 2 files changed, 13 insertions(+)

Index: lxml-4.0.0/src/lxml/html/clean.py
===================================================================
--- lxml-4.0.0.orig/src/lxml/html/clean.py
+++ lxml-4.0.0/src/lxml/html/clean.py
@@ -509,6 +509,9 @@ class Cleaner(object):
             return True
         if 'expression(' in style:
             return True
+        if '</noscript' in style:
+            # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
+            return True
         return False
 
     def clean_html(self, html):
Index: lxml-4.0.0/src/lxml/html/tests/test_clean.py
===================================================================
--- lxml-4.0.0.orig/src/lxml/html/tests/test_clean.py
+++ lxml-4.0.0/src/lxml/html/tests/test_clean.py
@@ -69,6 +69,16 @@ class CleanerTest(unittest.TestCase):
         s = lxml.html.fromstring('<invalid tag>child</another>')
         self.assertEqual('child', clean_html(s).text_content())
 
+    def test_sneaky_noscript_in_style(self):
+        # This gets parsed as <noscript> -> <style>"...</noscript>..."</style>
+        # thus passing the </noscript> through into the output.
+        html = '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
+        s = lxml.html.fragment_fromstring(html)
+
+        self.assertEqual(
+            b'<noscript><style>/* deleted */</style></noscript>',
+            lxml.html.tostring(clean_html(s)))
+
 
 def test_suite():
     suite = unittest.TestSuite()
openSUSE Build Service is sponsored by