File coerce_comments_to_work_with_lxml.patch of Package python-html5lib

From 0c551c9519e47f76f8f185089ed71cb9539b6e00 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Nov 2015 15:17:07 +0000
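Subject: [PATCH] Make lxml tree-builder coerce comments to work with lxml 3.5.

coerceComment, in the same branch that rewrites adjacent dashes, now
also appends a space (with a DataLossWarning) to comment data that
ends in a dash. The lxml tree builder and its testSerializer create
their InfosetFilter with preventDoubleDashComments=True.
self.commentClass is now Comment rather than builder.Comment, and the
initial comments inserted before the root element are created through
self.commentClass instead of calling etree.Comment directly.
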
---
 html5lib/ihatexml.py                | 3 +++
 html5lib/treebuilders/etree_lxml.py | 9 +++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index 0fc7930..5da5d93 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -225,6 +225,9 @@ def coerceComment(self, data):
             while "--" in data:
                 warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                 data = data.replace("--", "- -")
+            if data.endswith("-"):
+                warnings.warn("Comments cannot end in a dash", DataLossWarning)
+                data += " "
         return data
 
     def coerceCharacters(self, data):
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 35d08ef..c6c981f 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -54,7 +54,7 @@ def _getChildNodes(self):
 def testSerializer(element):
     rv = []
     finalText = None
-    infosetFilter = ihatexml.InfosetFilter()
+    infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
 
     def serializeElement(element, indent=0):
         if not hasattr(element, "tag"):
@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
 
     def __init__(self, namespaceHTMLElements, fullTree=False):
         builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
-        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
+        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
         self.namespaceHTMLElements = namespaceHTMLElements
 
         class Attributes(dict):
@@ -257,7 +257,7 @@ def _getData(self):
             data = property(_getData, _setData)
 
         self.elementClass = Element
-        self.commentClass = builder.Comment
+        self.commentClass = Comment
         # self.fragmentClass = builder.DocumentFragment
         _base.TreeBuilder.__init__(self, namespaceHTMLElements)
 
@@ -344,7 +344,8 @@ def insertRoot(self, token):
 
         # Append the initial comments:
         for comment_token in self.initial_comments:
-            root.addprevious(etree.Comment(comment_token["data"]))
+            comment = self.commentClass(comment_token["data"])
+            root.addprevious(comment._element)
 
         # Create the root document and add the ElementTree to it
         self.document = self.documentClass()
openSUSE Build Service is sponsored by