File feedparser-issue91-CVE-2011-1156.patch of Package python-feedparser.import4444
Index: feedparser-4.1/tests/wellformed/sanitize/xml_declaration_unexpected_character.xml
===================================================================
--- /dev/null
+++ feedparser-4.1/tests/wellformed/sanitize/xml_declaration_unexpected_character.xml
@@ -0,0 +1,7 @@
+<!--
+Description: xml declaration unexpected character
+Expect: not bozo and feed['title'] == u'&lt;!DOCTYPE ~'
+-->
+<feed xmlns="http://www.w3.org/2005/Atom">
+ <title type="html">&lt;!DOCTYPE ~</title>
+</feed>
Index: feedparser-4.1/feedparser.py
===================================================================
--- feedparser-4.1.orig/feedparser.py
+++ feedparser-4.1/feedparser.py
@@ -1525,6 +1525,14 @@ class _BaseHTMLProcessor(sgmllib.SGMLPar
'''Return processed HTML as a single string'''
return ''.join([str(p) for p in self.pieces])
+ def parse_declaration(self, i):  # override of sgmllib.SGMLParser.parse_declaration that tolerates malformed <!...> declarations
+ try:
+ return sgmllib.SGMLParser.parse_declaration(self, i)
+ except sgmllib.SGMLParseError:
+ # malformed declaration: instead of propagating the parse error, emit the leading '<' as the escaped entity and resume parsing at i+1
+ self.handle_data('&lt;')
+ return i+1
+
class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor):
def __init__(self, baseuri, baselang, encoding):
sgmllib.SGMLParser.__init__(self)