File libxml2-CVE-2017-9050.patch of Package libxml2.4858
From beca86e8c86984b967a6efa05a9653470253edda Mon Sep 17 00:00:00 2001
From: Hugh Davenport <hugh@davenport.net.nz>
Date: Wed, 4 May 2016 11:23:49 +0800
Subject: Detect change of encoding when parsing HTML names
From https://bugzilla.gnome.org/show_bug.cgi?id=758518
This happens when a name is being parsed in a file that has no valid
encoding set, so libxml has to guess what the encoding is. This patch
detects when the input buffer location changes and, if it does, restarts
the parsing of the name.
This slightly changes the output of a couple of regression tests.
---
HTMLparser.c | 8 ++++++++
result/HTML/758605.html | 2 +-
result/HTML/758605.html.err | 2 +-
result/HTML/758605.html.sax | 3 ++-
4 files changed, 12 insertions(+), 3 deletions(-)
Index: libxml2-2.9.1/HTMLparser.c
===================================================================
--- libxml2-2.9.1.orig/HTMLparser.c
+++ libxml2-2.9.1/HTMLparser.c
@@ -2493,6 +2493,7 @@ htmlParseNameComplex(xmlParserCtxtPtr ct
int len = 0, l;
int c;
int count = 0;
+ const xmlChar *base = ctxt->input->base;
/*
* Handler for more complex cases
@@ -2518,6 +2519,13 @@ htmlParseNameComplex(xmlParserCtxtPtr ct
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (ctxt->input->base != base) {
+ /*
+ * We changed encoding from an unknown encoding
+ * Input buffer changed location, so we better start again
+ */
+ return(htmlParseNameComplex(ctxt));
+ }
}
if (ctxt->input->base > ctxt->input->cur - len)
Index: libxml2-2.9.1/result/HTML/758605.html
===================================================================
--- libxml2-2.9.1.orig/result/HTML/758605.html
+++ libxml2-2.9.1/result/HTML/758605.html
@@ -1,3 +1,3 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body><p>&
+<html><body><p>&ê
</p></body></html>
Index: libxml2-2.9.1/result/HTML/758605.html.err
===================================================================
--- libxml2-2.9.1.orig/result/HTML/758605.html.err
+++ libxml2-2.9.1/result/HTML/758605.html.err
@@ -1,3 +1,3 @@
-./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: no name
+./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
ê
^
Index: libxml2-2.9.1/result/HTML/758605.html.sax
===================================================================
--- libxml2-2.9.1.orig/result/HTML/758605.html.sax
+++ libxml2-2.9.1/result/HTML/758605.html.sax
@@ -1,10 +1,11 @@
SAX.setDocumentLocator()
SAX.startDocument()
-SAX.error: htmlParseEntityRef: no name
+SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(&, 1)
+SAX.characters(ê, 2)
SAX.ignorableWhitespace(
, 1)
SAX.endElement(p)