File perl-HTML-Parser-3.56-entity.diff of Package perl-HTML-Parser

commit b9aae1e43eb2c8e989510187cff0ba3e996f9a4c
Author: Gisle Aas <gisle@aas.no>
Date:   Thu Oct 22 21:45:54 2009 +0200

    decode_entities confused by trailing incomplete entity
    
    Mark Martinec reported crashes when running SpamAssassin on a
    particular HTML junk mail.  The problem was caused by
    HTML::Parser's decode_entities function confusing itself when it
    encountered strings with incomplete entities at the end of the string.

diff --git a/t/entities.t b/t/entities.t
index 7f6a29a..e96501c 100644
--- a/t/entities.t
+++ b/t/entities.t
@@ -1,6 +1,6 @@
 use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
 
-use Test::More tests => 12;
+use Test::More tests => 13;
 
 $a = "V&aring;re norske tegn b&oslash;r &#230res";
 
@@ -71,6 +71,8 @@ is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;");
 is(decode_entities("&apos;"), "'");
 is(encode_entities("'", "'"), "&#39;");
 
+is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
+  "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
 
 __END__
 # Quoted from rfc1866.txt
diff --git a/util.c b/util.c
index 28fec78..6f56a2b 100644
--- a/util.c
+++ b/util.c
@@ -94,14 +94,14 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
 	ent_start = s;
 	repl = 0;
 
-	if (*s == '#') {
+	if (s < end && *s == '#') {
 	    UV num = 0;
 	    UV prev = 0;
 	    int ok = 0;
 	    s++;
-	    if (*s == 'x' || *s == 'X') {
+	    if (s < end && (*s == 'x' || *s == 'X')) {
 		s++;
-		while (*s) {
+		while (s < end) {
 		    char *tmp = strchr(PL_hexdigit, *s);
 		    if (!tmp)
 			break;
@@ -117,7 +117,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
 		}
 	    }
 	    else {
-		while (isDIGIT(*s)) {
+		while (s < end && isDIGIT(*s)) {
 		    num = num * 10 + (*s - '0');
 		    if (prev && num < prev) {
 			/* overflow */
@@ -180,7 +180,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
 	}
 	else {
 	    char *ent_name = s;
-	    while (isALNUM(*s))
+	    while (s < end && isALNUM(*s))
 		s++;
 	    if (ent_name != s && entity2char) {
 		SV** svp;
@@ -216,7 +216,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
 
 	if (repl) {
 	    char *repl_allocated = 0;
-	    if (*s == ';')
+	    if (s < end && *s == ';')
 		s++;
 	    t--;  /* '&' already copied, undo it */
 
openSUSE Build Service is sponsored by