Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP2:Update
grep
pcre-invalid-utf8.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File pcre-invalid-utf8.patch of Package grep
From 16fc7fa0e0f273fa81e382b4303617d707364902 Mon Sep 17 00:00:00 2001 From: Paul Eggert <eggert@cs.ucla.edu> Date: Tue, 9 Sep 2014 12:41:54 -0700 Subject: [PATCH] grep: -P now treats invalid UTF-8 input as non-matching Problem reported by Santiago Vila in: http://bugs.gnu.org/18266 * NEWS: Mention this. * src/pcresearch.c (Pexecute): Treat UTF-8 encoding errors as non-matching data, instead of exiting 'grep'. * tests/pcre-infloop: grep now exits with status 1, not 2. * tests/pcre-invalid-utf8-input: grep now exits with status 0, not 2. --- NEWS | 3 ++ src/pcresearch.c | 70 ++++++++++++++--------------------- tests/pcre-infloop | 2 +- tests/pcre-invalid-utf8-input | 2 +- 4 files changed, 33 insertions(+), 44 deletions(-) Index: grep-2.16/src/pcresearch.c =================================================================== --- grep-2.16.orig/src/pcresearch.c +++ grep-2.16/src/pcresearch.c @@ -137,34 +137,41 @@ Pexecute (char const *buf, size_t size, #else /* This array must have at least two elements; everything after that is just for performance improvement in pcre_exec. */ - int sub[300]; + enum { nsub = 300 }; + int sub[nsub]; - const char *line_buf, *line_end, *line_next; + char const *p = start_ptr ? start_ptr : buf; + int options = p == buf || p[-1] == eolbyte ? 0 : PCRE_NOTBOL; + char const *line_start = buf; int e = PCRE_ERROR_NOMATCH; - ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0; + char const *line_end; /* PCRE can't limit the matching to single lines, therefore we have to match each line in the buffer separately. 
*/ - for (line_next = buf; - e == PCRE_ERROR_NOMATCH && line_next < buf + size; - start_ofs -= line_next - line_buf) + for (; p < buf + size; p = line_start = line_end + 1) { - line_buf = line_next; - line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf); - if (line_end == NULL) - line_next = line_end = buf + size; - else - line_next = line_end + 1; + line_end = memchr (p, eolbyte, buf + size - p); - if (start_ptr && start_ptr >= line_end) - continue; - - if (INT_MAX < line_end - line_buf) + if (INT_MAX < line_end - p) error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); - e = pcre_exec (cre, extra, line_buf, line_end - line_buf, - start_ofs < 0 ? 0 : start_ofs, 0, - sub, sizeof sub / sizeof *sub); + /* Treat encoding-error bytes as data that cannot match. */ + for (;;) + { + e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub); + if (e != PCRE_ERROR_BADUTF8) + break; + e = pcre_exec (cre, extra, p, sub[0], 0, + options | PCRE_NO_UTF8_CHECK, sub, nsub); + if (e != PCRE_ERROR_NOMATCH) + break; + p += sub[0] + 1; + options = PCRE_NOTBOL; + } + + if (e != PCRE_ERROR_NOMATCH) + break; + options = 0; } if (e <= 0) @@ -181,10 +188,6 @@ Pexecute (char const *buf, size_t size, error (EXIT_TROUBLE, 0, _("exceeded PCRE's backtracking limit")); - case PCRE_ERROR_BADUTF8: - error (EXIT_TROUBLE, 0, - _("invalid UTF-8 byte sequence in input")); - default: /* For now, we lump all remaining PCRE failures into this basket. If anyone cares to provide sample grep usage that can trigger @@ -198,25 +201,8 @@ Pexecute (char const *buf, size_t size, } else { - /* Narrow down to the line we've found. */ - char const *beg = line_buf + sub[0]; - char const *end = line_buf + sub[1]; - char const *buflim = buf + size; - char eol = eolbyte; - if (!start_ptr) - { - /* FIXME: The case when '\n' is not found indicates a bug: - Since grep is line oriented, the match should never contain - a newline, so there _must_ be a newline following. 
- */ - if (!(end = memchr (end, eol, buflim - end))) - end = buflim; - else - end++; - while (buf < beg && beg[-1] != eol) - --beg; - } - + char const *beg = start_ptr ? p + sub[0] : line_start; + char const *end = start_ptr ? p + sub[1] : line_end + 1; *match_size = end - beg; return beg - buf; } Index: grep-2.16/tests/pcre-invalid-utf8-input =================================================================== --- grep-2.16.orig/tests/pcre-invalid-utf8-input +++ grep-2.16/tests/pcre-invalid-utf8-input @@ -16,6 +16,6 @@ fail=0 printf 'j\202\nj\n' > in || framework_failure_ LC_ALL=en_US.UTF-8 grep -P j in -test $? -eq 2 || fail=1 +test $? -eq 0 || fail=1 Exit $fail
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor