File pcre-utf8.patch of Package grep.35702

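	* src/pcresearch.c (Pexecute): Do not pass PCRE_NO_UTF8_CHECK to
	pcre_exec, as this leads to undefined behavior when the input is
	not valid UTF-8.
	(Pcompile): Use using_utf8 to decide whether to set PCRE_UTF8,
	instead of calling nl_langinfo directly.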
	* tests/pcre-infloop, tests/pcre-invalid-utf8-input:
	Exit status is now 2, not 1, when grep -P is given invalid UTF-8
	data in a UTF-8 locale.

	* src/dfa.c (using_utf8): Remove "static inline" so that the
	function has external linkage and can be called from pcresearch.c.
	* src/dfa.h (using_utf8): Declare it.

Index: grep-2.16/src/dfa.c
===================================================================
--- grep-2.16.orig/src/dfa.c
+++ grep-2.16/src/dfa.c
@@ -753,7 +753,7 @@ setbit_case_fold_c (int b, charclass c)
 
 /* UTF-8 encoding allows some optimizations that we can't otherwise
    assume in a multibyte encoding.  */
-static inline int
+int
 using_utf8 (void)
 {
   static int utf8 = -1;
Index: grep-2.16/src/dfa.h
===================================================================
--- grep-2.16.orig/src/dfa.h
+++ grep-2.16/src/dfa.h
@@ -99,3 +99,5 @@ extern void dfawarn (const char *);
    takes a single argument, a NUL-terminated string describing the error.
    The user must supply a dfaerror.  */
 extern _Noreturn void dfaerror (const char *);
+
+extern int using_utf8 (void);
Index: grep-2.16/src/pcresearch.c
===================================================================
--- grep-2.16.orig/src/pcresearch.c
+++ grep-2.16/src/pcresearch.c
@@ -20,14 +20,12 @@
 
 #include <config.h>
 #include "search.h"
+#include "dfa.h"
 #if HAVE_PCRE_H
 # include <pcre.h>
 #elif HAVE_PCRE_PCRE_H
 # include <pcre/pcre.h>
 #endif
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
 
 #if HAVE_LIBPCRE
 /* Compiled internal form of a Perl regular expression.  */
@@ -54,21 +52,14 @@ Pcompile (char const *pattern, size_t si
   int e;
   char const *ep;
   char *re = xnmalloc (4, size + 7);
-  int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
+  int flags = (PCRE_MULTILINE
+               | (match_icase ? PCRE_CASELESS : 0)
+               | (using_utf8 () ? PCRE_UTF8 : 0));
   char const *patlim = pattern + size;
   char *n = re;
   char const *p;
   char const *pnul;
 
-# if defined HAVE_LANGINFO_CODESET
-  if (STREQ (nl_langinfo (CODESET), "UTF-8"))
-    {
-      /* Enable PCRE's UTF-8 matching.  Note also the use of
-         PCRE_NO_UTF8_CHECK when calling pcre_extra, below.   */
-      flags |= PCRE_UTF8;
-    }
-# endif
-
   /* FIXME: Remove these restrictions.  */
   if (memchr (pattern, '\n', size))
     error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
@@ -158,10 +149,6 @@ Pexecute (char const *buf, size_t size,
        e == PCRE_ERROR_NOMATCH && line_next < buf + size;
        start_ofs -= line_next - line_buf)
     {
-      /* Disable the check that would make an invalid byte
-         seqence *in the input* trigger a failure.   */
-      int options = PCRE_NO_UTF8_CHECK;
-
       line_buf = line_next;
       line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);
       if (line_end == NULL)
@@ -176,7 +163,7 @@ Pexecute (char const *buf, size_t size,
         error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
 
       e = pcre_exec (cre, extra, line_buf, line_end - line_buf,
-                     start_ofs < 0 ? 0 : start_ofs, options,
+                     start_ofs < 0 ? 0 : start_ofs, 0,
                      sub, sizeof sub / sizeof *sub);
     }
 
Index: grep-2.16/tests/pcre-invalid-utf8-input
===================================================================
--- grep-2.16.orig/tests/pcre-invalid-utf8-input
+++ grep-2.16/tests/pcre-invalid-utf8-input
@@ -15,8 +15,7 @@ fail=0
 
 printf 'j\202\nj\n' > in || framework_failure_
 
-LC_ALL=en_US.UTF-8 grep -P j in > out 2>&1 || fail=1
-compare in out || fail=1
-compare /dev/null err || fail=1
+LC_ALL=en_US.UTF-8 grep -P j in
+test $? -eq 2 || fail=1
 
 Exit $fail
Places

File pcre-utf8.patch of Package grep.35702

Places