File mbcache.diff of Package grep

--- src/dfa.c
+++ src/dfa.c
@@ -2755,7 +2755,8 @@
    match needs to be verified by a backtracking matcher.  Otherwise
    we store a 0 in *backref. */
 size_t
-dfaexec (struct dfa *d, char const *begin, size_t size, int *backref)
+dfaexec (struct dfa *d, char const *begin, size_t size, int *backref,
+	 struct mb_cache *mb_cache)
 {
   register int s;	/* Current state. */
   register unsigned char const *p; /* Current input character. */
@@ -2787,43 +2788,77 @@
 #ifdef MBS_SUPPORT
   if (MB_CUR_MAX > 1)
     {
-      int remain_bytes, i;
       buf_begin = begin;
       buf_end = end;
-
-      /* initialize mblen_buf, and inputwcs.  */
-      MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2);
-      MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2);
-      memset(&mbs, 0, sizeof(mbstate_t));
-      remain_bytes = 0;
-      for (i = 0; i < end - (unsigned char const *)begin + 1; i++)
+      if (mb_cache && mb_cache->mblen_buf && mb_cache->wcs_buf
+	  && begin > mb_cache->orig_buf /* NOTE(review): strict, so begin == orig_buf is a miss -- confirm intent */
+	  && begin + size <= mb_cache->orig_buf + mb_cache->len)
+	{
+	  /* Cache hit: copy the slice of the cached scan that starts at BEGIN.  */
+	  MALLOC (mblen_buf, unsigned char, size + 2);
+	  MALLOC (inputwcs, wchar_t, size + 2);
+	  memcpy (mblen_buf,
+		  mb_cache->mblen_buf + (begin - mb_cache->orig_buf),
+		  (size + 2) * sizeof (unsigned char));
+	  memcpy (inputwcs,
+		  mb_cache->wcs_buf + (begin - mb_cache->orig_buf),
+		  (size + 2) * sizeof (wchar_t));
+	  mblen_buf[size + 1] = 0;	/* the slice gets its own sentinel, as after a full scan */
+	  inputwcs[size + 1] = 0;
+	}
+      else
 	{
-	  if (remain_bytes == 0)
+	  int remain_bytes, i;	/* remain_bytes: bytes of the current multibyte char not yet visited */
+
+	  /* Cache miss: scan the input to fill mblen_buf and inputwcs.  */
+	  MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2);
+	  MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2);
+	  memset(&mbs, 0, sizeof(mbstate_t));
+	  remain_bytes = 0;
+	  for (i = 0; i < end - (unsigned char const *)begin + 1; i++)
 	    {
-	      remain_bytes
-		= mbrtowc(inputwcs + i, begin + i,
-			  end - (unsigned char const *)begin - i + 1, &mbs);
-	      if (remain_bytes <= 1)
+	      if (remain_bytes == 0)
 		{
-		  remain_bytes = 0;
-		  inputwcs[i] = (wchar_t)begin[i];
-		  mblen_buf[i] = 0;
+		  remain_bytes
+		    = mbrtowc(inputwcs + i, begin + i,
+			      end - (unsigned char const *)begin - i + 1, &mbs);
+		  if (remain_bytes <= 1)	/* one byte, NUL, or (presumably) a negative error result */
+		    {
+		      remain_bytes = 0;
+		      inputwcs[i] = (wchar_t)begin[i];
+		      mblen_buf[i] = 0;	/* 0: byte is handled as a single-byte character */
+		    }
+		  else
+		    {
+		      mblen_buf[i] = remain_bytes;	/* lead byte: full length of the character */
+		      remain_bytes--;
+		    }
 		}
 	      else
 		{
 		  mblen_buf[i] = remain_bytes;
+		  inputwcs[i] = 0;	/* no wide character is stored for continuation bytes */
 		  remain_bytes--;
 		}
 	    }
-	  else
+	  mblen_buf[i] = 0;
+	  inputwcs[i] = 0; /* sentinel */
+
+	  if (mb_cache)
 	    {
-	      mblen_buf[i] = remain_bytes;
-	      inputwcs[i] = 0;
-	      remain_bytes--;
+	      /* Replace the cache contents with a copy of this scan (size + 2 entries).  */
+	      mb_cache->len = size;
+	      mb_cache->orig_buf = begin;
+	      free (mb_cache->mblen_buf);
+	      free (mb_cache->wcs_buf);
+	      MALLOC (mb_cache->mblen_buf, unsigned char, size + 2);
+	      MALLOC (mb_cache->wcs_buf, wchar_t, size + 2);
+	      memcpy (mb_cache->mblen_buf, mblen_buf,
+		      (size + 2) * sizeof (unsigned char));
+	      memcpy (mb_cache->wcs_buf, inputwcs,
+		      (size + 2) * sizeof (wchar_t));
 	    }
 	}
-      mblen_buf[i] = 0;
-      inputwcs[i] = 0; /* sentinel */
     }
 #endif /* MBS_SUPPORT */
 
--- src/dfa.h
+++ src/dfa.h
@@ -22,6 +22,8 @@
    In addition to clobbering modularity, we eat up valuable
    name space. */
 
+#include "mbcache.h"
+
 #ifdef __STDC__
 # ifndef _PTR_T
 # define _PTR_T
@@ -403,7 +405,8 @@
    order to verify backreferencing; otherwise the flag will be cleared.
    Returns (size_t) -1 if no match is found, or the offset of the first
    character after the first & shortest matching string in the buffer. */
-extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *));
+extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *,
+			       struct mb_cache *));
 
 /* Free the storage held by the components of a struct dfa. */
 extern void dfafree PARAMS ((struct dfa *));
--- src/grep.c
+++ src/grep.c
@@ -189,7 +189,8 @@
 
 /* Functions we'll use to search. */
 static void (*compile) PARAMS ((char const *, size_t));
-static size_t (*execute) PARAMS ((char const *, size_t, size_t *, int));
+static size_t (*execute) PARAMS ((char const *, size_t, struct mb_cache *,
+				  size_t *, int));
 
 /* Like error, but suppress the diagnostic if requested.  */
 static void
@@ -506,7 +507,7 @@
 }
 
 static void
-prline (char const *beg, char const *lim, int sep)
+prline (char const *beg, char const *lim, int sep, struct mb_cache *mb_cache)
 {
   if (out_file)
     printf ("%s%c", filename, sep & filename_mask);
@@ -529,7 +530,8 @@
     {
       size_t match_size;
       size_t match_offset;
-      while ((match_offset = (*execute) (beg, lim - beg, &match_size, 1))
+      while ((match_offset = (*execute) (beg, lim - beg, mb_cache,
+					 &match_size, 1))
 	  != (size_t) -1)
         {
 	  char const *b = beg + match_offset;
@@ -563,7 +565,8 @@
 	  int i;
 	  for (i = 0; i < lim - beg; i++)
 	    ibeg[i] = tolower (beg[i]);
-	  while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1))
+	  while ((match_offset = (*execute) (ibeg, ilim-ibeg, mb_cache,
+					     &match_size, 1))
 		 != (size_t) -1)
 	    {
 	      char const *b = beg + match_offset;
@@ -581,7 +584,8 @@
 	  lastout = lim;
 	  return;
 	}
-      while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1))
+      while (lim-beg && (match_offset = (*execute) (beg, lim - beg, mb_cache,
+						    &match_size, 1))
 	     != (size_t) -1)
 	{
 	  char const *b = beg + match_offset;
@@ -609,7 +613,7 @@
 /* Print pending lines of trailing context prior to LIM. Trailing context ends
    at the next matching line when OUTLEFT is 0.  */
 static void
-prpending (char const *lim)
+prpending (char const *lim, struct mb_cache *mb_cache)
 {
   if (!lastout)
     lastout = bufbeg;
@@ -619,9 +623,10 @@
       size_t match_size;
       --pending;
       if (outleft
-	  || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1)
+	  || (((*execute) (lastout, nl - lastout, mb_cache,
+			   &match_size, 0) == (size_t) -1)
 	      == !out_invert))
-	prline (lastout, nl + 1, '-');
+	prline (lastout, nl + 1, '-', mb_cache);
       else
 	pending = 0;
     }
@@ -630,7 +635,8 @@
 /* Print the lines between BEG and LIM.  Deal with context crap.
    If NLINESP is non-null, store a count of lines between BEG and LIM.  */
 static void
-prtext (char const *beg, char const *lim, int *nlinesp)
+prtext (char const *beg, char const *lim, int *nlinesp,
+	struct mb_cache *mb_cache)
 {
   static int used;		/* avoid printing "--" before any output */
   char const *bp, *p;
@@ -638,7 +644,7 @@
   int i, n;
 
   if (!out_quiet && pending > 0)
-    prpending (beg);
+    prpending (beg, mb_cache);
 
   p = beg;
 
@@ -662,7 +668,7 @@
 	{
 	  char const *nl = memchr (p, eol, beg - p);
 	  nl++;
-	  prline (p, nl, '-');
+	  prline (p, nl, '-', mb_cache);
 	  p = nl;
 	}
     }
@@ -675,7 +681,7 @@
 	  char const *nl = memchr (p, eol, lim - p);
 	  nl++;
 	  if (!out_quiet)
-	    prline (p, nl, ':');
+	    prline (p, nl, ':', mb_cache);
 	  p = nl;
 	}
       *nlinesp = n;
@@ -685,7 +691,7 @@
     }
   else
     if (!out_quiet)
-      prline (beg, lim, ':');
+      prline (beg, lim, ':', mb_cache);
 
   pending = out_quiet ? 0 : out_after;
   used = 1;
@@ -695,7 +701,7 @@
    between matching lines if OUT_INVERT is true).  Return a count of
    lines printed. */
 static int
-grepbuf (char const *beg, char const *lim)
+grepbuf (char const *beg, char const *lim, struct mb_cache *mb_cache)
 {
   int nlines, n;
   register char const *p;
@@ -704,7 +710,8 @@
 
   nlines = 0;
   p = beg;
-  while ((match_offset = (*execute) (p, lim - p, &match_size, 0)) != (size_t) -1)
+  while ((match_offset = (*execute) (p, lim - p, mb_cache,
+				     &match_size, 0)) != (size_t) -1)
     {
       char const *b = p + match_offset;
       char const *endp = b + match_size;
@@ -713,7 +720,7 @@
 	break;
       if (!out_invert)
 	{
-	  prtext (b, endp, (int *) 0);
+	  prtext (b, endp, (int *) 0, mb_cache);
 	  nlines++;
           outleft--;
 	  if (!outleft || done_on_match)
@@ -726,7 +733,7 @@
 	}
       else if (p < b)
 	{
-	  prtext (p, b, &n);
+	  prtext (p, b, &n, mb_cache);
 	  nlines += n;
           outleft -= n;
 	  if (!outleft)
@@ -736,7 +743,7 @@
     }
   if (out_invert && p < lim)
     {
-      prtext (p, lim, &n);
+      prtext (p, lim, &n, mb_cache);
       nlines += n;
       outleft -= n;
     }
@@ -756,7 +763,16 @@
   char *beg;
   char *lim;
   char eol = eolbyte;
+  struct mb_cache *mb_cache;	/* handed to grepbuf/prpending; null without MBS_SUPPORT */
+#ifdef MBS_SUPPORT
+  struct mb_cache mb_cache_inst;	/* cache storage; len and orig_buf are not set here */
 
+  mb_cache = &mb_cache_inst;
+  mb_cache->mblen_buf = 0;	/* null buffers mark the cache as empty */
+  mb_cache->wcs_buf = 0;
+#else
+  mb_cache = 0;
+#endif
   if (!reset (fd, file, stats))
     return 0;
 
@@ -823,9 +839,9 @@
       if (beg < lim)
 	{
 	  if (outleft)
-	    nlines += grepbuf (beg, lim);
+	    nlines += grepbuf (beg, lim, mb_cache);
 	  if (pending)
-	    prpending (lim);
+	    prpending (lim, mb_cache);
 	  if((!outleft && !pending) || (nlines && done_on_match && !out_invert))
 	    goto finish_grep;
 	}
@@ -853,6 +869,12 @@
 	totalcc = add_count (totalcc, buflim - bufbeg - save);
       if (out_line)
 	nlscan (beg);
+#ifdef MBS_SUPPORT
+      free (mb_cache->wcs_buf);	/* empty the cache before fillbuf is called */
+      free (mb_cache->mblen_buf);
+      mb_cache->wcs_buf = 0;	/* null again, so the next lookup misses and a later free is harmless */
+      mb_cache->mblen_buf = 0;
+#endif
       if (! fillbuf (save, stats))
 	{
 	  if (! is_EISDIR (errno, file))
     {
       *buflim++ = eol;
       if (outleft)
-	nlines += grepbuf (bufbeg + save - residue, buflim);
+	nlines += grepbuf (bufbeg + save - residue, buflim, mb_cache);
       if (pending)
-        prpending (buflim);
+        prpending (buflim, mb_cache);
     }
 
  finish_grep:
@@ -874,6 +896,11 @@
   out_quiet -= not_text;
   if ((not_text & ~out_quiet) && nlines != 0)
     printf (_("Binary file %s matches\n"), filename);
+
+#ifdef MBS_SUPPORT
+  free (mb_cache->wcs_buf);	/* release whatever the cache still holds before returning */
+  free (mb_cache->mblen_buf);
+#endif
   return nlines;
 }
 
 
--- src/grep.h
+++ src/grep.h
@@ -20,6 +20,8 @@
 # define __attribute__(x)
 #endif
 
+#include "mbcache.h"
+
 /* Grep.c expects the matchers vector to be terminated
    by an entry with a NULL compile, and to contain at least
    an entry named "default". */
@@ -28,7 +30,8 @@
 {
   char name[8];
   void (*compile) PARAMS ((char const *, size_t));
-  size_t (*execute) PARAMS ((char const *, size_t, size_t *, int));
+  size_t (*execute) PARAMS ((char const *, size_t, struct mb_cache *,
+			     size_t *, int));
 } const matchers[];
 
 /* Exported from fgrepmat.c, egrepmat.c, grepmat.c.  */
--- src/mbcache.h
+++ src/mbcache.h
@@ -0,0 +1,14 @@
+#ifndef MB_CACHE_DEFINED
+#define MB_CACHE_DEFINED
+#ifdef MBS_SUPPORT
+struct mb_cache		/* multibyte scan results kept between matcher calls */
+{
+  size_t len;		/* SIZE argument of the call that filled the cache */
+  const char *orig_buf; /* start of the scanned text; not the only reference; do not free */
+  wchar_t *wcs_buf;	/* malloc'd by dfaexec; null when empty or after check_multibyte_string */
+  unsigned char *mblen_buf; /* malloc'd or null; dfaexec and check_multibyte_string fill it differently */
+};
+#else
+struct mb_cache;	/* incomplete type; grep() passes a null pointer in this configuration */
+#endif
+#endif
--- src/search.c
+++ src/search.c
@@ -71,18 +71,23 @@
 static int kwset_exact_matches;
 
 #if defined(MBS_SUPPORT)
-static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
+static char* check_multibyte_string PARAMS ((char const *buf, size_t size,
+					     struct mb_cache *,
+					     char const *orig_buf));
 extern int convert_mbstr;
 #endif
 static void kwsinit PARAMS ((void));
 static void kwsmusts PARAMS ((void));
 static void Gcompile PARAMS ((char const *, size_t));
 static void Ecompile PARAMS ((char const *, size_t));
-static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
+static size_t EGexecute PARAMS ((char const *, size_t, struct mb_cache *,
+				 size_t *, int ));
 static void Fcompile PARAMS ((char const *, size_t));
-static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
+static size_t Fexecute PARAMS ((char const *, size_t, struct mb_cache *,
+				size_t *, int));
 static void Pcompile PARAMS ((char const *, size_t ));
-static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
+static size_t Pexecute PARAMS ((char const *, size_t, struct mb_cache *,
+				size_t *, int));
 
 void
 dfaerror (char const *mesg)
@@ -148,35 +153,61 @@
    are not singlebyte character nor the first byte of a multibyte
    character.  Caller must free the array.  */
 static char*
-check_multibyte_string(char const *buf, size_t size)
+check_multibyte_string(char const *buf, size_t size, struct mb_cache *mb_cache,
+                       char const *orig_buf) /* ORIG_BUF keys the cache; BUF may be a copy of it */
 {
   char *mb_properties = xmalloc(size);
   mbstate_t cur_state;
   wchar_t wc;
   int i;
   memset(&cur_state, 0, sizeof(mbstate_t));
-  memset(mb_properties, 0, sizeof(char)*size);
-  for (i = 0; i < size ;)
-    {
-      size_t mbclen;
-      mbclen = mbrtowc (&wc, buf + i, size - i, &cur_state);
+  if (!match_icase && mb_cache && mb_cache->mblen_buf && !mb_cache->wcs_buf
+      && orig_buf > mb_cache->orig_buf
+      && orig_buf + size <= mb_cache->orig_buf + mb_cache->len)
+    {
+      /* Cache hit.  Excluded above: dfaexec-written slices (wcs_buf set,
+	 other encoding) and MATCH_ICASE (needs the lowercasing scan).  */
+      memcpy (mb_properties,
+	      mb_cache->mblen_buf + (orig_buf - mb_cache->orig_buf), size);
 
-      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
-	{
-	  /* An invalid sequence, or a truncated multibyte character.
-	     We treat it as a singlebyte character.  */
-	  mbclen = 1;
-	}
-      else if (match_icase)
+    }
+  else
+    {
+      memset(mb_properties, 0, sizeof(char)*size);
+      for (i = 0; i < size ;)
 	{
-	  if (iswupper ((wint_t) wc))
-	    {
-	      wc = towlower ((wint_t) wc);
-	      wcrtomb (buf + i, wc, &cur_state);
-	    }
+         size_t mbclen;
+         mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
+
+         if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+           {
+             /* An invalid sequence, or a truncated multibyte character.
+                We treat it as a singlebyte character.  */
+             mbclen = 1;
+           }
+         else if (match_icase)
+           {
+             if (iswupper((wint_t)wc))
+               {
+                 wc = towlower((wint_t)wc);
+                 wcrtomb(buf + i, wc, &cur_state);	/* lowercases BUF in place */
+               }
+           }
+         mb_properties[i] = mbclen;	/* first byte holds the length; the rest stay 0 */
+         i += mbclen;
+	}
+
+      /* Cache the result; wcs_buf is cleared, so dfaexec will not reuse it.  */
+      if (mb_cache)
+	{
+	  free (mb_cache->wcs_buf);
+	  mb_cache->wcs_buf = NULL;
+	  free (mb_cache->mblen_buf);
+	  mb_cache->len = size;
+	  mb_cache->orig_buf = orig_buf;
+	  mb_cache->mblen_buf = xmalloc (size);
+	  memcpy (mb_cache->mblen_buf, mb_properties, size);
 	}
-      mb_properties[i] = mbclen;
-      i += mbclen;
     }
 
   return mb_properties;
@@ -363,9 +394,11 @@
 }
 
 static size_t
-EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
+EGexecute (char const *buf, size_t size, struct mb_cache *mb_cache,
+           size_t *match_size, int exact)
 {
   register char const *buflim, *beg, *end;
+  char const *orig_buf = buf;
   char eol = eolbyte;
   int backref, start, len;
   struct kwsmatch kwsm;
@@ -380,7 +413,7 @@
           memcpy (case_buf, buf, size);
           buf = case_buf;
 	  if (kwset && convert_mbstr)
-	    mb_properties = check_multibyte_string (buf, size);
+	    mb_properties = check_multibyte_string (buf, size, mb_cache, orig_buf);
 	}
     }
 #endif /* MBS_SUPPORT */
@@ -416,13 +449,13 @@
 		--beg;
 	      if (kwsm.index < kwset_exact_matches)
 		goto success;
-	      if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+	      if (dfaexec (&dfa, beg, end - beg, &backref, mb_cache) == (size_t) -1)
 		continue;
 	    }
 	  else
 	    {
 	      /* No good fixed strings; start with DFA. */
-	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
+	      size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref, mb_cache);
 	      if (offset == (size_t) -1)
 		break;
 	      /* Narrow down to the line we've found. */
@@ -547,9 +580,11 @@
 }
 
 static size_t
-Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
+Fexecute (char const *buf, size_t size, struct mb_cache *mb_cache,
+	  size_t *match_size, int exact)
 {
   register char const *beg, *try, *end;
+  char const *orig_buf = buf;
   register size_t len;
   char eol = eolbyte;
   struct kwsmatch kwsmatch;
@@ -564,7 +599,7 @@
           memcpy (case_buf, buf, size);
           buf = case_buf;
 	  if (convert_mbstr)
-	    mb_properties = check_multibyte_string (buf, size);
+	    mb_properties = check_multibyte_string (buf, size, mb_cache, orig_buf);
         }
     }
 #endif /* MBS_SUPPORT */
@@ -755,7 +790,8 @@
 }
 
 static size_t
-Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
+Pexecute (char const *buf, size_t size, struct mb_cache *mb_cache,
+	  size_t *match_size, int exact)
 {
 #if !HAVE_LIBPCRE
   abort ();
openSUSE Build Service is sponsored by