[Pcre-svn] [242] code/trunk: Fix pcre2grep loop provoked by …

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [242] code/trunk: Fix pcre2grep loop provoked by \K in a lookbehind assertion in a non-anchored pattern.
Revision: 242
          http://www.exim.org/viewvc/pcre2?view=rev&revision=242
Author:   ph10
Date:     2015-04-03 12:14:19 +0100 (Fri, 03 Apr 2015)


Log Message:
-----------
Fix pcre2grep loop provoked by \K in a lookbehind assertion in a non-anchored
pattern.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/RunGrepTest
    code/trunk/src/pcre2grep.c
    code/trunk/testdata/grepinput8
    code/trunk/testdata/grepoutput
    code/trunk/testdata/grepoutput8


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-04-01 15:37:54 UTC (rev 241)
+++ code/trunk/ChangeLog    2015-04-03 11:14:19 UTC (rev 242)
@@ -27,7 +27,7 @@


8. For some types of pattern, for example /Z*(|d*){216}/, the auto-
possessification code could take exponential time to complete. A recursion
-depth limit of 10000 has been imposed to limit the resources used by this
+depth limit of 1000 has been imposed to limit the resources used by this
optimization. This infelicity was discovered by the LLVM fuzzer.

9. A pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
@@ -67,7 +67,10 @@
caused a stack overflow instead of the diagnosis of a non-fixed length
lookbehind assertion. This bug was discovered by the LLVM fuzzer.

+17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
+(e.g. /(?<=\Ka)/) could make pcre2grep loop.

+
Version 10.10 06-March-2015
---------------------------


Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2015-04-01 15:37:54 UTC (rev 241)
+++ code/trunk/RunGrepTest    2015-04-03 11:14:19 UTC (rev 242)
@@ -524,6 +524,11 @@
 (cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep


+echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
+echo "a" >testtemp1grep
+echo "aaaaa" >>testtemp1grep
+(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep

# Now compare the results.

@@ -544,6 +549,10 @@
(cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep

+ echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
+ (cd $srcdir; $valgrind $pcre2grep --line-offsets -u '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
+ echo "RC=$?" >>testtrygrep
+
$cf $srcdir/testdata/grepoutput8 testtrygrep
if [ $? != 0 ] ; then exit 1; fi


Modified: code/trunk/src/pcre2grep.c
===================================================================
--- code/trunk/src/pcre2grep.c    2015-04-01 15:37:54 UTC (rev 241)
+++ code/trunk/src/pcre2grep.c    2015-04-03 11:14:19 UTC (rev 242)
@@ -1433,7 +1433,7 @@


static BOOL
match_patterns(char *matchptr, size_t length, unsigned int options,
- int startoffset, int *mrc)
+ size_t startoffset, int *mrc)
{
int i;
size_t slen = length;
@@ -1581,12 +1581,12 @@
{
int endlinelength;
int mrc = 0;
- int startoffset = 0;
unsigned int options = 0;
BOOL match;
char *matchptr = ptr;
char *t = ptr;
size_t length, linelength;
+ size_t startoffset = 0;

   /* At this point, ptr is at the start of a line. We need to find the length
   of the subject string to pass to pcre_exec(). In multiline mode, it is the
@@ -1729,6 +1729,8 @@
       {
       if (!invert)
         {
+        size_t oldstartoffset;
+
         if (printname != NULL) fprintf(stdout, "%s:", printname);
         if (number) fprintf(stdout, "%d:", linenumber);


@@ -1772,12 +1774,23 @@
           if (printed || printname != NULL || number) fprintf(stdout, "\n");
           }


-        /* Prepare to repeat to find the next match */
+        /* Prepare to repeat to find the next match. If the pattern contained a
+        lookbehind that included \K, it is possible that the end of the match
+        might be at or before the actual starting offset we have just used. In
+        this case, start one character further on. */


         match = FALSE;
         if (line_buffered) fflush(stdout);
         rc = 0;                      /* Had some success */
         startoffset = offsets[1];    /* Restart after the match */
+        oldstartoffset = pcre2_get_startchar(match_data);
+        if (startoffset <= oldstartoffset)
+          {
+          if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
+          startoffset = oldstartoffset + 1;
+          if (utf)
+            while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
+          }
         goto ONLY_MATCHING_RESTART;
         }
       }
@@ -1917,7 +1930,7 @@
         for (;;)
           {
           startoffset = offsets[1];
-          if (startoffset >= (int)linelength + endlinelength ||
+          if (startoffset >= linelength + endlinelength ||
               !match_patterns(matchptr, length, options, startoffset, &mrc))
             break;
           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
@@ -1973,6 +1986,7 @@
   /* Advance to after the newline and increment the line number. The file
   offset to the current line is maintained in filepos. */


+ END_ONE_MATCH:
ptr += linelength + endlinelength;
filepos += (int)(linelength + endlinelength);
linenumber++;

Modified: code/trunk/testdata/grepinput8
===================================================================
(Binary files differ)

Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)

Modified: code/trunk/testdata/grepoutput8
===================================================================
(Binary files differ)