Revision: 242
http://www.exim.org/viewvc/pcre2?view=rev&revision=242
Author: ph10
Date: 2015-04-03 12:14:19 +0100 (Fri, 03 Apr 2015)
Log Message:
-----------
Fix pcre2grep loop provoked by \K in a lookbehind assertion in a non-anchored
pattern.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/RunGrepTest
code/trunk/src/pcre2grep.c
code/trunk/testdata/grepinput8
code/trunk/testdata/grepoutput
code/trunk/testdata/grepoutput8
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-04-01 15:37:54 UTC (rev 241)
+++ code/trunk/ChangeLog 2015-04-03 11:14:19 UTC (rev 242)
@@ -27,7 +27,7 @@
8. For some types of pattern, for example /Z*(|d*){216}/, the auto-
possessification code could take exponential time to complete. A recursion
-depth limit of 10000 has been imposed to limit the resources used by this
+depth limit of 1000 has been imposed to limit the resources used by this
optimization. This infelicity was discovered by the LLVM fuzzer.
9. A pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
@@ -67,7 +67,10 @@
caused a stack overflow instead of the diagnosis of a non-fixed length
lookbehind assertion. This bug was discovered by the LLVM fuzzer.
+17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
+(e.g. /(?<=\Ka)/) could make pcre2grep loop.
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest 2015-04-01 15:37:54 UTC (rev 241)
+++ code/trunk/RunGrepTest 2015-04-03 11:14:19 UTC (rev 242)
@@ -524,6 +524,11 @@
(cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
+echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
+echo "a" >testtemp1grep
+echo "aaaaa" >>testtemp1grep
+(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
# Now compare the results.
@@ -544,6 +549,10 @@
(cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
+ echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
+ (cd $srcdir; $valgrind $pcre2grep --line-offsets -u '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
+ echo "RC=$?" >>testtrygrep
+
$cf $srcdir/testdata/grepoutput8 testtrygrep
if [ $? != 0 ] ; then exit 1; fi
Modified: code/trunk/src/pcre2grep.c
===================================================================
--- code/trunk/src/pcre2grep.c 2015-04-01 15:37:54 UTC (rev 241)
+++ code/trunk/src/pcre2grep.c 2015-04-03 11:14:19 UTC (rev 242)
@@ -1433,7 +1433,7 @@
static BOOL
match_patterns(char *matchptr, size_t length, unsigned int options,
- int startoffset, int *mrc)
+ size_t startoffset, int *mrc)
{
int i;
size_t slen = length;
@@ -1581,12 +1581,12 @@
{
int endlinelength;
int mrc = 0;
- int startoffset = 0;
unsigned int options = 0;
BOOL match;
char *matchptr = ptr;
char *t = ptr;
size_t length, linelength;
+ size_t startoffset = 0;
/* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre_exec(). In multiline mode, it is the
@@ -1729,6 +1729,8 @@
{
if (!invert)
{
+ size_t oldstartoffset;
+
if (printname != NULL) fprintf(stdout, "%s:", printname);
if (number) fprintf(stdout, "%d:", linenumber);
@@ -1772,12 +1774,23 @@
if (printed || printname != NULL || number) fprintf(stdout, "\n");
}
- /* Prepare to repeat to find the next match */
+ /* Prepare to repeat to find the next match. If the pattern contained a
+ lookbehind that included \K, it is possible that the end of the match
+ might be at or before the actual starting offset we have just used. In
+ this case, start one character further on. */
match = FALSE;
if (line_buffered) fflush(stdout);
rc = 0; /* Had some success */
startoffset = offsets[1]; /* Restart after the match */
+ oldstartoffset = pcre2_get_startchar(match_data);
+ if (startoffset <= oldstartoffset)
+ {
+ if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
+ startoffset = oldstartoffset + 1;
+ if (utf)
+ while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
+ }
goto ONLY_MATCHING_RESTART;
}
}
@@ -1917,7 +1930,7 @@
for (;;)
{
startoffset = offsets[1];
- if (startoffset >= (int)linelength + endlinelength ||
+ if (startoffset >= linelength + endlinelength ||
!match_patterns(matchptr, length, options, startoffset, &mrc))
break;
FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
@@ -1973,6 +1986,7 @@
/* Advance to after the newline and increment the line number. The file
offset to the current line is maintained in filepos. */
+ END_ONE_MATCH:
ptr += linelength + endlinelength;
filepos += (int)(linelength + endlinelength);
linenumber++;
Modified: code/trunk/testdata/grepinput8
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/grepoutput8
===================================================================
(Binary files differ)