Revision: 632
http://vcs.pcre.org/viewvc?view=rev&revision=632
Author: ph10
Date: 2011-07-22 18:47:49 +0100 (Fri, 22 Jul 2011)
Log Message:
-----------
Fix pcregrep repeated match in same line bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/RunGrepTest
code/trunk/pcregrep.c
code/trunk/testdata/grepinput
code/trunk/testdata/grepoutput
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/ChangeLog 2011-07-22 17:47:49 UTC (rev 632)
@@ -165,6 +165,13 @@
31. A *MARK encountered during the processing of a positive assertion is now
recorded and passed back (compatible with Perl).
+
+32. If --only-matching or --colour was set on a pcregrep call whose pattern
+ had alternative anchored branches, the search for a second match in a line
+ was done as if at the line start. Thus, for example, /^01|^02/ incorrectly
+ matched the line "0102" twice. The same bug affected patterns that started
+ with a backwards assertion. For example /\b01|\b02/ also matched "0102"
+ twice.
Version 8.12 15-Jan-2011
Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest 2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/RunGrepTest 2011-07-22 17:47:49 UTC (rev 632)
@@ -340,6 +340,54 @@
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
echo "RC=$?" >>testtry
+echo "---------------------------- Test 71 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 72 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 73 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 74 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 75 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 76 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 77 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 78 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 79 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 80 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 81 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 82 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtry
Modified: code/trunk/pcregrep.c
===================================================================
--- code/trunk/pcregrep.c 2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/pcregrep.c 2011-07-22 17:47:49 UTC (rev 632)
@@ -935,10 +935,11 @@
to find all possible matches.
Arguments:
- matchptr the start of the subject
- length the length of the subject to match
- offsets the offets vector to fill in
- mrc address of where to put the result of pcre_exec()
+ matchptr the start of the subject
+ length the length of the subject to match
+ startoffset where to start matching
+ offsets the offsets vector to fill in
+ mrc address of where to put the result of pcre_exec()
Returns: TRUE if there was a match
FALSE if there was no match
@@ -946,7 +947,8 @@
*/
static BOOL
-match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
+match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
+ int *mrc)
{
int i;
size_t slen = length;
@@ -958,8 +960,8 @@
}
for (i = 0; i < pattern_count; i++)
{
- *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
- PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
+ *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
+ startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
if (*mrc >= 0) return TRUE;
if (*mrc == PCRE_ERROR_NOMATCH) continue;
fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
@@ -1077,6 +1079,7 @@
{
int endlinelength;
int mrc = 0;
+ int startoffset = 0;
BOOL match;
char *matchptr = ptr;
char *t = ptr;
@@ -1153,7 +1156,7 @@
than NOMATCH. This code is in a subroutine so that it can be re-used for
finding subsequent matches when colouring matched lines. */
- match = match_patterns(matchptr, length, offsets, &mrc);
+ match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
/* If it's a match or a not-match (as required), do what's wanted. */
@@ -1216,11 +1219,14 @@
}
}
else if (printname != NULL || number) fprintf(stdout, "\n");
+ /*
matchptr += offsets[1];
length -= offsets[1];
+ */
match = FALSE;
if (line_buffered) fflush(stdout);
- rc = 0; /* Had some success */
+ rc = 0; /* Had some success */
+ startoffset = offsets[1];
goto ONLY_MATCHING_RESTART;
}
}
@@ -1360,12 +1366,18 @@
fprintf(stdout, "%c[00m", 0x1b);
for (;;)
{
+ /*
last_offset += offsets[1];
matchptr += offsets[1];
length -= offsets[1];
+ */
+
+ startoffset = offsets[1];
+ last_offset = startoffset;
if (last_offset >= linelength + endlinelength ||
- !match_patterns(matchptr, length, offsets, &mrc)) break;
- FWRITE(matchptr, 1, offsets[0], stdout);
+ !match_patterns(matchptr, length, startoffset, offsets, &mrc))
+ break;
+ FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
fprintf(stdout, "%c[%sm", 0x1b, colour_string);
FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
fprintf(stdout, "%c[00m", 0x1b);
Modified: code/trunk/testdata/grepinput
===================================================================
--- code/trunk/testdata/grepinput 2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/testdata/grepinput 2011-07-22 17:47:49 UTC (rev 632)
@@ -602,6 +602,8 @@
AB.VE
AB.VE the turtle
+010203040506
+
PUT NEW DATA ABOVE THIS LINE.
=============================
Modified: code/trunk/testdata/grepoutput
===================================================================
--- code/trunk/testdata/grepoutput 2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/testdata/grepoutput 2011-07-22 17:47:49 UTC (rev 632)
@@ -10,7 +10,7 @@
7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears.
10:This pattern is in lower case.
-608:Check up on PATTERN near the end.
+610:Check up on PATTERN near the end.
RC=0
---------------------------- Test 4 ------------------------------
4
@@ -19,7 +19,7 @@
./testdata/grepinput:7:PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
./testdata/grepinput:10:This pattern is in lower case.
-./testdata/grepinput:608:Check up on PATTERN near the end.
+./testdata/grepinput:610:Check up on PATTERN near the end.
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx:5:Pattern
./testdata/grepinputx:42:This line contains pattern not on a line by itself.
@@ -28,7 +28,7 @@
7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears.
10:This pattern is in lower case.
-608:Check up on PATTERN near the end.
+610:Check up on PATTERN near the end.
3:Here is the pattern again.
5:Pattern
42:This line contains pattern not on a line by itself.
@@ -323,10 +323,10 @@
./testdata/grepinput-9-
./testdata/grepinput:10:This pattern is in lower case.
--
-./testdata/grepinput-605-PUT NEW DATA ABOVE THIS LINE.
-./testdata/grepinput-606-=============================
-./testdata/grepinput-607-
-./testdata/grepinput:608:Check up on PATTERN near the end.
+./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE.
+./testdata/grepinput-608-=============================
+./testdata/grepinput-609-
+./testdata/grepinput:610:Check up on PATTERN near the end.
--
./testdata/grepinputx-1-This is a second file of input for the pcregrep tests.
./testdata/grepinputx-2-
@@ -348,8 +348,8 @@
./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long.
./testdata/grepinput-13-
--
-./testdata/grepinput:608:Check up on PATTERN near the end.
-./testdata/grepinput-609-This is the last line of this file.
+./testdata/grepinput:610:Check up on PATTERN near the end.
+./testdata/grepinput-611-This is the last line of this file.
--
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx-4-
@@ -380,6 +380,7 @@
---------------------------- Test 37 -----------------------------
aaaaa0
aaaaa2
+010203040506
RC=0
======== STDERR ========
pcregrep: pcre_exec() gave error -8 while matching this text:
@@ -593,3 +594,43 @@
?[00m?[1;31mtriple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
?[00mRC=0
+---------------------------- Test 71 -----------------------------
+01
+RC=0
+---------------------------- Test 72 -----------------------------
+?[1;31m01?[00m0203040506
+RC=0
+---------------------------- Test 73 -----------------------------
+?[1;31m01?[00m
+RC=0
+---------------------------- Test 74 -----------------------------
+01
+02
+RC=0
+---------------------------- Test 75 -----------------------------
+?[1;31m01?[00m?[1;31m02?[00m03040506
+RC=0
+---------------------------- Test 76 -----------------------------
+?[1;31m01?[00m
+?[1;31m02?[00m
+RC=0
+---------------------------- Test 77 -----------------------------
+01
+03
+RC=0
+---------------------------- Test 78 -----------------------------
+?[1;31m01?[00m02?[1;31m03?[00m040506
+RC=0
+---------------------------- Test 79 -----------------------------
+?[1;31m01?[00m
+?[1;31m03?[00m
+RC=0
+---------------------------- Test 80 -----------------------------
+01
+RC=0
+---------------------------- Test 81 -----------------------------
+?[1;31m01?[00m0203040506
+RC=0
+---------------------------- Test 82 -----------------------------
+?[1;31m01?[00m
+RC=0