[Pcre-svn] [632] code/trunk: Fix pcregrep repeated match in …

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [632] code/trunk: Fix pcregrep repeated match in same line bug.
Revision: 632
          http://vcs.pcre.org/viewvc?view=rev&revision=632
Author:   ph10
Date:     2011-07-22 18:47:49 +0100 (Fri, 22 Jul 2011)


Log Message:
-----------
Fix pcregrep repeated match in same line bug.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/RunGrepTest
    code/trunk/pcregrep.c
    code/trunk/testdata/grepinput
    code/trunk/testdata/grepoutput


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/ChangeLog    2011-07-22 17:47:49 UTC (rev 632)
@@ -165,6 +165,13 @@


 31. A *MARK encountered during the processing of a positive assertion is now
     recorded and passed back (compatible with Perl). 
+    
+32. If --only-matching or --colour was set on a pcregrep call whose pattern
+    had alternative anchored branches, the search for a second match in a line
+    was done as if at the line start. Thus, for example, /^01|^02/ incorrectly
+    matched the line "0102" twice. The same bug affected patterns that started
+    with a backwards assertion. For example /\b01|\b02/ also matched "0102"
+    twice. 



Version 8.12 15-Jan-2011

Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/RunGrepTest    2011-07-22 17:47:49 UTC (rev 632)
@@ -340,6 +340,54 @@
 (cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
 echo "RC=$?" >>testtry


+echo "---------------------------- Test 71 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 72 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 73 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 74 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 75 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 76 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 77 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 78 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 79 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 80 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 81 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 82 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
+echo "RC=$?" >>testtry
+
# Now compare the results.

$cf $srcdir/testdata/grepoutput testtry

Modified: code/trunk/pcregrep.c
===================================================================
--- code/trunk/pcregrep.c    2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/pcregrep.c    2011-07-22 17:47:49 UTC (rev 632)
@@ -935,10 +935,11 @@
 to find all possible matches.


 Arguments:
-  matchptr    the start of the subject
-  length      the length of the subject to match
-  offsets     the offets vector to fill in
-  mrc         address of where to put the result of pcre_exec()
+  matchptr     the start of the subject
+  length       the length of the subject to match
+  startoffset  where to start matching
+  offsets      the offsets vector to fill in
+  mrc          address of where to put the result of pcre_exec()


 Returns:      TRUE if there was a match
               FALSE if there was no match
@@ -946,7 +947,8 @@
 */


 static BOOL
-match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
+match_patterns(char *matchptr, size_t length, int startoffset, int *offsets, 
+  int *mrc)
 {
 int i;
 size_t slen = length;
@@ -958,8 +960,8 @@
   }
 for (i = 0; i < pattern_count; i++)
   {
-  *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
-    PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
+  *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
+    startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
   if (*mrc >= 0) return TRUE;
   if (*mrc == PCRE_ERROR_NOMATCH) continue;
   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
@@ -1077,6 +1079,7 @@
   {
   int endlinelength;
   int mrc = 0;
+  int startoffset = 0; 
   BOOL match;
   char *matchptr = ptr;
   char *t = ptr;
@@ -1153,7 +1156,7 @@
   than NOMATCH. This code is in a subroutine so that it can be re-used for
   finding subsequent matches when colouring matched lines. */


- match = match_patterns(matchptr, length, offsets, &mrc);
+ match = match_patterns(matchptr, length, startoffset, offsets, &mrc);

/* If it's a match or a not-match (as required), do what's wanted. */

@@ -1216,11 +1219,14 @@
             }
           }
         else if (printname != NULL || number) fprintf(stdout, "\n");
+        /* 
         matchptr += offsets[1];
         length -= offsets[1];
+        */ 
         match = FALSE;
         if (line_buffered) fflush(stdout);
-        rc = 0;    /* Had some success */
+        rc = 0;                 /* Had some success */
+        startoffset = offsets[1];
         goto ONLY_MATCHING_RESTART;
         }
       }
@@ -1360,12 +1366,18 @@
         fprintf(stdout, "%c[00m", 0x1b);
         for (;;)
           {
+          /* 
           last_offset += offsets[1];
           matchptr += offsets[1];
           length -= offsets[1];
+          */
+          
+          startoffset = offsets[1];
+          last_offset = startoffset; 
           if (last_offset >= linelength + endlinelength ||
-              !match_patterns(matchptr, length, offsets, &mrc)) break;
-          FWRITE(matchptr, 1, offsets[0], stdout);
+              !match_patterns(matchptr, length, startoffset, offsets, &mrc)) 
+            break;
+          FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
           fprintf(stdout, "%c[00m", 0x1b);


Modified: code/trunk/testdata/grepinput
===================================================================
--- code/trunk/testdata/grepinput    2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/testdata/grepinput    2011-07-22 17:47:49 UTC (rev 632)
@@ -602,6 +602,8 @@
 AB.VE
 AB.VE the turtle


+010203040506
+
PUT NEW DATA ABOVE THIS LINE.
=============================


Modified: code/trunk/testdata/grepoutput
===================================================================
--- code/trunk/testdata/grepoutput    2011-07-22 10:13:28 UTC (rev 631)
+++ code/trunk/testdata/grepoutput    2011-07-22 17:47:49 UTC (rev 632)
@@ -10,7 +10,7 @@
 7:PATTERN at the start of a line.
 8:In the middle of a line, PATTERN appears.
 10:This pattern is in lower case.
-608:Check up on PATTERN near the end.
+610:Check up on PATTERN near the end.
 RC=0
 ---------------------------- Test 4 ------------------------------
 4
@@ -19,7 +19,7 @@
 ./testdata/grepinput:7:PATTERN at the start of a line.
 ./testdata/grepinput:8:In the middle of a line, PATTERN appears.
 ./testdata/grepinput:10:This pattern is in lower case.
-./testdata/grepinput:608:Check up on PATTERN near the end.
+./testdata/grepinput:610:Check up on PATTERN near the end.
 ./testdata/grepinputx:3:Here is the pattern again.
 ./testdata/grepinputx:5:Pattern
 ./testdata/grepinputx:42:This line contains pattern not on a line by itself.
@@ -28,7 +28,7 @@
 7:PATTERN at the start of a line.
 8:In the middle of a line, PATTERN appears.
 10:This pattern is in lower case.
-608:Check up on PATTERN near the end.
+610:Check up on PATTERN near the end.
 3:Here is the pattern again.
 5:Pattern
 42:This line contains pattern not on a line by itself.
@@ -323,10 +323,10 @@
 ./testdata/grepinput-9-
 ./testdata/grepinput:10:This pattern is in lower case.
 --
-./testdata/grepinput-605-PUT NEW DATA ABOVE THIS LINE.
-./testdata/grepinput-606-=============================
-./testdata/grepinput-607-
-./testdata/grepinput:608:Check up on PATTERN near the end.
+./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE.
+./testdata/grepinput-608-=============================
+./testdata/grepinput-609-
+./testdata/grepinput:610:Check up on PATTERN near the end.
 --
 ./testdata/grepinputx-1-This is a second file of input for the pcregrep tests.
 ./testdata/grepinputx-2-
@@ -348,8 +348,8 @@
 ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long.
 ./testdata/grepinput-13-
 --
-./testdata/grepinput:608:Check up on PATTERN near the end.
-./testdata/grepinput-609-This is the last line of this file.
+./testdata/grepinput:610:Check up on PATTERN near the end.
+./testdata/grepinput-611-This is the last line of this file.
 --
 ./testdata/grepinputx:3:Here is the pattern again.
 ./testdata/grepinputx-4-
@@ -380,6 +380,7 @@
 ---------------------------- Test 37 -----------------------------
 aaaaa0
 aaaaa2
+010203040506
 RC=0
 ======== STDERR ========
 pcregrep: pcre_exec() gave error -8 while matching this text:
@@ -593,3 +594,43 @@
 ?[00m?[1;31mtriple:    t6_txt    s2_tag    s_txt    p_tag    p_txt    o_tag    o_txt


?[00mRC=0
+---------------------------- Test 71 -----------------------------
+01
+RC=0
+---------------------------- Test 72 -----------------------------
+?[1;31m01?[00m0203040506
+RC=0
+---------------------------- Test 73 -----------------------------
+?[1;31m01?[00m
+RC=0
+---------------------------- Test 74 -----------------------------
+01
+02
+RC=0
+---------------------------- Test 75 -----------------------------
+?[1;31m01?[00m?[1;31m02?[00m03040506
+RC=0
+---------------------------- Test 76 -----------------------------
+?[1;31m01?[00m
+?[1;31m02?[00m
+RC=0
+---------------------------- Test 77 -----------------------------
+01
+03
+RC=0
+---------------------------- Test 78 -----------------------------
+?[1;31m01?[00m02?[1;31m03?[00m040506
+RC=0
+---------------------------- Test 79 -----------------------------
+?[1;31m01?[00m
+?[1;31m03?[00m
+RC=0
+---------------------------- Test 80 -----------------------------
+01
+RC=0
+---------------------------- Test 81 -----------------------------
+?[1;31m01?[00m0203040506
+RC=0
+---------------------------- Test 82 -----------------------------
+?[1;31m01?[00m
+RC=0