[Pcre-svn] [878] code/trunk: Fix multiple multiline matching issues in pcre2grep.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [878] code/trunk: Fix multiple multiline matching issues in pcre2grep.
Revision: 878
          http://www.exim.org/viewvc/pcre2?view=rev&revision=878
Author:   ph10
Date:     2017-11-13 16:52:39 +0000 (Mon, 13 Nov 2017)
Log Message:
-----------
Fix multiple multiline matching issues in pcre2grep.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/RunGrepTest
    code/trunk/src/pcre2grep.c
    code/trunk/testdata/grepoutput


Added Paths:
-----------
    code/trunk/testdata/grepinputM


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2017-11-09 17:50:59 UTC (rev 877)
+++ code/trunk/ChangeLog    2017-11-13 16:52:39 UTC (rev 878)
@@ -45,7 +45,13 @@
 recursion. ACCEPT in an assertion now closes only those groups that were 
 started within that assertion. Fixes oss-fuzz issues 3852 and 3891.


+13. Multiline matching in pcre2grep was misbehaving if the pattern matched
+within a line, and then matched again at the end of the line and over into
+subsequent lines. Behaviour was different with and without colouring, and
+sometimes context lines were incorrectly printed and/or line endings were lost.
+All these issues should now be fixed.

+
Version 10.30 14-August-2017
----------------------------


Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2017-11-09 17:50:59 UTC (rev 877)
+++ code/trunk/RunGrepTest    2017-11-13 16:52:39 UTC (rev 878)
@@ -248,7 +248,7 @@
 echo "RC=$?" >>testtrygrep


echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
@@ -391,6 +391,12 @@
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
+echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
@@ -494,25 +500,25 @@
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinputM 'fox' ./test* | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinputM --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep
echo "grepinput8" >testtemp2grep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
@@ -582,7 +588,7 @@
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 116 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -th 'the' testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinputM -th 'the' testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 117 -----------------------------" >>testtrygrep
@@ -610,11 +616,21 @@
(cd $srcdir; $valgrind $vjs $pcre2grep -w 'cat|dog' testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep

-echo "---------------------------- Test 122 -----------------------------" >>testtrygrep
+echo "---------------------------- Test 123 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -w 'dog|cat' testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep

+echo "---------------------------- Test 124 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep

+
# Now compare the results.

$cf $srcdir/testdata/grepoutput testtrygrep

Modified: code/trunk/src/pcre2grep.c
===================================================================
--- code/trunk/src/pcre2grep.c    2017-11-09 17:50:59 UTC (rev 877)
+++ code/trunk/src/pcre2grep.c    2017-11-13 16:52:39 UTC (rev 878)
@@ -2505,7 +2505,10 @@
   match = match_patterns(ptr, length, options, startoffset, &mrc);
   options = PCRE2_NOTEMPTY;


- /* If it's a match or a not-match (as required), do what's wanted. */
+ /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
+ only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
+ return code - to output data lines, so that binary zeroes are treated as just
+ another data character. */

   if (match != invert)
     {
@@ -2734,27 +2737,6 @@
       if (printname != NULL) fprintf(stdout, "%s:", printname);
       if (number) fprintf(stdout, "%d:", linenumber);


-      /* In multiline mode, we want to print to the end of the line in which
-      the end of the matched string is found, so we adjust linelength and the
-      line number appropriately, but only when there actually was a match
-      (invert not set). Because the PCRE2_FIRSTLINE option is set, the start of
-      the match will always be before the first newline sequence. */
-
-      if (multiline & !invert)
-        {
-        char *endmatch = ptr + offsets[1];
-        t = ptr;
-        while (t <= endmatch)
-          {
-          t = end_of_line(t, endptr, &endlinelength);
-          if (t < endmatch) linenumber++; else break;
-          }
-        linelength = t - ptr - endlinelength;
-        }
-
-      /*** NOTE: Use only fwrite() to output the data line, so that binary
-      zeroes are treated as just another data character. */
-
       /* This extra option, for Jeffrey Friedl's debugging requirements,
       replaces the matched string, or a specific captured string if it exists,
       with X. When this happens, colouring is ignored. */
@@ -2771,10 +2753,13 @@
       else
 #endif


-      /* We have to split the line(s) up if colouring, and search for further
-      matches, but not of course if the line is a non-match. */
+      /* In multiline mode, or if colouring, we have to split the line(s) up
+      and search for further matches, but not of course if the line is a
+      non-match. In multiline mode this is necessary in case there is another
+      match that spans the end of the current line. When colouring we want to
+      colour all matches. */


-      if (do_colour && !invert)
+      if ((multiline || do_colour) && !invert)
         {
         int plength;
         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
@@ -2781,10 +2766,35 @@
         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
         for (;;)
           {
-          startoffset = offsets[1];
-          if (startoffset >= linelength + endlinelength ||
-              !match_patterns(ptr, length, options, startoffset, &mrc))
-            break;
+          startoffset = offsets[1];  /* Advance after previous match. */
+
+          /* If the current match ended past the end of the line (only possible
+          in multiline mode), we must move on to the line in which it did end
+          before searching for more matches. Because the PCRE2_FIRSTLINE option
+          is set, the start of the match will always be before the first
+          newline sequence. */
+
+          while (startoffset > linelength + endlinelength)
+            {
+            ptr += linelength + endlinelength;
+            filepos += (int)(linelength + endlinelength);
+            linenumber++;
+            startoffset -= (int)(linelength + endlinelength);
+            t = end_of_line(ptr, endptr, &endlinelength);
+            linelength = t - ptr - endlinelength;
+            length = (size_t)(endptr - ptr);
+            }
+
+          /* If startoffset is at the exact end of the line it means this
+          complete line was the final part of the match, so there is nothing
+          more to do. */
+
+          if (startoffset == linelength + endlinelength) break;
+
+          /* Otherwise, run a match from within the final line, and if found,
+          loop for any that may follow. */
+
+          if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
           FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout);
           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
           }
@@ -2797,7 +2807,7 @@
         if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout);
         }


-      /* Not colouring; no need to search for further matches */
+      /* Not colouring or multiline; no need to search for further matches. */


       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
       }


Added: code/trunk/testdata/grepinputM
===================================================================
--- code/trunk/testdata/grepinputM                            (rev 0)
+++ code/trunk/testdata/grepinputM    2017-11-13 16:52:39 UTC (rev 878)
@@ -0,0 +1,17 @@
+Data file for multiline tests of multiple matches.
+
+start end in between start
+end and following
+Other stuff
+
+start end in between start
+end and following start
+end other stuff
+
+start end in between start
+
+end
+
+** These two lines must be last.
+start end in between start
+end


Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)