[Pcre-svn] [528] code/trunk: Fix bad interaction between -o and -M in pcre2grep.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [528] code/trunk: Fix bad interaction between -o and -M in pcre2grep.
Revision: 528
          http://www.exim.org/viewvc/pcre2?view=rev&revision=528
Author:   ph10
Date:     2016-06-17 18:37:26 +0100 (Fri, 17 Jun 2016)
Log Message:
-----------
Fix bad interaction between -o and -M in pcre2grep.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/RunGrepTest
    code/trunk/src/pcre2grep.c
    code/trunk/testdata/grepinput
    code/trunk/testdata/grepoutput


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-06-17 11:45:16 UTC (rev 527)
+++ code/trunk/ChangeLog    2016-06-17 17:37:26 UTC (rev 528)
@@ -147,7 +147,11 @@
 pcre2_match.c, in anticipation that this is needed for the same reason it was 
 recently added to pcrecpp.cc in PCRE1.


+38. Using -o with -M in pcre2grep could cause unnecessary repeated output when
+the match extended over a line boundary, as it tried to find more matches "on
+the same line" - but it was already over the end.

+
Version 10.21 12-January-2016
-----------------------------


Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2016-06-17 11:45:16 UTC (rev 527)
+++ code/trunk/RunGrepTest    2016-06-17 17:37:26 UTC (rev 528)
@@ -556,6 +556,18 @@
 (cd $srcdir; $valgrind $vjs $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep


+echo "---------------------------- Test 110 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 111 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 112 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
# Now compare the results.

$cf $srcdir/testdata/grepoutput testtrygrep

Modified: code/trunk/src/pcre2grep.c
===================================================================
--- code/trunk/src/pcre2grep.c    2016-06-17 11:45:16 UTC (rev 527)
+++ code/trunk/src/pcre2grep.c    2016-06-17 17:37:26 UTC (rev 528)
@@ -1886,7 +1886,7 @@
   size_t startoffset = 0;


/* At this point, ptr is at the start of a line. We need to find the length
- of the subject string to pass to pcre_exec(). In multiline mode, it is the
+ of the subject string to pass to pcre2_match(). In multiline mode, it is the
length remainder of the data in the buffer. Otherwise, it is the length of
the next line, excluding the terminating newline. After matching, we always
advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
@@ -1977,7 +1977,7 @@

match = match_patterns(matchptr, length, options, startoffset, &mrc);
options = PCRE2_NOTEMPTY;
-
+
/* If it's a match or a not-match (as required), do what's wanted. */

   if (match != invert)
@@ -2074,14 +2074,22 @@
             fprintf(stdout, STDOUT_NL);
           }


-        /* Prepare to repeat to find the next match. If the pattern contained a
-        lookbehind that included \K, it is possible that the end of the match
-        might be at or before the actual starting offset we have just used. In
-        this case, start one character further on. */
+        /* Prepare to repeat to find the next match in the line. */


         match = FALSE;
         if (line_buffered) fflush(stdout);
         rc = 0;                      /* Had some success */
+
+        /* If the current match ended past the end of the line (only possible
+        in multiline mode), we are done with this line. */
+
+        if (offsets[1] > linelength) goto END_ONE_MATCH;
+
+        /* If the pattern contained a lookbehind that included \K, it is
+        possible that the end of the match might be at or before the actual
+        starting offset we have just used. In this case, start one character
+        further on. */
+
         startoffset = offsets[1];    /* Restart after the match */
         oldstartoffset = pcre2_get_startchar(match_data);
         if (startoffset <= oldstartoffset)
@@ -2786,12 +2794,12 @@
   }


 sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
-p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
-  compile_context);
-  
+p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode, 
+  &erroffset, compile_context);
+
 /* Handle successful compile */
- 
-if (p->compiled != NULL) 
+
+if (p->compiled != NULL)
   {
 #ifdef SUPPORT_PCRE2GREP_JIT
   if (use_jit)
@@ -2798,12 +2806,12 @@
     {
     errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
     if (errcode == 0) return TRUE;
-    erroffset = PCRE2_SIZE_MAX;     /* Will get reduced to patlen below */ 
+    erroffset = PCRE2_SIZE_MAX;     /* Will get reduced to patlen below */
     }
-  else     
+  else
 #endif
   return TRUE;
-  } 
+  }


/* Handle compile and JIT compile errors */


Modified: code/trunk/testdata/grepinput
===================================================================
(Binary files differ)

Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)