[Pcre-svn] [896] code/trunk: Fix \K issues in pcre2grep.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [896] code/trunk: Fix \K issues in pcre2grep.
Revision: 896
          http://www.exim.org/viewvc/pcre2?view=rev&revision=896
Author:   ph10
Date:     2017-12-26 15:10:04 +0000 (Tue, 26 Dec 2017)
Log Message:
-----------
Fix \K issues in pcre2grep.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/RunGrepTest
    code/trunk/src/pcre2grep.c
    code/trunk/testdata/grepoutput


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2017-12-24 10:27:13 UTC (rev 895)
+++ code/trunk/ChangeLog    2017-12-26 15:10:04 UTC (rev 896)
@@ -100,7 +100,12 @@
 end at the original start point. Also arranged for it to detect when \K causes
 the end of a match to be before its start.


+24. Similar to 23 above, strange things (including loops) could happen in
+pcre2grep when \K was used in an assertion and either --colour was used or
+multiline mode was in effect. The "end at original start point" bug is fixed,
+and if the end point is found to be before the start point, they are swapped.

+
Version 10.30 14-August-2017
----------------------------


Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2017-12-24 10:27:13 UTC (rev 895)
+++ code/trunk/RunGrepTest    2017-12-26 15:10:04 UTC (rev 896)
@@ -630,7 +630,18 @@
 (cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
 echo "RC=$?" >>testtrygrep


+echo "---------------------------- Test 125 -----------------------------" >>testtrygrep
+printf "abcd\n" >testNinputgrep
+$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always '(?<=\K[ac])' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep

+
# Now compare the results.

$cf $srcdir/testdata/grepoutput testtrygrep

Modified: code/trunk/src/pcre2grep.c
===================================================================
--- code/trunk/src/pcre2grep.c    2017-12-24 10:27:13 UTC (rev 895)
+++ code/trunk/src/pcre2grep.c    2017-12-26 15:10:04 UTC (rev 896)
@@ -1607,7 +1607,7 @@
 */


 static void
-do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart, 
+do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
   char *endptr, const char *printname)
 {
 if (after_context > 0 && lastmatchnumber > 0)
@@ -2764,12 +2764,39 @@
       if ((multiline || do_colour) && !invert)
         {
         int plength;
+        PCRE2_SIZE endprevious;
+
+        /* The use of \K may make the end offset earlier than the start. In
+        this situation, swap them round. */
+
+        if (offsets[0] > offsets[1])
+          {
+          PCRE2_SIZE temp = offsets[0];
+          offsets[0] = offsets[1];
+          offsets[1] = temp;
+          }
+
         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
+
         for (;;)
           {
-          startoffset = offsets[1];  /* Advance after previous match. */
+          PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);


+          endprevious = offsets[1];
+          startoffset = endprevious;  /* Advance after previous match. */
+
+          /* If the pattern contained a lookbehind that included \K, it is
+          possible that the end of the match might be at or before the actual
+          starting offset we have just used. In this case, start one character
+          further on. */
+
+          if (startoffset <= oldstartoffset)
+            {
+            startoffset = oldstartoffset + 1;
+            if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
+            }
+
           /* If the current match ended past the end of the line (only possible
           in multiline mode), we must move on to the line in which it did end
           before searching for more matches. Because the PCRE2_FIRSTLINE option
@@ -2782,6 +2809,7 @@
             filepos += (int)(linelength + endlinelength);
             linenumber++;
             startoffset -= (int)(linelength + endlinelength);
+            endprevious -= (int)(linelength + endlinelength);
             t = end_of_line(ptr, endptr, &endlinelength);
             linelength = t - ptr - endlinelength;
             length = (size_t)(endptr - ptr);
@@ -2797,7 +2825,18 @@
           loop for any that may follow. */


           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
-          FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout);
+
+          /* The use of \K may make the end offset earlier than the start. In
+          this situation, swap them round. */
+
+          if (offsets[0] > offsets[1])
+            {
+            PCRE2_SIZE temp = offsets[0];
+            offsets[0] = offsets[1];
+            offsets[1] = temp;
+            }
+
+          FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
           }


@@ -2805,8 +2844,8 @@
         and its line-ending characters (if they matched the pattern), so there
         may be no more to print. */


-        plength = (int)((linelength + endlinelength) - startoffset);
-        if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout);
+        plength = (int)((linelength + endlinelength) - endprevious);
+        if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
         }


       /* Not colouring or multiline; no need to search for further matches. */


Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)