[Pcre-svn] [409] code/trunk: Forbid \K patterns that end bef…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [409] code/trunk: Forbid \K patterns that end before they start in pcre2_substitute().
Revision: 409
          http://www.exim.org/viewvc/pcre2?view=rev&revision=409
Author:   ph10
Date:     2015-11-03 17:38:00 +0000 (Tue, 03 Nov 2015)
Log Message:
-----------
Forbid \K patterns that end before they start in pcre2_substitute().


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcre2api.3
    code/trunk/src/pcre2.h
    code/trunk/src/pcre2.h.in
    code/trunk/src/pcre2_error.c
    code/trunk/src/pcre2_substitute.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/ChangeLog    2015-11-03 17:38:00 UTC (rev 409)
@@ -257,7 +257,10 @@


74. Give an error if a lookbehind assertion is longer than 65535 code units.

+75. Give an error in pcre2_substitute() if a match ends before it starts (as a
+result of the use of \K).

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/doc/pcre2api.3    2015-11-03 17:38:00 UTC (rev 409)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -2666,7 +2666,9 @@
 This function calls \fBpcre2_match()\fP and then makes a copy of the subject
 string in \fIoutputbuffer\fP, replacing the part that was matched with the
 \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
-be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
+be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in 
+which a \eK item in a lookahead in the pattern causes the match to end before
+it starts are not supported, and give rise to an error return.
 .P
 The first seven arguments of \fBpcre2_substitute()\fP are the same as for
 \fBpcre2_match()\fP, except that the partial matching options are not
@@ -2769,8 +2771,9 @@
 is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
 errors in the replacement string, with more particular errors being
 PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
-PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
-PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all
+PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket not found),
+PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution), and
+PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started). As for all
 PCRE2 errors, a text message that describes the error can be obtained by
 calling \fBpcre2_get_error_message()\fP.
 .
@@ -3066,6 +3069,6 @@
 .rs
 .sp
 .nf
-Last updated: 16 October 2015
+Last updated: 03 November 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2.h
===================================================================
--- code/trunk/src/pcre2.h    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2.h    2015-11-03 17:38:00 UTC (rev 409)
@@ -240,6 +240,7 @@
 #define PCRE2_ERROR_BADREPESCAPE      (-57)
 #define PCRE2_ERROR_REPMISSINGBRACE   (-58)
 #define PCRE2_ERROR_BADSUBSTITUTION   (-59)
+#define PCRE2_ERROR_BADSUBSPATTERN    (-60)


/* Request types for pcre2_pattern_info() */


Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2.h.in    2015-11-03 17:38:00 UTC (rev 409)
@@ -240,6 +240,7 @@
 #define PCRE2_ERROR_BADREPESCAPE      (-57)
 #define PCRE2_ERROR_REPMISSINGBRACE   (-58)
 #define PCRE2_ERROR_BADSUBSTITUTION   (-59)
+#define PCRE2_ERROR_BADSUBSPATTERN    (-60)


/* Request types for pcre2_pattern_info() */


Modified: code/trunk/src/pcre2_error.c
===================================================================
--- code/trunk/src/pcre2_error.c    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2_error.c    2015-11-03 17:38:00 UTC (rev 409)
@@ -170,8 +170,8 @@
   "(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
   /* 85 */
   "using \\C is disabled in this PCRE2 library\0"
-  "regular expression is too complicated\0" 
-  "lookbehind assertion is too long\0" 
+  "regular expression is too complicated\0"
+  "lookbehind assertion is too long\0"
   ;


/* Match-time and UTF error texts are in the same format. */
@@ -247,7 +247,9 @@
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
"bad escape sequence in replacement string\0"
"expected closing curly bracket in replacement string\0"
- "bad substitution in replacement string\0"
+ "bad substitution in replacement string\0"
+ /* 60 */
+ "match with end before start is not supported\0"
;



Modified: code/trunk/src/pcre2_substitute.c
===================================================================
--- code/trunk/src/pcre2_substitute.c    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2_substitute.c    2015-11-03 17:38:00 UTC (rev 409)
@@ -55,7 +55,7 @@
 /* In extended mode, we recognize ${name:+set text:unset text} and similar
 constructions. This requires the identification of unescaped : and }
 characters. This function scans for such. It must deal with nested ${
-constructions. The pointer to the text is updated, either to the required end 
+constructions. The pointer to the text is updated, either to the required end
 character, or to where an error was detected.


Arguments:
@@ -107,7 +107,7 @@

   else if (*ptr == CHAR_BACKSLASH)
     {
-    int erc; 
+    int erc;
     int errorcode = 0;
     uint32_t ch;


@@ -279,10 +279,10 @@

   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
     match_data, mcontext);
-    
+
 #ifdef SUPPORT_UNICODE
   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
-#endif   
+#endif


   /* Any error other than no match returns the error code. No match when not
   doing the special after-empty-match global rematch, or when at the end of the
@@ -320,8 +320,15 @@
     continue;
     }


- /* Handle a successful match. */
+ /* Handle a successful match. Matches that use \K to end before they start
+ are not supported. */

+  if (ovector[1] < ovector[0])
+    {
+    rc = PCRE2_ERROR_BADSUBSPATTERN;
+    goto EXIT;
+    }
+
   subs++;
   if (rc == 0) rc = ovector_count;
   fraglength = ovector[0] - start_offset;
@@ -409,14 +416,14 @@
           next = *ptr;
           if (next < CHAR_0 || next > CHAR_9) break;
           group = group * 10 + next - CHAR_0;
-          
+
           /* A check for a number greater than the highest captured group
           is sufficient here; no need for a separate overflow check. */
-            
+
           if (group > code->top_bracket)
             {
             rc = PCRE2_ERROR_NOSUBSTRING;
-            goto PTREXIT;   
+            goto PTREXIT;
             }
           }
         }
@@ -439,7 +446,7 @@


       if (inparens)
         {
-        
+
         if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
           {
           special = *(++ptr);
@@ -501,8 +508,8 @@
       else
         {
         PCRE2_SPTR subptr, subptrend;
-        
-        /* Find a number for a named group. In case there are duplicate names, 
+
+        /* Find a number for a named group. In case there are duplicate names,
         search for the first one that is set. */


         if (group < 0)
@@ -516,18 +523,18 @@
             if (ng < ovector_count)
               {
               if (group < 0) group = ng;          /* First in ovector */
-              if (ovector[ng*2] != PCRE2_UNSET) 
+              if (ovector[ng*2] != PCRE2_UNSET)
                 {
                 group = ng;                       /* First that is set */
                 break;
-                } 
+                }
               }
             }
-            
-          /* If group is still negative, it means we did not find a group that 
+
+          /* If group is still negative, it means we did not find a group that
           is in the ovector. Just set the first group. */
-          
-          if (group < 0) group = GET2(first, 0); 
+
+          if (group < 0) group = GET2(first, 0);
           }


         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/testdata/testinput2    2015-11-03 17:38:00 UTC (rev 409)
@@ -4596,4 +4596,7 @@


/(?<!a{65535})x/I

+/(?=a\K)/replace=z
+    BaCaD
+
 # End of testinput2 


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/testdata/testoutput2    2015-11-03 17:38:00 UTC (rev 409)
@@ -14690,4 +14690,8 @@
 First code unit = 'x'
 Subject length lower bound = 1


+/(?=a\K)/replace=z
+    BaCaD
+Failed: error -60: match with end before start is not supported
+
 # End of testinput2