[Pcre-svn] [578] code/trunk: Fix *MARK length check in UTF mode

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [578] code/trunk: Fix *MARK length check in UTF mode (it was checking characters, not code units)
Revision: 578
          http://www.exim.org/viewvc/pcre2?view=rev&revision=578
Author:   ph10
Date:     2016-10-26 17:59:22 +0100 (Wed, 26 Oct 2016)
Log Message:
-----------
Fix *MARK length check in UTF mode (it was checking characters, not code 
units).


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcre2limits.3
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput10
    code/trunk/testdata/testinput9
    code/trunk/testdata/testoutput10
    code/trunk/testdata/testoutput9


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/ChangeLog    2016-10-26 16:59:22 UTC (rev 578)
@@ -40,6 +40,15 @@
 given only for a callout at the end of the pattern. Automatic callouts are no 
 longer inserted before and after explicit callouts in the pattern.


+Some bugs in the refactored code were subsequently fixed before release:
+
+  (a) An overall recursion such as (?0) inside a lookbehind assertion was not
+      being diagnosed as an error.
+
+  (b) In utf mode, the length of a *MARK (or other verb) name was being checked
+      in characters instead of code units, which could lead to bad code being 
+      compiled, leading to unpredictable behaviour. 
+
 4. Back references are now permitted in lookbehind assertions when there are 
 no duplicated group numbers (that is, (?| has not been used), and, if the 
 reference is by name, there is only one group of that name. The referenced
@@ -96,10 +105,7 @@
 16. The "offset" modifier in pcre2test was not being ignored (as documented)
 when the POSIX API was in use.


-17. An overall recursion such as (?0) inside a lookbehind assertion was not
-being diagnosed as an error.

-
Version 10.22 29-July-2016
--------------------------


Modified: code/trunk/doc/pcre2limits.3
===================================================================
--- code/trunk/doc/pcre2limits.3    2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/doc/pcre2limits.3    2016-10-26 16:59:22 UTC (rev 578)
@@ -1,4 +1,4 @@
-.TH PCRE2LIMITS 3 "29 September 2016" "PCRE2 10.23"
+.TH PCRE2LIMITS 3 "26 October 2016" "PCRE2 10.23"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "SIZE AND OTHER LIMITATIONS"
@@ -55,7 +55,8 @@
 maximum number of named subpatterns is 10000.
 .P
 The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
-is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
+is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
+32-bit libraries.
 .P
 The maximum length of a string argument to a callout is the largest number a 
 32-bit unsigned integer can hold.
@@ -75,6 +76,6 @@
 .rs
 .sp
 .nf
-Last updated: 29 September 2016
+Last updated: 26 October 2016
 Copyright (c) 1997-2016 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/src/pcre2_compile.c    2016-10-26 16:59:22 UTC (rev 578)
@@ -2161,6 +2161,7 @@
 BOOL okquantifier = FALSE;
 PCRE2_SPTR name;
 PCRE2_SPTR ptrend = cb->end_pattern;
+PCRE2_SPTR verbnamestart = NULL;    /* Value avoids compiler warning */
 named_group *ng;
 nest_save *top_nest = NULL;
 nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
@@ -2248,8 +2249,10 @@


       case CHAR_RIGHT_PARENTHESIS:
       inverbname = FALSE;
+      /* This is the length in characters */
       verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
-      if (verbnamelength > MAX_MARK)
+      /* But the limit on the length is in code units */
+      if (ptr - verbnamestart - 1 > MAX_MARK)
         {
         ptr--;
         errorcode = ERR76;
@@ -3149,6 +3152,7 @@
           *parsed_pattern++ = verbs[i].meta +
             ((verbs[i].meta != META_MARK)? 0x00010000u:0);
           verblengthptr = parsed_pattern++;
+          verbnamestart = ptr;
           inverbname = TRUE;
           }
         else  /* No verb "name" argument */
@@ -8503,7 +8507,7 @@
       if (META_CODE(*gptr) == META_BIGVALUE) gptr++;
         else if (*gptr == (META_CAPTURE | group)) break;
       }
-     
+
     gptrend = parsed_skip(gptr, PSKIP_KET);
     if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED;  /* Local recursion */
     for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
@@ -8862,7 +8866,7 @@
   *errorptr = ERR16;
   return NULL;
   }
-
+  
 /* Check that all undefined public option bits are zero. */


if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)

Modified: code/trunk/testdata/testinput10
===================================================================
--- code/trunk/testdata/testinput10    2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testinput10    2016-10-26 16:59:22 UTC (rev 578)
@@ -454,4 +454,6 @@
 \= Expect no match
     123     


+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
+
# End of testinput10

Modified: code/trunk/testdata/testinput9
===================================================================
--- code/trunk/testdata/testinput9    2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testinput9    2016-10-26 16:59:22 UTC (rev 578)
@@ -258,4 +258,6 @@


/(*MARK:a\x{100}b)z/alt_verbnames

+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
+
# End of testinput9

Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10    2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testoutput10    2016-10-26 16:59:22 UTC (rev 578)
@@ -1564,4 +1564,7 @@
     123     
 No match


+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
+Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+
# End of testinput10

Modified: code/trunk/testdata/testoutput9
===================================================================
--- code/trunk/testdata/testoutput9    2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testoutput9    2016-10-26 16:59:22 UTC (rev 578)
@@ -364,4 +364,7 @@
 /(*MARK:a\x{100}b)z/alt_verbnames 
 Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large


+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
+Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+
# End of testinput9