Revision: 578
http://www.exim.org/viewvc/pcre2?view=rev&revision=578
Author: ph10
Date: 2016-10-26 17:59:22 +0100 (Wed, 26 Oct 2016)
Log Message:
-----------
Fix *MARK length check in UTF mode (it was checking characters, not code
units).
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcre2limits.3
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput10
code/trunk/testdata/testinput9
code/trunk/testdata/testoutput10
code/trunk/testdata/testoutput9
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/ChangeLog 2016-10-26 16:59:22 UTC (rev 578)
@@ -40,6 +40,15 @@
given only for a callout at the end of the pattern. Automatic callouts are no
longer inserted before and after explicit callouts in the pattern.
+Some bugs in the refactored code were subsequently fixed before release:
+
+ (a) An overall recursion such as (?0) inside a lookbehind assertion was not
+ being diagnosed as an error.
+
+ (b) In UTF mode, the length of a *MARK (or other verb) name was being checked
+ in characters instead of code units, which could cause bad code to be
+ compiled, resulting in unpredictable behaviour.
+
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
reference is by name, there is only one group of that name. The referenced
@@ -96,10 +105,7 @@
16. The "offset" modifier in pcre2test was not being ignored (as documented)
when the POSIX API was in use.
-17. An overall recursion such as (?0) inside a lookbehind assertion was not
-being diagnosed as an error.
-
Version 10.22 29-July-2016
--------------------------
Modified: code/trunk/doc/pcre2limits.3
===================================================================
--- code/trunk/doc/pcre2limits.3 2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/doc/pcre2limits.3 2016-10-26 16:59:22 UTC (rev 578)
@@ -1,4 +1,4 @@
-.TH PCRE2LIMITS 3 "29 September 2016" "PCRE2 10.23"
+.TH PCRE2LIMITS 3 "26 October 2016" "PCRE2 10.23"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "SIZE AND OTHER LIMITATIONS"
@@ -55,7 +55,8 @@
maximum number of named subpatterns is 10000.
.P
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
-is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
+is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
+32-bit libraries.
.P
The maximum length of a string argument to a callout is the largest number a
32-bit unsigned integer can hold.
@@ -75,6 +76,6 @@
.rs
.sp
.nf
-Last updated: 29 September 2016
+Last updated: 26 October 2016
Copyright (c) 1997-2016 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/src/pcre2_compile.c 2016-10-26 16:59:22 UTC (rev 578)
@@ -2161,6 +2161,7 @@
BOOL okquantifier = FALSE;
PCRE2_SPTR name;
PCRE2_SPTR ptrend = cb->end_pattern;
+PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */
named_group *ng;
nest_save *top_nest = NULL;
nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
@@ -2248,8 +2249,10 @@
case CHAR_RIGHT_PARENTHESIS:
inverbname = FALSE;
+ /* This is the length in characters */
verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
- if (verbnamelength > MAX_MARK)
+ /* But the limit on the length is in code units */
+ if (ptr - verbnamestart - 1 > MAX_MARK)
{
ptr--;
errorcode = ERR76;
@@ -3149,6 +3152,7 @@
*parsed_pattern++ = verbs[i].meta +
((verbs[i].meta != META_MARK)? 0x00010000u:0);
verblengthptr = parsed_pattern++;
+ verbnamestart = ptr;
inverbname = TRUE;
}
else /* No verb "name" argument */
@@ -8503,7 +8507,7 @@
if (META_CODE(*gptr) == META_BIGVALUE) gptr++;
else if (*gptr == (META_CAPTURE | group)) break;
}
-
+
gptrend = parsed_skip(gptr, PSKIP_KET);
if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */
for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
@@ -8862,7 +8866,7 @@
*errorptr = ERR16;
return NULL;
}
-
+
/* Check that all undefined public option bits are zero. */
if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
Modified: code/trunk/testdata/testinput10
===================================================================
--- code/trunk/testdata/testinput10 2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testinput10 2016-10-26 16:59:22 UTC (rev 578)
@@ -454,4 +454,6 @@
\= Expect no match
123
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
+
# End of testinput10
Modified: code/trunk/testdata/testinput9
===================================================================
--- code/trunk/testdata/testinput9 2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testinput9 2016-10-26 16:59:22 UTC (rev 578)
@@ -258,4 +258,6 @@
/(*MARK:a\x{100}b)z/alt_verbnames
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
+
# End of testinput9
Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10 2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testoutput10 2016-10-26 16:59:22 UTC (rev 578)
@@ -1564,4 +1564,7 @@
123
No match
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
+Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+
# End of testinput10
Modified: code/trunk/testdata/testoutput9
===================================================================
--- code/trunk/testdata/testoutput9 2016-10-23 17:05:09 UTC (rev 577)
+++ code/trunk/testdata/testoutput9 2016-10-26 16:59:22 UTC (rev 578)
@@ -364,4 +364,7 @@
/(*MARK:a\x{100}b)z/alt_verbnames
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
+Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+
# End of testinput9