Revision: 979
http://www.exim.org/viewvc/pcre2?view=rev&revision=979
Author: ph10
Date: 2018-08-04 09:20:18 +0100 (Sat, 04 Aug 2018)
Log Message:
-----------
Fix dynamic options changing bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2018-08-03 16:56:54 UTC (rev 978)
+++ code/trunk/ChangeLog 2018-08-04 08:20:18 UTC (rev 979)
@@ -140,6 +140,13 @@
Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085,
U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by
Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl.
+
+32. In certain circumstances, option settings within patterns were not being
+correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly
+matched "ab". (During the parse process, the (?m) setting lost the fact that
+(?i) should be reset at the end of its group; without a subsequent setting
+such as (?m), the compile phase got it right.) This bug was introduced by the
+refactoring in release 10.23.
Version 10.31 12-February-2018
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2018-08-03 16:56:54 UTC (rev 978)
+++ code/trunk/src/pcre2_compile.c 2018-08-04 08:20:18 UTC (rev 979)
@@ -2284,11 +2284,14 @@
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
-/* Of the options that are changeable within the pattern, these are tracked
-during parsing. The rest are used from META_OPTIONS items when compiling. */
+/* Options that are changeable within the pattern must be tracked during
+parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
+but all must be tracked so that META_OPTIONS items set the correct values for
+the main compiling phase. */
-#define PARSE_TRACKED_OPTIONS \
- (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
+#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
+ PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@@ -2468,16 +2471,16 @@
/* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
-#ifdef SUPPORT_UNICODE
+#ifdef SUPPORT_UNICODE
/* OR: character > 255 AND not Unicode Pattern White Space */
(c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
-#endif
+#endif
/* OR: not a # comment or isspace() white space */
(c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
#ifdef SUPPORT_UNICODE
/* and not CHAR_NEL when Unicode is supported */
&& c != CHAR_NEL
-#endif
+#endif
)))
{
PCRE2_SIZE verbnamelength;
@@ -2562,16 +2565,16 @@
character, not a code unit, so we must not use MAX_255 to test its size
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
whitespace characters are those designated as "Pattern White Space" by
- Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
- U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
+ Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
+ U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0)
{
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
-#ifdef SUPPORT_UNICODE
+#ifdef SUPPORT_UNICODE
if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
-#endif
+#endif
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
@@ -3590,6 +3593,8 @@
else
{
BOOL hyphenok = TRUE;
+ uint32_t oldoptions = options;
+
top_nest->reset_group = 0;
top_nest->max_group = 0;
set = unset = 0;
@@ -3602,7 +3607,7 @@
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
hyphenok = FALSE;
- ptr++;
+ ptr++;
}
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
@@ -3618,7 +3623,7 @@
goto FAILED;
}
optset = &unset;
- hyphenok = FALSE;
+ hyphenok = FALSE;
break;
case CHAR_J: /* Record that it changed in the external options */
@@ -3677,10 +3682,9 @@
}
else *parsed_pattern++ = META_NOCAPTURE;
- /* If nothing changed, no need to record. The check of hyphenok catches
- the (?^) case. */
+ /* If the option bits are the same as they were before this item was
+ processed, there is no need to record a META_OPTIONS item. */
- if (set != 0 || unset != 0 || !hyphenok)
+ if (options != oldoptions)
{
*parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options;
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2018-08-03 16:56:54 UTC (rev 978)
+++ code/trunk/testdata/testinput1 2018-08-04 08:20:18 UTC (rev 979)
@@ -2184,6 +2184,11 @@
Blah blah
blaH blah
+/((?i)blah)\s+(?m)A(?i:\1)/
+ blah ABLAH
+\= Expect no match
+ blah aBLAH
+
/(?>a*)*/
a
aa
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2018-08-03 16:56:54 UTC (rev 978)
+++ code/trunk/testdata/testoutput1 2018-08-04 08:20:18 UTC (rev 979)
@@ -3346,6 +3346,14 @@
0: blaH blah
1: blaH
+/((?i)blah)\s+(?m)A(?i:\1)/
+ blah ABLAH
+ 0: blah ABLAH
+ 1: blah
+\= Expect no match
+ blah aBLAH
+No match
+
/(?>a*)*/
a
0: a