Revision: 456
http://www.exim.org/viewvc/pcre2?view=rev&revision=456
Author: ph10
Date: 2015-12-03 16:58:31 +0000 (Thu, 03 Dec 2015)
Log Message:
-----------
Fix /x bug when pattern starts with whitespace or a #-comment followed by (?-x).
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-12-02 17:39:26 UTC (rev 455)
+++ code/trunk/ChangeLog 2015-12-03 16:58:31 UTC (rev 456)
@@ -365,6 +365,12 @@
(a) Within /x extended #-comments
(b) Within the "name" part of (*MARK) and other *verbs
(c) Within the text argument of a callout
+
+108. If a pattern that was compiled with PCRE2_EXTENDED started with white
+space or a #-type comment that was followed by (?-x), which turns off
+PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again,
+pcre2_compile() assumed that (?-x) applied to the whole pattern and
+consequently mis-compiled it. This bug was found by the LLVM fuzzer.
Version 10.20 30-June-2015
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-12-02 17:39:26 UTC (rev 455)
+++ code/trunk/src/pcre2_compile.c 2015-12-03 16:58:31 UTC (rev 456)
@@ -6862,44 +6862,16 @@
newoptions = (options | set) & (~unset);
/* If the options ended with ')' this is not the start of a nested
- group with option changes, so the options change at this level. If this
- item is right at the start of the pattern, the options can be
- abstracted and made external in the pre-compile phase, and ignored in
- the compile phase. This can be helpful when matching -- for instance in
- caseless checking of required bytes.
+ group with option changes, so the options change at this level. They
+ must also be passed back for use in subsequent branches. Reset the
+ greedy defaults and the case value for firstcu and reqcu. */
- If the code pointer is not (cb->start_code + 1 + LINK_SIZE), we are
- definitely *not* at the start of the pattern because something has been
- compiled. In the pre-compile phase, however, the code pointer can have
- that value after the start, because it gets reset as code is discarded
- during the pre-compile. However, this can happen only at top level - if
- we are within parentheses, the starting BRA will still be present. At
- any parenthesis level, the length value can be used to test if anything
- has been compiled at that level. Thus, a test for both these conditions
- is necessary to ensure we correctly detect the start of the pattern in
- both phases.
-
- If we are not at the pattern start, reset the greedy defaults and the
- case value for firstcu and reqcu. */
-
if (*ptr == CHAR_RIGHT_PARENTHESIS)
{
- if (code == cb->start_code + 1 + LINK_SIZE &&
- (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
- {
- cb->external_options = newoptions;
- }
- else
- {
- greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
- greedy_non_default = greedy_default ^ 1;
- req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
- }
-
- /* Change options at this level, and pass them back for use
- in subsequent branches. */
-
*optionsptr = options = newoptions;
+ greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
+ greedy_non_default = greedy_default ^ 1;
+ req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
previous = NULL; /* This item can't be repeated */
continue; /* It is complete */
}
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-12-02 17:39:26 UTC (rev 455)
+++ code/trunk/testdata/testinput2 2015-12-03 16:58:31 UTC (rev 456)
@@ -4724,4 +4724,15 @@
# /A(?#X\x00Y)B/
/41 28 3f 23 7b 00 7d 29 42/B,hex
+# Tests for leading comment in extended patterns
+
+/ (?-x):?/extended
+
+/?(?-x):?/extended
+
+/0b 28 3f 2d 78 29 3a/hex,extended
+
+/#comment
+(?-x):?/extended
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-12-02 17:39:26 UTC (rev 455)
+++ code/trunk/testdata/testoutput2 2015-12-03 16:58:31 UTC (rev 456)
@@ -431,8 +431,6 @@
/(?U)<.*>/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: ungreedy
First code unit = '<'
Last code unit = '>'
Subject length lower bound = 2
@@ -459,8 +457,6 @@
/(?U)={3,}?/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: ungreedy
First code unit = '='
Last code unit = '='
Subject length lower bound = 3
@@ -494,8 +490,6 @@
/(?i)abc/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: caseless
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
Subject length lower bound = 3
@@ -508,7 +502,7 @@
/(?i)^1234/I
Capturing subpattern count = 0
Compile options: <none>
-Overall options: anchored caseless
+Overall options: anchored
Subject length lower bound = 4
/(^b|(?i)^d)/I
@@ -521,7 +515,7 @@
Capturing subpattern count = 0
May match empty string
Compile options: <none>
-Overall options: anchored dotall
+Overall options: anchored
Subject length lower bound = 0
/[abcd]/I
@@ -531,15 +525,11 @@
/(?i)[abcd]/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: caseless
Starting code units: A B C D a b c d
Subject length lower bound = 1
/(?m)[xy]|(b|c)/I
Capturing subpattern count = 1
-Compile options: <none>
-Overall options: multiline
Starting code units: b c x y
Subject length lower bound = 1
@@ -551,8 +541,7 @@
/(?i)(^a|^b)/Im
Capturing subpattern count = 1
-Compile options: multiline
-Overall options: caseless multiline
+Options: multiline
First code unit at start or follows newline
Subject length lower bound = 1
@@ -1153,7 +1142,7 @@
------------------------------------------------------------------
Capturing subpattern count = 1
Compile options: <none>
-Overall options: anchored dotall
+Overall options: anchored
Subject length lower bound = 1
/(?s:.*X|^B)/IB
@@ -2682,8 +2671,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Compile options: extended
-Overall options: caseless extended
+Options: extended
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
Subject length lower bound = 3
@@ -2697,8 +2685,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Compile options: extended
-Overall options: caseless extended
+Options: extended
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
Subject length lower bound = 3
@@ -3043,8 +3030,6 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: ungreedy
First code unit = 'x'
Last code unit = 'b'
Subject length lower bound = 3
@@ -3427,8 +3412,6 @@
/(?i)[ab]/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: caseless
Starting code units: A B a b
Subject length lower bound = 1
@@ -5841,7 +5824,7 @@
A 2
A 3
Compile options: <none>
-Overall options: anchored dupnames
+Overall options: anchored
Duplicate name status changes
Subject length lower bound = 2
a1b\=copy=A
@@ -13734,7 +13717,7 @@
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
Capturing subpattern count = 0
Compile options: <none>
-Overall options: dotall no_dotstar_anchor
+Overall options: no_dotstar_anchor
Subject length lower bound = 1
'^(?:(a)|b)(?(1)A|B)'
@@ -15060,4 +15043,15 @@
End
------------------------------------------------------------------
+# Tests for leading comment in extended patterns
+
+/ (?-x):?/extended
+
+/?(?-x):?/extended
+
+/0b 28 3f 2d 78 29 3a/hex,extended
+
+/#comment
+(?-x):?/extended
+
# End of testinput2