Revision: 995
http://www.exim.org/viewvc/pcre2?view=rev&revision=995
Author: ph10
Date: 2018-09-02 17:53:29 +0100 (Sun, 02 Sep 2018)
Log Message:
-----------
Fix anchoring bug in conditionals with only one branch.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2018-09-02 16:03:27 UTC (rev 994)
+++ code/trunk/ChangeLog 2018-09-02 16:53:29 UTC (rev 995)
@@ -174,7 +174,12 @@
from distribution tarballs, owing to a typo in Makefile.am which had
testoutput8-16-3 twice. Now fixed.
+39. If the only branch in a conditional subpattern was anchored, the whole
+subpattern was treated as anchored, when it should not have been, since the
+assumed empty second branch cannot be anchored. This is demonstrated by test
+patterns such as /(?(1)^())b/ or /(?(?=^))b/.
+
Version 10.31 12-February-2018
------------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2018-09-02 16:03:27 UTC (rev 994)
+++ code/trunk/src/pcre2_compile.c 2018-09-02 16:53:29 UTC (rev 995)
@@ -1454,8 +1454,8 @@
/* \N{U+ can be handled by the \x{ code. However, this construction is
not valid in EBCDIC environments because it specifies a Unicode
character, not a codepoint in the local code. For example \N{U+0041}
- must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
- casing semantics for the entire pattern, so allow it only in UTF (i.e.
+ must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
+ casing semantics for the entire pattern, so allow it only in UTF (i.e.
Unicode) mode. */
if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS)
@@ -1464,12 +1464,12 @@
*errorcodeptr = ERR93;
#else
if (utf)
- {
+ {
ptr = p + 1;
escape = 0; /* Not a fancy escape after all */
goto COME_FROM_NU;
}
- else *errorcodeptr = ERR93;
+ else *errorcodeptr = ERR93;
#endif
}
@@ -7864,10 +7864,11 @@
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
}
- /* Condition */
+ /* Condition. If there is no second branch, the group can't be anchored. */
else if (op == OP_COND)
{
+ if (scode[GET(scode,1)] != OP_ALT) return FALSE;
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2018-09-02 16:03:27 UTC (rev 994)
+++ code/trunk/testdata/testinput2 2018-09-02 16:53:29 UTC (rev 995)
@@ -5459,4 +5459,19 @@
/(?x-i-i)/
+/(?(?=^))b/I
+ abc
+
+/(?(?=^)|)b/I
+ abc
+
+/(?(?=^)|^)b/I
+ bbc
+\= Expect no match
+ abc
+
+/(?(1)^|^())/I
+
+/(?(1)^())b/I
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2018-09-02 16:03:27 UTC (rev 994)
+++ code/trunk/testdata/testoutput2 2018-09-02 16:53:29 UTC (rev 995)
@@ -16631,6 +16631,46 @@
/(?x-i-i)/
Failed: error 194 at offset 5: invalid hyphen in option setting
+/(?(?=^))b/I
+Capturing subpattern count = 0
+Last code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: b
+
+/(?(?=^)|)b/I
+Capturing subpattern count = 0
+First code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: b
+
+/(?(?=^)|^)b/I
+Capturing subpattern count = 0
+Compile options: <none>
+Overall options: anchored
+First code unit = 'b'
+Subject length lower bound = 1
+ bbc
+ 0: b
+\= Expect no match
+ abc
+No match
+
+/(?(1)^|^())/I
+Capturing subpattern count = 1
+Max back reference = 1
+May match empty string
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 0
+
+/(?(1)^())b/I
+Capturing subpattern count = 1
+Max back reference = 1
+Last code unit = 'b'
+Subject length lower bound = 1
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data