Revision: 1523
http://vcs.pcre.org/viewvc?view=rev&revision=1523
Author: ph10
Date: 2015-02-08 18:00:45 +0000 (Sun, 08 Feb 2015)
Log Message:
-----------
Disallow quantification of assertions used as conditions.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/pcre_internal.h
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/ChangeLog 2015-02-08 18:00:45 UTC (rev 1523)
@@ -53,7 +53,15 @@
The infinite loop is now broken (with the minimum length unset, that is,
zero).
+11. If an assertion that was used as a condition was quantified with a minimum
+ of zero, matching went wrong. In particular, if the whole group had
+ unlimited repetition and could match an empty string, a segfault was
+ likely. The pattern (?(?=0)?)+ is an example that caused this. Perl allows
+ assertions to be quantified, but not if they are being used as conditions,
+ so the above pattern is faulted by Perl. PCRE has now been changed so that
+ it also rejects such patterns.
+
Version 8.36 26-September-2014
------------------------------
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/pcre_compile.c 2015-02-08 18:00:45 UTC (rev 1523)
@@ -3639,13 +3639,13 @@
c = *code;
/* When a pattern with bad UTF-8 encoding is compiled with NO_UTF_CHECK,
- it may compile without complaining, but may get into a loop here if the code
+ it may compile without complaining, but may get into a loop here if the code
pointer points to a bad value. This is, of course, a documented possibility,
- when NO_UTF_CHECK is set, so it isn't a bug, but we can detect this case and
+ when NO_UTF_CHECK is set, so it isn't a bug, but we can detect this case and
just give up on this optimization. */
if (c >= OP_TABLE_LENGTH) return;
-
+
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
{
c -= get_repeat_base(c) - OP_STAR;
@@ -6657,7 +6657,10 @@
(tempptr[2] == CHAR_EQUALS_SIGN ||
tempptr[2] == CHAR_EXCLAMATION_MARK ||
tempptr[2] == CHAR_LESS_THAN_SIGN))
+ {
+ cd->iscondassert = TRUE;
break;
+ }
/* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
@@ -6770,7 +6773,7 @@
goto FAILED;
}
PUT2(code, 2+LINK_SIZE, recno);
- if (recno > cd->top_backref) cd->top_backref = recno;
+ if (recno > cd->top_backref) cd->top_backref = recno;
break;
}
@@ -6793,7 +6796,7 @@
int offset = i++;
int count = 1;
recno = GET2(slot, 0); /* Number from first found */
- if (recno > cd->top_backref) cd->top_backref = recno;
+ if (recno > cd->top_backref) cd->top_backref = recno;
for (; i < cd->names_found; i++)
{
slot += cd->name_entry_size;
@@ -7512,12 +7515,22 @@
goto FAILED;
}
- /* Assertions used not to be repeatable, but this was changed for Perl
- compatibility, so all kinds can now be repeated. We copy code into a
+ /* All assertions used not to be repeatable, but this was changed for Perl
+ compatibility. All kinds can now be repeated except for assertions that are
+ conditions (Perl also forbids these to be repeated). We copy code into a
non-register variable (tempcode) in order to be able to pass its address
- because some compilers complain otherwise. */
+ because some compilers complain otherwise. At the start of a conditional
+ group whose condition is an assertion, cd->iscondassert is set. We unset it
+ here so as to allow assertions later in the group to be quantified. */
- previous = code; /* For handling repetition */
+ if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
+ cd->iscondassert)
+ {
+ previous = NULL;
+ cd->iscondassert = FALSE;
+ }
+ else previous = code;
+
*code = bravalue;
tempcode = code;
tempreqvary = cd->req_varyopt; /* Save value before bracket */
@@ -9118,6 +9131,7 @@
cd->namedrefcount = 0;
cd->start_code = cworkspace;
cd->hwm = cworkspace;
+cd->iscondassert = FALSE;
cd->start_workspace = cworkspace;
cd->workspace_size = COMPILE_WORK_SIZE;
cd->named_groups = named_groups;
@@ -9213,6 +9227,7 @@
codestart = cd->name_table + re->name_entry_size * re->name_count;
cd->start_code = codestart;
cd->hwm = (pcre_uchar *)(cd->start_workspace);
+cd->iscondassert = FALSE;
cd->req_varyopt = 0;
cd->had_accept = FALSE;
cd->had_pruneorskip = FALSE;
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/pcre_internal.h 2015-02-08 18:00:45 UTC (rev 1523)
@@ -2446,6 +2446,7 @@
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */
BOOL dupnames; /* Duplicate names exist */
+ BOOL iscondassert; /* Next assert is a condition */
int nltype; /* Newline type */
int nllen; /* Newline string length */
pcre_uchar nl[4]; /* Newline string when fixed length */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/testdata/testinput2 2015-02-08 18:00:45 UTC (rev 1523)
@@ -4078,10 +4078,10 @@
/\x{whatever}/
-"((?=(?(?=(?(?=(?(?=())))*)))))"
+"((?=(?(?=(?(?=(?(?=()))))))))"
a
-"(?(?=)?==)(((((((((?=)))))))))"
+"(?(?=)==)(((((((((?=)))))))))"
a
/^(?:(a)|b)(?(1)A|B)/I
@@ -4102,4 +4102,15 @@
/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
+/(?(?=0)?)+/
+
+/(?(?=0)(?=00)?00765)/
+ 00765
+
+/(?(?=0)(?=00)?00765|(?!3).56)/
+ 00765
+ 456
+ ** Failers
+ 356
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/testdata/testoutput2 2015-02-08 18:00:45 UTC (rev 1523)
@@ -14224,13 +14224,13 @@
/\x{whatever}/
Failed: non-hex character in \x{} (closing brace missing?) at offset 3
-"((?=(?(?=(?(?=(?(?=())))*)))))"
+"((?=(?(?=(?(?=(?(?=()))))))))"
a
0:
1:
2:
-"(?(?=)?==)(((((((((?=)))))))))"
+"(?(?=)==)(((((((((?=)))))))))"
a
No match
@@ -14275,4 +14275,21 @@
/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
+/(?(?=0)?)+/
+Failed: nothing to repeat at offset 7
+
+/(?(?=0)(?=00)?00765)/
+ 00765
+ 0: 00765
+
+/(?(?=0)(?=00)?00765|(?!3).56)/
+ 00765
+ 0: 00765
+ 456
+ 0: 456
+ ** Failers
+No match
+ 356
+No match
+
/-- End of testinput2 --/