Revision: 381
http://vcs.pcre.org/viewvc?view=rev&revision=381
Author: ph10
Date: 2009-03-03 16:08:23 +0000 (Tue, 03 Mar 2009)
Log Message:
-----------
Fix bug where (?(?=.*b)b|^) was incorrectly compiled as having to match at the
start of a line; also fix a bug causing a crash when auto-callout is used with
a conditional assertion.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/pcre_exec.c
code/trunk/testdata/testinput1
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/ChangeLog 2009-03-03 16:08:23 UTC (rev 381)
@@ -39,7 +39,11 @@
8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
start or after a newline", because the conditional assertion was not being
- skipped when checking for this condition.
+ correctly handled. The rule now is that both the assertion and what follows
+ in the first alternative must satisfy the test.
+
+9. If auto-callout was enabled in a pattern with a conditional group whose
+ condition was an assertion, PCRE could crash during matching.
Version 7.8 05-Sep-08
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/pcre_compile.c 2009-03-03 16:08:23 UTC (rev 381)
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -5811,28 +5811,28 @@
NULL, 0, FALSE);
register int op = *scode;
- /* If we are at the start of a conditional group, skip over the condition.
- before inspecting the first opcode after the condition. */
+ /* If we are at the start of a conditional assertion group, *both* the
+ conditional assertion *and* what follows the condition must satisfy the test
+ for start of line. Any other condition (OP_CREF, OP_RREF, OP_DEF) fails it.
+ Note that there may be an auto-callout at the start of a condition. */
if (op == OP_COND)
{
scode += 1 + LINK_SIZE;
+ if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
switch (*scode)
{
case OP_CREF:
case OP_RREF:
- scode += 3;
- break;
-
case OP_DEF:
- scode += 1;
- break;
+ return FALSE;
default: /* Assertion */
+ if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
do scode += GET(scode, 1); while (*scode == OP_ALT);
+ scode += 1 + LINK_SIZE;
break;
}
-
scode = first_significant_code(scode, NULL, 0, FALSE);
op = *scode;
}
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/pcre_exec.c 2009-03-03 16:08:23 UTC (rev 381)
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -787,6 +787,34 @@
case OP_COND:
case OP_SCOND:
+ /* Because of the way auto-callout works during compile, a callout item is
+ inserted between OP_COND and an assertion condition. */
+
+ if (ecode[LINK_SIZE+1] == OP_CALLOUT)
+ {
+ if (pcre_callout != NULL)
+ {
+ pcre_callout_block cb;
+ cb.version = 1; /* Version 1 of the callout block */
+ cb.callout_number = ecode[LINK_SIZE+2];
+ cb.offset_vector = md->offset_vector;
+ cb.subject = (PCRE_SPTR)md->start_subject;
+ cb.subject_length = md->end_subject - md->start_subject;
+ cb.start_match = mstart - md->start_subject;
+ cb.current_position = eptr - md->start_subject;
+ cb.pattern_position = GET(ecode, LINK_SIZE + 3);
+ cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
+ cb.capture_top = offset_top/2;
+ cb.capture_last = md->capture_last;
+ cb.callout_data = md->callout_data;
+ if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
+ if (rrc < 0) RRETURN(rrc);
+ }
+ ecode += _pcre_OP_lengths[OP_CALLOUT];
+ }
+
+ /* Now see what the actual condition is */
+
if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
{
offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testinput1 2009-03-03 16:08:23 UTC (rev 381)
@@ -4055,4 +4055,10 @@
adc
abc
+/(?(?=b).*b|^d)/
+ abc
+
+/(?(?=.*b).*b|^d)/
+ abc
+
/ End of testinput1 /
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testinput2 2009-03-03 16:08:23 UTC (rev 381)
@@ -2726,4 +2726,12 @@
/(abc|pqr|123){0}[xyz]/SI
+/(?(?=.*b)b|^)/CI
+ adc
+ abc
+
+/(?(?=b).*b|^d)/I
+
+/(?(?=.*b).*b|^d)/I
+
/ End of testinput2 /
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testoutput1 2009-03-03 16:08:23 UTC (rev 381)
@@ -6633,4 +6633,12 @@
abc
0: b
+/(?(?=b).*b|^d)/
+ abc
+ 0: b
+
+/(?(?=.*b).*b|^d)/
+ abc
+ 0: ab
+
/ End of testinput1 /
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testoutput2 2009-03-03 16:08:23 UTC (rev 381)
@@ -9638,4 +9638,56 @@
No need char
Starting byte set: x y z
+/(?(?=.*b)b|^)/CI
+Capturing subpattern count = 0
+Partial matching not supported
+Options:
+No first char
+No need char
+ adc
+--->adc
+ +0 ^ (?(?=.*b)b|^)
+ +2 ^ (?=.*b)
+ +5 ^ .*
+ +7 ^ ^ b
+ +7 ^ ^ b
+ +7 ^^ b
+ +7 ^ b
+ 0:
+ abc
+--->abc
+ +0 ^ (?(?=.*b)b|^)
+ +2 ^ (?=.*b)
+ +5 ^ .*
+ +7 ^ ^ b
+ +7 ^ ^ b
+ +7 ^^ b
+ +8 ^ ^ )
+ +9 ^ b
+ +0 ^ (?(?=.*b)b|^)
+ +2 ^ (?=.*b)
+ +5 ^ .*
+ +7 ^ ^ b
+ +7 ^^ b
+ +7 ^ b
+ +8 ^^ )
+ +9 ^ b
++10 ^^ |
++13 ^^
+ 0: b
+
+/(?(?=b).*b|^d)/I
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+No first char
+No need char
+
+/(?(?=.*b).*b|^d)/I
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+First char at start or follows newline
+No need char
+
/ End of testinput2 /