Revision: 380
http://vcs.pcre.org/viewvc?view=rev&revision=380
Author: ph10
Date: 2009-03-03 12:32:47 +0000 (Tue, 03 Mar 2009)
Log Message:
-----------
The pattern (?(?=.*b)b|^) was incorrectly processed as requiring a match at the
start of the subject or after a newline.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2009-03-02 20:30:05 UTC (rev 379)
+++ code/trunk/ChangeLog 2009-03-03 12:32:47 UTC (rev 380)
@@ -36,6 +36,10 @@
doesn't make sense to accept an empty string match in pcregrep, so I have
locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this
seems to be how GNU grep behaves.
+
+8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
+ start or after a newline", because the conditional assertion was not being
+ skipped when checking for this condition.
Version 7.8 05-Sep-08
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2009-03-02 20:30:05 UTC (rev 379)
+++ code/trunk/pcre_compile.c 2009-03-03 12:32:47 UTC (rev 380)
@@ -5810,7 +5810,33 @@
const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
NULL, 0, FALSE);
register int op = *scode;
+
+ /* If we are at the start of a conditional group, skip over the condition
+ before inspecting the first opcode after the condition. */
+ if (op == OP_COND)
+ {
+ scode += 1 + LINK_SIZE;
+ switch (*scode)
+ {
+ case OP_CREF:
+ case OP_RREF:
+ scode += 3;
+ break;
+
+ case OP_DEF:
+ scode += 1;
+ break;
+
+ default: /* Assertion */
+ do scode += GET(scode, 1); while (*scode == OP_ALT);
+ break;
+ }
+
+ scode = first_significant_code(scode, NULL, 0, FALSE);
+ op = *scode;
+ }
+
/* Non-capturing brackets */
if (op == OP_BRA)
@@ -5829,8 +5855,10 @@
/* Other brackets */
- else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
- { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }
+ else if (op == OP_ASSERT || op == OP_ONCE)
+ {
+ if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
+ }
/* .* means "start at start or after \n" if it isn't in brackets that
may be referenced. */
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2009-03-02 20:30:05 UTC (rev 379)
+++ code/trunk/testdata/testinput1 2009-03-03 12:32:47 UTC (rev 380)
@@ -4039,4 +4039,20 @@
/.*[op][xyz]/
fooabcfoo
+/(?(?=.*b)b|^)/
+ adc
+ abc
+
+/(?(?=^.*b)b|^)/
+ adc
+ abc
+
+/(?(?=.*b)b|^)*/
+ adc
+ abc
+
+/(?(?=.*b)b|^)+/
+ adc
+ abc
+
/ End of testinput1 /
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2009-03-02 20:30:05 UTC (rev 379)
+++ code/trunk/testdata/testoutput1 2009-03-03 12:32:47 UTC (rev 380)
@@ -6609,4 +6609,28 @@
fooabcfoo
No match
+/(?(?=.*b)b|^)/
+ adc
+ 0:
+ abc
+ 0: b
+
+/(?(?=^.*b)b|^)/
+ adc
+ 0:
+ abc
+No match
+
+/(?(?=.*b)b|^)*/
+ adc
+ 0:
+ abc
+ 0:
+
+/(?(?=.*b)b|^)+/
+ adc
+ 0:
+ abc
+ 0: b
+
/ End of testinput1 /