Revision: 608
http://vcs.pcre.org/viewvc?view=rev&revision=608
Author: ph10
Date: 2011-06-12 17:25:55 +0100 (Sun, 12 Jun 2011)
Log Message:
-----------
Fix problems with capturing parentheses and (*ACCEPT) in recursively called subpatterns.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_exec.c
code/trunk/testdata/testinput11
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput11
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-06-12 15:09:49 UTC (rev 607)
+++ code/trunk/ChangeLog 2011-06-12 16:25:55 UTC (rev 608)
@@ -81,6 +81,15 @@
and -d etc. It's slightly incompatible, but I'm hoping nobody is still
using it. It makes it easier to run collection of tests with study enabled,
and thereby test pcre_study() more easily.
+
+15. When (*ACCEPT) was used in a subpattern that was called recursively, the
+ restoration of the capturing data to the outer values was not happening
+ correctly.
+
+16. If a recursively called subpattern ended with (*ACCEPT) and matched an
+ empty string, and PCRE_NOTEMPTY was set, pcre_exec() thought the whole
+ pattern had matched an empty string, and so incorrectly reported "no
+ match".
Version 8.12 15-Jan-2011
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2011-06-12 15:09:49 UTC (rev 607)
+++ code/trunk/pcre_exec.c 2011-06-12 16:25:55 UTC (rev 608)
@@ -1305,22 +1305,24 @@
break;
- /* End of the pattern, either real or forced. If we are in a top-level
- recursion, we should restore the offsets appropriately and continue from
- after the call. */
+ /* End of the pattern, either real or forced. If we are in a recursion, we
+ should restore the offsets appropriately, and if it's a top-level
+ recursion, continue from after the call. */
case OP_ACCEPT:
case OP_END:
- if (md->recursive != NULL && md->recursive->group_num == 0)
+ if (md->recursive != NULL)
{
recursion_info *rec = md->recursive;
- DPRINTF(("End of pattern in a (?0) recursion\n"));
md->recursive = rec->prevrec;
- memmove(md->offset_vector, rec->offset_save,
+ memmove(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
offset_top = rec->save_offset_top;
- ecode = rec->after_call;
- break;
+ if (rec->group_num == 0)
+ {
+ ecode = rec->after_call;
+ break;
+ }
}
/* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
@@ -1328,14 +1330,14 @@
the subject. In both cases, backtracking will then try other alternatives,
if any. */
- if (eptr == mstart &&
+ else if (eptr == mstart &&
(md->notempty ||
(md->notempty_atstart &&
mstart == md->start_subject + md->start_offset)))
MRRETURN(MATCH_NOMATCH);
/* Otherwise, we have a match. */
-
+
md->end_match_ptr = eptr; /* Record where we ended */
md->end_offset_top = offset_top; /* and how many extracts were taken */
md->start_match_ptr = mstart; /* and the start (\K can modify) */
@@ -1538,7 +1540,7 @@
memcpy(new_recursive.offset_save, md->offset_vector,
new_recursive.saved_max * sizeof(int));
new_recursive.save_offset_top = offset_top;
-
+
/* OK, now we can do the recursion. For each top-level alternative we
restore the offset and recursion data. */
Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11 2011-06-12 15:09:49 UTC (rev 607)
+++ code/trunk/testdata/testinput11 2011-06-12 16:25:55 UTC (rev 608)
@@ -584,4 +584,13 @@
ab
aab
+/(?1)(?:(b)){0}/
+ b
+
+/(foo ( \( ((?:(?> [^()]+ )|(?2))*) \) ) )/x
+ foo(bar(baz)+baz(bop))
+
+/(A (A|B(*ACCEPT)|C) D)(E)/x
+ AB
+
/-- End of testinput11 --/
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-06-12 15:09:49 UTC (rev 607)
+++ code/trunk/testdata/testinput2 2011-06-12 16:25:55 UTC (rev 608)
@@ -3659,4 +3659,20 @@
/(abc)\1+/S
+/-- Perl doesn't get these right IMO (the 3rd is PCRE-specific) --/
+
+/(?1)(?:(b(*ACCEPT))){0}/
+ b
+
+/(?1)(?:(b(*ACCEPT))){0}c/
+ bc
+ ** Failers
+ b
+
+/(?1)(?:((*ACCEPT))){0}c/
+ c
+ c\N
+
+/-- --/
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput11
===================================================================
--- code/trunk/testdata/testoutput11 2011-06-12 15:09:49 UTC (rev 607)
+++ code/trunk/testdata/testoutput11 2011-06-12 16:25:55 UTC (rev 608)
@@ -1125,4 +1125,21 @@
aab
No match
+/(?1)(?:(b)){0}/
+ b
+ 0: b
+
+/(foo ( \( ((?:(?> [^()]+ )|(?2))*) \) ) )/x
+ foo(bar(baz)+baz(bop))
+ 0: foo(bar(baz)+baz(bop))
+ 1: foo(bar(baz)+baz(bop))
+ 2: (bar(baz)+baz(bop))
+ 3: bar(baz)+baz(bop)
+
+/(A (A|B(*ACCEPT)|C) D)(E)/x
+ AB
+ 0: AB
+ 1: AB
+ 2: B
+
/-- End of testinput11 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-06-12 15:09:49 UTC (rev 607)
+++ code/trunk/testdata/testoutput2 2011-06-12 16:25:55 UTC (rev 608)
@@ -11547,4 +11547,26 @@
/(abc)\1+/S
+/-- Perl doesn't get these right IMO (the 3rd is PCRE-specific) --/
+
+/(?1)(?:(b(*ACCEPT))){0}/
+ b
+ 0: b
+
+/(?1)(?:(b(*ACCEPT))){0}c/
+ bc
+ 0: bc
+ ** Failers
+No match
+ b
+No match
+
+/(?1)(?:((*ACCEPT))){0}c/
+ c
+ 0: c
+ c\N
+ 0: c
+
+/-- --/
+
/-- End of testinput2 --/