Revision: 1284
http://vcs.pcre.org/viewvc?view=rev&revision=1284
Author: ph10
Date: 2013-03-15 11:54:58 +0000 (Fri, 15 Mar 2013)
Log Message:
-----------
Change backtracking behaviour to "first verb encountered".
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_exec.c
code/trunk/testdata/testinput1
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/ChangeLog 2013-03-15 11:54:58 UTC (rev 1284)
@@ -110,7 +110,13 @@
30. Update RunTest with additional test selector options.
+31. PCRE has been changed to be more compatible with Perl when there is more
+ than one backtracking verb present. Previously, in something like
+ (*COMMIT)(*SKIP), COMMIT would override SKIP. Apart from one anomaly (which
+ has been reported), Perl seems to act on whichever backtracking verb is
+ reached first, so PCRE has been changed to follow this behaviour.
+
Version 8.32 30-November-2012
-----------------------------
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/pcre_exec.c 2013-03-15 11:54:58 UTC (rev 1284)
@@ -781,23 +781,16 @@
case OP_FAIL:
RRETURN(MATCH_NOMATCH);
- /* COMMIT overrides PRUNE, SKIP, and THEN */
-
case OP_COMMIT:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM52);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
- rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
- rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_COMMIT);
- /* PRUNE overrides THEN */
-
case OP_PRUNE:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM51);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_PRUNE);
case OP_PRUNE_ARG:
@@ -807,16 +800,13 @@
eptrb, RM56);
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
md->mark == NULL) md->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_PRUNE);
- /* SKIP overrides PRUNE and THEN */
-
case OP_SKIP:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM53);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->start_match_ptr = eptr; /* Pass back current position */
RRETURN(MATCH_SKIP);
@@ -837,8 +827,7 @@
}
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
eptrb, RM57);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
/* Pass back the current skip name by overloading md->start_match_ptr and
returning the special MATCH_SKIP_ARG return code. This will either be
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testinput1 2013-03-15 11:54:58 UTC (rev 1284)
@@ -5327,4 +5327,90 @@
/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K
acacd
+/-- Tests that try to figure out how Perl works. My hypothesis is that the
+ first verb that is backtracked onto is the one that acts. This seems to be
+ the case almost all the time, but there is one exception that is perhaps a
+ bug. --/
+
+/-- This matches "aaaac"; each PRUNE advances one character until the subject
+ no longer starts with 5 'a's. --/
+
+/aaaaa(*PRUNE)b|a+c/
+ aaaaaac
+
+/-- Putting SKIP in front of PRUNE makes no difference, as it is never
+backtracked onto, whether or not it has a label. --/
+
+/aaaaa(*SKIP)(*PRUNE)b|a+c/
+ aaaaaac
+
+/aaaaa(*SKIP:N)(*PRUNE)b|a+c/
+ aaaaaac
+
+/aaaa(*:N)a(*SKIP:N)(*PRUNE)b|a+c/
+ aaaaaac
+
+/-- Putting THEN in front makes no difference. */
+
+/aaaaa(*THEN)(*PRUNE)b|a+c/
+ aaaaaac
+
+/-- However, putting COMMIT in front of the prune changes it to "no match". I
+ think this is inconsistent and possibly a bug. For the moment, running this
+ test is moved out of the Perl-compatible file. --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+
+
+/---- OK, lets play the same game again using SKIP instead of PRUNE. ----/
+
+/-- This matches "ac" because SKIP forces the next match to start on the
+ sixth "a". --/
+
+/aaaaa(*SKIP)b|a+c/
+ aaaaaac
+
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*SKIP)b|a+c/
+ aaaaaac
+
+/-- Putting THEN in front makes no difference. --/
+
+/aaaaa(*THEN)(*SKIP)b|a+c/
+ aaaaaac
+
+/-- In this case, neither does COMMIT. This still matches "ac". --/
+
+/aaaaa(*COMMIT)(*SKIP)b|a+c/
+ aaaaaac
+
+/-- This gives "no match", as expected. --/
+
+/aaaaa(*COMMIT)b|a+c/
+ aaaaaac
+
+
+/------ Tests using THEN ------/
+
+/-- This matches "aaaaaac", as expected. --/
+
+/aaaaa(*THEN)b|a+c/
+ aaaaaac
+
+/-- Putting SKIP in front makes no difference. --/
+
+/aaaaa(*SKIP)(*THEN)b|a+c/
+ aaaaaac
+
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*THEN)b|a+c/
+ aaaaaac
+
+/-- Putting COMMIT in front makes no difference. --/
+
+/aaaaa(*COMMIT)(*THEN)b|a+c/
+ aaaaaac
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testinput2 2013-03-15 11:54:58 UTC (rev 1284)
@@ -3851,4 +3851,9 @@
/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/
bb
+/-- Perl seems to have a bug with this one --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+ aaaaaac
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testoutput1 2013-03-15 11:54:58 UTC (rev 1284)
@@ -8829,4 +8829,104 @@
0: ac
MK: n
+/-- Tests that try to figure out how Perl works. My hypothesis is that the
+ first verb that is backtracked onto is the one that acts. This seems to be
+ the case almost all the time, but there is one exception that is perhaps a
+ bug. --/
+
+/-- This matches "aaaac"; each PRUNE advances one character until the subject
+ no longer starts with 5 'a's. --/
+
+/aaaaa(*PRUNE)b|a+c/
+ aaaaaac
+ 0: aaaac
+
+/-- Putting SKIP in front of PRUNE makes no difference, as it is never
+backtracked onto, whether or not it has a label. --/
+
+/aaaaa(*SKIP)(*PRUNE)b|a+c/
+ aaaaaac
+ 0: aaaac
+
+/aaaaa(*SKIP:N)(*PRUNE)b|a+c/
+ aaaaaac
+ 0: aaaac
+
+/aaaa(*:N)a(*SKIP:N)(*PRUNE)b|a+c/
+ aaaaaac
+ 0: aaaac
+
+/-- Putting THEN in front makes no difference. */
+
+/aaaaa(*THEN)(*PRUNE)b|a+c/
+ aaaaaac
+ 0: aaaac
+
+/-- However, putting COMMIT in front of the prune changes it to "no match". I
+ think this is inconsistent and possibly a bug. For the moment, running this
+ test is moved out of the Perl-compatible file. --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+
+
+/---- OK, lets play the same game again using SKIP instead of PRUNE. ----/
+
+/-- This matches "ac" because SKIP forces the next match to start on the
+ sixth "a". --/
+
+/aaaaa(*SKIP)b|a+c/
+ aaaaaac
+ 0: ac
+
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*SKIP)b|a+c/
+ aaaaaac
+ 0: ac
+
+/-- Putting THEN in front makes no difference. --/
+
+/aaaaa(*THEN)(*SKIP)b|a+c/
+ aaaaaac
+ 0: ac
+
+/-- In this case, neither does COMMIT. This still matches "ac". --/
+
+/aaaaa(*COMMIT)(*SKIP)b|a+c/
+ aaaaaac
+ 0: ac
+
+/-- This gives "no match", as expected. --/
+
+/aaaaa(*COMMIT)b|a+c/
+ aaaaaac
+No match
+
+
+/------ Tests using THEN ------/
+
+/-- This matches "aaaaaac", as expected. --/
+
+/aaaaa(*THEN)b|a+c/
+ aaaaaac
+ 0: aaaaaac
+
+/-- Putting SKIP in front makes no difference. --/
+
+/aaaaa(*SKIP)(*THEN)b|a+c/
+ aaaaaac
+ 0: aaaaaac
+
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*THEN)b|a+c/
+ aaaaaac
+ 0: aaaaaac
+
+/-- Putting COMMIT in front makes no difference. --/
+
+/aaaaa(*COMMIT)(*THEN)b|a+c/
+ aaaaaac
+ 0: aaaaaac
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testoutput2 2013-03-15 11:54:58 UTC (rev 1284)
@@ -12642,4 +12642,10 @@
11 ^ ^
0: bb
+/-- Perl seems to have a bug with this one --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+ aaaaaac
+ 0: aaaac
+
/-- End of testinput2 --/