[Pcre-svn] [1284] code/trunk: Change backtracking behaviour …

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1284] code/trunk: Change backtracking behaviour to "first verb encountered".
Revision: 1284
          http://vcs.pcre.org/viewvc?view=rev&revision=1284
Author:   ph10
Date:     2013-03-15 11:54:58 +0000 (Fri, 15 Mar 2013)


Log Message:
-----------
Change backtracking behaviour to "first verb encountered".

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_exec.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/ChangeLog    2013-03-15 11:54:58 UTC (rev 1284)
@@ -110,7 +110,13 @@


30. Update RunTest with additional test selector options.

+31. PCRE has been changed to be more compatible with Perl when there is more
+    than one backtracking verb present. Previously, in something like 
+    (*COMMIT)(*SKIP), COMMIT would override SKIP. Apart from one anomaly (which 
+    has been reported), Perl seems to act on whichever backtracking verb is 
+    reached first, so PCRE has been changed to follow this behaviour.


+
Version 8.32 30-November-2012
-----------------------------


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/pcre_exec.c    2013-03-15 11:54:58 UTC (rev 1284)
@@ -781,23 +781,16 @@
     case OP_FAIL:
     RRETURN(MATCH_NOMATCH);


-    /* COMMIT overrides PRUNE, SKIP, and THEN */
-
     case OP_COMMIT:
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM52);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
-        rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
-        rrc != MATCH_THEN)
-      RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     RRETURN(MATCH_COMMIT);


-    /* PRUNE overrides THEN */
-
     case OP_PRUNE:
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM51);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     RRETURN(MATCH_PRUNE);


     case OP_PRUNE_ARG:
@@ -807,16 +800,13 @@
       eptrb, RM56);
     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
          md->mark == NULL) md->mark = ecode + 2;
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     RRETURN(MATCH_PRUNE);


-    /* SKIP overrides PRUNE and THEN */
-
     case OP_SKIP:
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM53);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
-      RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     md->start_match_ptr = eptr;   /* Pass back current position */
     RRETURN(MATCH_SKIP);


@@ -837,8 +827,7 @@
       }
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
       eptrb, RM57);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
-      RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);


     /* Pass back the current skip name by overloading md->start_match_ptr and
     returning the special MATCH_SKIP_ARG return code. This will either be


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testinput1    2013-03-15 11:54:58 UTC (rev 1284)
@@ -5327,4 +5327,90 @@
 /((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K
     acacd


+/-- Tests that try to figure out how Perl works. My hypothesis is that the
+    first verb that is backtracked onto is the one that acts. This seems to be
+    the case almost all the time, but there is one exception that is perhaps a 
+    bug. --/
+
+/-- This matches "aaaac"; each PRUNE advances one character until the subject
+    no longer starts with 5 'a's. --/
+
+/aaaaa(*PRUNE)b|a+c/
+    aaaaaac
+
+/-- Putting SKIP in front of PRUNE makes no difference, as it is never 
+backtracked onto, whether or not it has a label. --/
+
+/aaaaa(*SKIP)(*PRUNE)b|a+c/
+    aaaaaac
+
+/aaaaa(*SKIP:N)(*PRUNE)b|a+c/
+    aaaaaac
+
+/aaaa(*:N)a(*SKIP:N)(*PRUNE)b|a+c/
+    aaaaaac
+
+/-- Putting THEN in front makes no difference. */
+    
+/aaaaa(*THEN)(*PRUNE)b|a+c/
+    aaaaaac
+ 
+/-- However, putting COMMIT in front of the prune changes it to "no match". I 
+    think this is inconsistent and possibly a bug. For the moment, running this
+    test is moved out of the Perl-compatible file. --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+    
+
+/---- OK, lets play the same game again using SKIP instead of PRUNE. ----/
+
+/-- This matches "ac" because SKIP forces the next match to start on the
+    sixth "a". --/
+
+/aaaaa(*SKIP)b|a+c/
+    aaaaaac
+ 
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*SKIP)b|a+c/
+    aaaaaac
+
+/-- Putting THEN in front makes no difference. --/
+
+/aaaaa(*THEN)(*SKIP)b|a+c/
+    aaaaaac
+
+/-- In this case, neither does COMMIT. This still matches "ac". --/
+
+/aaaaa(*COMMIT)(*SKIP)b|a+c/
+    aaaaaac
+    
+/-- This gives "no match", as expected. --/
+
+/aaaaa(*COMMIT)b|a+c/
+    aaaaaac
+    
+
+/------ Tests using THEN ------/
+
+/-- This matches "aaaaaac", as expected. --/
+
+/aaaaa(*THEN)b|a+c/
+    aaaaaac
+
+/-- Putting SKIP in front makes no difference. --/
+
+/aaaaa(*SKIP)(*THEN)b|a+c/
+    aaaaaac
+    
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*THEN)b|a+c/
+    aaaaaac
+    
+/-- Putting COMMIT in front makes no difference. --/
+
+/aaaaa(*COMMIT)(*THEN)b|a+c/
+    aaaaaac
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testinput2    2013-03-15 11:54:58 UTC (rev 1284)
@@ -3851,4 +3851,9 @@
 /(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/
     bb


+/-- Perl seems to have a bug with this one --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+    aaaaaac
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testoutput1    2013-03-15 11:54:58 UTC (rev 1284)
@@ -8829,4 +8829,104 @@
  0: ac
 MK: n


+/-- Tests that try to figure out how Perl works. My hypothesis is that the
+    first verb that is backtracked onto is the one that acts. This seems to be
+    the case almost all the time, but there is one exception that is perhaps a 
+    bug. --/
+
+/-- This matches "aaaac"; each PRUNE advances one character until the subject
+    no longer starts with 5 'a's. --/
+
+/aaaaa(*PRUNE)b|a+c/
+    aaaaaac
+ 0: aaaac
+
+/-- Putting SKIP in front of PRUNE makes no difference, as it is never 
+backtracked onto, whether or not it has a label. --/
+
+/aaaaa(*SKIP)(*PRUNE)b|a+c/
+    aaaaaac
+ 0: aaaac
+
+/aaaaa(*SKIP:N)(*PRUNE)b|a+c/
+    aaaaaac
+ 0: aaaac
+
+/aaaa(*:N)a(*SKIP:N)(*PRUNE)b|a+c/
+    aaaaaac
+ 0: aaaac
+
+/-- Putting THEN in front makes no difference. */
+    
+/aaaaa(*THEN)(*PRUNE)b|a+c/
+    aaaaaac
+ 0: aaaac
+ 
+/-- However, putting COMMIT in front of the prune changes it to "no match". I 
+    think this is inconsistent and possibly a bug. For the moment, running this
+    test is moved out of the Perl-compatible file. --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+    
+
+/---- OK, lets play the same game again using SKIP instead of PRUNE. ----/
+
+/-- This matches "ac" because SKIP forces the next match to start on the
+    sixth "a". --/
+
+/aaaaa(*SKIP)b|a+c/
+    aaaaaac
+ 0: ac
+ 
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*SKIP)b|a+c/
+    aaaaaac
+ 0: ac
+
+/-- Putting THEN in front makes no difference. --/
+
+/aaaaa(*THEN)(*SKIP)b|a+c/
+    aaaaaac
+ 0: ac
+
+/-- In this case, neither does COMMIT. This still matches "ac". --/
+
+/aaaaa(*COMMIT)(*SKIP)b|a+c/
+    aaaaaac
+ 0: ac
+    
+/-- This gives "no match", as expected. --/
+
+/aaaaa(*COMMIT)b|a+c/
+    aaaaaac
+No match
+    
+
+/------ Tests using THEN ------/
+
+/-- This matches "aaaaaac", as expected. --/
+
+/aaaaa(*THEN)b|a+c/
+    aaaaaac
+ 0: aaaaaac
+
+/-- Putting SKIP in front makes no difference. --/
+
+/aaaaa(*SKIP)(*THEN)b|a+c/
+    aaaaaac
+ 0: aaaaaac
+    
+/-- Putting PRUNE in front makes no difference. --/
+
+/aaaaa(*PRUNE)(*THEN)b|a+c/
+    aaaaaac
+ 0: aaaaaac
+    
+/-- Putting COMMIT in front makes no difference. --/
+
+/aaaaa(*COMMIT)(*THEN)b|a+c/
+    aaaaaac
+ 0: aaaaaac
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2013-03-15 10:21:53 UTC (rev 1283)
+++ code/trunk/testdata/testoutput2    2013-03-15 11:54:58 UTC (rev 1284)
@@ -12642,4 +12642,10 @@
  11 ^ ^    
  0: bb


+/-- Perl seems to have a bug with this one --/
+
+/aaaaa(*COMMIT)(*PRUNE)b|a+c/
+    aaaaaac
+ 0: aaaac
+
 /-- End of testinput2 --/