[Pcre-svn] [1271] code/trunk: Confine PRUNE and SKIP to recursive subpattern calls, like COMMIT.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1271] code/trunk: Confine PRUNE and SKIP to recursive subpattern calls, like COMMIT.
Revision: 1271
          http://vcs.pcre.org/viewvc?view=rev&revision=1271
Author:   ph10
Date:     2013-03-06 16:50:38 +0000 (Wed, 06 Mar 2013)


Log Message:
-----------
Confine PRUNE and SKIP to recursive subpattern calls, like COMMIT.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_exec.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testoutput1


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2013-03-05 08:05:17 UTC (rev 1270)
+++ code/trunk/ChangeLog    2013-03-06 16:50:38 UTC (rev 1271)
@@ -93,6 +93,9 @@
 23. Allow an explicit callout to be inserted before an assertion that is the
     condition for a conditional group, for compatibility with automatic 
     callouts, which always insert a callout at this point.
+    
+24. In 8.31, (*COMMIT) was confined to within a recursive subpattern. Perl also 
+    confines (*SKIP) and (*PRUNE) in the same way, and this has now been done.



Version 8.32 30-November-2012

Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2013-03-05 08:05:17 UTC (rev 1270)
+++ code/trunk/pcre_exec.c    2013-03-06 16:50:38 UTC (rev 1271)
@@ -56,14 +56,14 @@
 #undef min
 #undef max


-/* The md->capture_last field uses the lower 16 bits for the last captured
+/* The md->capture_last field uses the lower 16 bits for the last captured
 substring (which can never be greater than 65535) and a bit in the top half
-to mean "capture vector overflowed". This odd way of doing things was
-implemented when it was realized that preserving and restoring the overflow bit
-whenever the last capture number was saved/restored made for a neater
-interface, and doing it this way saved on (a) another variable, which would
-have increased the stack frame size (a big NO-NO in PCRE) and (b) another
-separate set of save/restore instructions. The following defines are used in
+to mean "capture vector overflowed". This odd way of doing things was
+implemented when it was realized that preserving and restoring the overflow bit
+whenever the last capture number was saved/restored made for a neater
+interface, and doing it this way saved on (a) another variable, which would
+have increased the stack frame size (a big NO-NO in PCRE) and (b) another
+separate set of save/restore instructions. The following defines are used in
 implementing this. */

 #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
@@ -87,13 +87,17 @@
 negative to avoid the external error codes. */


 #define MATCH_ACCEPT       (-999)
-#define MATCH_COMMIT       (-998)
-#define MATCH_KETRPOS      (-997)
-#define MATCH_ONCE         (-996)
+#define MATCH_KETRPOS      (-998)
+#define MATCH_ONCE         (-997)
+/* The next 5 must be kept together and in sequence so that a test that checks
+for any one of them can use a range. */
+#define MATCH_COMMIT       (-996)
 #define MATCH_PRUNE        (-995)
 #define MATCH_SKIP         (-994)
 #define MATCH_SKIP_ARG     (-993)
 #define MATCH_THEN         (-992)
+#define MATCH_BACKTRACK_MAX MATCH_THEN
+#define MATCH_BACKTRACK_MIN MATCH_COMMIT


 /* Maximum number of ints of offset to save on the stack for recursive calls.
 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
@@ -1310,15 +1314,15 @@
         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
         cb.capture_top      = offset_top/2;
         cb.capture_last     = md->capture_last & CAPLMASK;
-        /* Internal change requires this for API compatibility. */ 
-        if (cb.capture_last == 0) cb.capture_last = -1; 
+        /* Internal change requires this for API compatibility. */
+        if (cb.capture_last == 0) cb.capture_last = -1;
         cb.callout_data     = md->callout_data;
         cb.mark             = md->nomatch_mark;
         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
         if (rrc < 0) RRETURN(rrc);
         }
       ecode += PRIV(OP_lengths)[OP_CALLOUT];
-      codelink -= PRIV(OP_lengths)[OP_CALLOUT]; 
+      codelink -= PRIV(OP_lengths)[OP_CALLOUT];
       }


     condcode = ecode[LINK_SIZE+1];
@@ -1738,8 +1742,8 @@
       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
       cb.capture_top      = offset_top/2;
       cb.capture_last     = md->capture_last & CAPLMASK;
-      /* Internal change requires this for API compatibility. */ 
-      if (cb.capture_last == 0) cb.capture_last = -1; 
+      /* Internal change requires this for API compatibility. */
+      if (cb.capture_last == 0) cb.capture_last = -1;
       cb.callout_data     = md->callout_data;
       cb.mark             = md->nomatch_mark;
       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
@@ -1785,7 +1789,7 @@
       /* Add to "recursing stack" */


       new_recursive.group_num = recno;
-      new_recursive.saved_capture_last = md->capture_last; 
+      new_recursive.saved_capture_last = md->capture_last;
       new_recursive.subject_position = eptr;
       new_recursive.prevrec = md->recursive;
       md->recursive = &new_recursive;
@@ -1822,7 +1826,7 @@
           md, eptrb, RM6);
         memcpy(md->offset_vector, new_recursive.offset_save,
             new_recursive.saved_max * sizeof(int));
-        md->capture_last = new_recursive.saved_capture_last;     
+        md->capture_last = new_recursive.saved_capture_last;
         md->recursive = new_recursive.prevrec;
         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
           {
@@ -1839,11 +1843,12 @@
           goto RECURSION_MATCHED;        /* Exit loop; end processing */
           }


-        /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
-        is treated as NOMATCH. */
+        /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
+        recursion; they are treated as NOMATCH. These codes are defined in a 
+        range that can be tested for. Any other return code is an error. */


-        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
-                 rrc != MATCH_COMMIT)
+        else if (rrc != MATCH_NOMATCH && 
+                 (rrc < MATCH_BACKTRACK_MIN || rrc > MATCH_BACKTRACK_MAX))
           {
           DPRINTF(("Recursion gave error %d\n", rrc));
           if (new_recursive.offset_save != stacksave)
@@ -2629,13 +2634,13 @@
             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
           }
         break;
-        
+
         case PT_UCNC:
         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
              c >= 0xe000) == (op == OP_NOTPROP))
-          RRETURN(MATCH_NOMATCH);  
-        break;            
+          RRETURN(MATCH_NOMATCH);
+        break;


         /* This should never occur */


@@ -4254,7 +4259,7 @@
               }
             }
           break;
-          
+
           case PT_UCNC:
           for (i = 1; i <= min; i++)
             {
@@ -4268,8 +4273,8 @@
                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                  c >= 0xe000) == prop_fail_result)
               RRETURN(MATCH_NOMATCH);
-            }   
-          break;            
+            }
+          break;


           /* This should not occur */


@@ -5016,7 +5021,7 @@
               }
             }
           /* Control never gets here */
-          
+
           case PT_UCNC:
           for (fi = min;; fi++)
             {
@@ -5032,8 +5037,8 @@
             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
                  c >= 0xe000) == prop_fail_result)
-              RRETURN(MATCH_NOMATCH);    
-            }   
+              RRETURN(MATCH_NOMATCH);
+            }
           /* Control never gets here */


           /* This should never occur */
@@ -5545,7 +5550,7 @@
                  c >= 0xe000) == prop_fail_result)
               break;
             eptr += len;
-            }   
+            }
           break;


           default:
@@ -6894,11 +6899,11 @@
   md->match_function_type = 0;
   md->end_offset_top = 0;
   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
-  if (md->hitend && start_partial == NULL) 
+  if (md->hitend && start_partial == NULL)
     {
     start_partial = md->start_used_ptr;
     match_partial = start_match;
-    }  
+    }


   switch(rc)
     {
@@ -7032,7 +7037,7 @@
   /* Set the return code to the number of captured strings, or 0 if there were
   too many to fit into the vector. */


-  rc = ((md->capture_last & OVFLBIT) != 0 && 
+  rc = ((md->capture_last & OVFLBIT) != 0 &&
          md->end_offset_top >= arg_offset_max)?
     0 : md->end_offset_top/2;


@@ -7106,7 +7111,7 @@
     {
     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
-    if (offsetcount > 2) 
+    if (offsetcount > 2)
       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
     }
   rc = PCRE_ERROR_PARTIAL;


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2013-03-05 08:05:17 UTC (rev 1270)
+++ code/trunk/testdata/testinput1    2013-03-06 16:50:38 UTC (rev 1271)
@@ -5303,4 +5303,13 @@
 "(?>.*?)foo"
     abcdfooxyz


+/(?:(a(*PRUNE)b)){0}(?:(?1)|ac)/
+    ac
+    
+/(?:(a(*SKIP)b)){0}(?:(?1)|ac)/
+    ac 
+
+/(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/
+    aac 
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2013-03-05 08:05:17 UTC (rev 1270)
+++ code/trunk/testdata/testoutput1    2013-03-06 16:50:38 UTC (rev 1271)
@@ -8795,4 +8795,16 @@
     abcdfooxyz
  0: foo


+/(?:(a(*PRUNE)b)){0}(?:(?1)|ac)/
+    ac
+ 0: ac
+    
+/(?:(a(*SKIP)b)){0}(?:(?1)|ac)/
+    ac 
+ 0: ac
+
+/(?:(a(*MARK:X)a+(*SKIP:X)b)){0}(?:(?1)|aac)/
+    aac 
+ 0: aac
+
 /-- End of testinput1 --/