[Pcre-svn] [447] code/trunk: Capture data when (*ACCEPT) is inside capturing parentheses.

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [447] code/trunk: Capture data when (*ACCEPT) is inside capturing parentheses.
Revision: 447
          http://vcs.pcre.org/viewvc?view=rev&revision=447
Author:   ph10
Date:     2009-09-15 19:17:54 +0100 (Tue, 15 Sep 2009)


Log Message:
-----------
Capture data when (*ACCEPT) is inside capturing parentheses.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcrecompat.3
    code/trunk/doc/pcrepattern.3
    code/trunk/pcre_compile.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/pcre_printint.src


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2009-09-15 10:49:50 UTC (rev 446)
+++ code/trunk/ChangeLog    2009-09-15 18:17:54 UTC (rev 447)
@@ -123,6 +123,11 @@
     with unset values at the outer level. The correct (outer level) value is 
     now given.


+22. If (*ACCEPT) appeared inside capturing parentheses, previous releases of
+    PCRE did not set those parentheses (unlike Perl). I have now found a way to
+    make it do so. The string so far is captured, making this feature
+    compatible with Perl.
+    


Version 7.9 11-Apr-09
---------------------

Modified: code/trunk/doc/pcrecompat.3
===================================================================
--- code/trunk/doc/pcrecompat.3    2009-09-15 10:49:50 UTC (rev 446)
+++ code/trunk/doc/pcrecompat.3    2009-09-15 18:17:54 UTC (rev 447)
@@ -83,8 +83,7 @@
 .P
 11. PCRE does support Perl 5.10's backtracking verbs (*ACCEPT), (*FAIL), (*F),
 (*COMMIT), (*PRUNE), (*SKIP), and (*THEN), but only in the forms without an
-argument. PCRE does not support (*MARK). If (*ACCEPT) is within capturing
-parentheses, PCRE does not set that capture group; this is different to Perl.
+argument. PCRE does not support (*MARK).
 .P
 12. PCRE provides some extensions to the Perl regular expression facilities.
 Perl 5.10 will include new features that are not in earlier versions, some of
@@ -143,6 +142,6 @@
 .rs
 .sp
 .nf
-Last updated: 11 September 2009
+Last updated: 15 September 2009
 Copyright (c) 1997-2009 University of Cambridge.
 .fi


Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3    2009-09-15 10:49:50 UTC (rev 446)
+++ code/trunk/doc/pcrepattern.3    2009-09-15 18:17:54 UTC (rev 447)
@@ -2155,14 +2155,13 @@
 .sp
 This verb causes the match to end successfully, skipping the remainder of the
 pattern. When inside a recursion, only the innermost pattern is ended
-immediately. PCRE differs from Perl in what happens if the (*ACCEPT) is inside
-capturing parentheses. In Perl, the data so far is captured: in PCRE no data is
-captured. For example:
+immediately. If the (*ACCEPT) is inside capturing parentheses, the data so far
+is captured. (This feature was added to PCRE at release 8.00.) For example:
 .sp
-  A(A|B(*ACCEPT)|C)D
+  A((?:A|B(*ACCEPT)|C)D)
 .sp
-This matches "AB", "AAD", or "ACD", but when it matches "AB", no data is
-captured.
+This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by 
+the outer parentheses.
 .sp
   (*FAIL) or (*F)
 .sp
@@ -2259,6 +2258,6 @@
 .rs
 .sp
 .nf
-Last updated: 13 September 2009
+Last updated: 15 September 2009
 Copyright (c) 1997-2009 University of Cambridge.
 .fi


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2009-09-15 10:49:50 UTC (rev 446)
+++ code/trunk/pcre_compile.c    2009-09-15 18:17:54 UTC (rev 447)
@@ -4440,8 +4440,19 @@
         if (namelen == verbs[i].len &&
             strncmp((char *)name, vn, namelen) == 0)
           {
-          *code = verbs[i].op;
-          if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
+          /* Check for open captures before ACCEPT */
+            
+          if (verbs[i].op == OP_ACCEPT)
+            {
+            open_capitem *oc; 
+            cd->had_accept = TRUE; 
+            for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+              {
+              *code++ = OP_CLOSE;
+              PUT2INC(code, 0, oc->number); 
+              }  
+            }  
+          *code++ = verbs[i].op;
           break;
           }
         vn += verbs[i].len + 1;
@@ -5669,6 +5680,8 @@
 uschar *last_branch = code;
 uschar *start_bracket = code;
 uschar *reverse_count = NULL;
+open_capitem capitem;
+int capnumber = 0;
 int firstbyte, reqbyte;
 int branchfirstbyte, branchreqbyte;
 int length;
@@ -5695,6 +5708,17 @@
 them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
 pre-compile phase to find out whether anything has yet been compiled or not. */


+/* If this is a capturing subpattern, add to the chain of open capturing items
+so that we can detect them if (*ACCEPT) is encountered. */
+
+if (*code == OP_CBRA)
+ {
+ capnumber = GET2(code, 1 + LINK_SIZE);
+ capitem.number = capnumber;
+ capitem.next = cd->open_caps;
+ cd->open_caps = &capitem;
+ }
+
/* Offset is set zero to mark that this bracket is still open */

 PUT(code, 1, 0);
@@ -5830,6 +5854,10 @@
         }
       while (branch_length > 0);
       }
+      
+    /* If it was a capturing subpattern, remove it from the chain. */
+    
+    if (capnumber > 0) cd->open_caps = cd->open_caps->next;


     /* Fill in the ket */


@@ -6398,6 +6426,7 @@
cd->req_varyopt = 0;
cd->external_options = options;
cd->external_flags = 0;
+cd->open_caps = NULL;

/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
don't need to look at the result of the function here. The initial options have
@@ -6472,6 +6501,7 @@
cd->hwm = cworkspace;
cd->req_varyopt = 0;
cd->had_accept = FALSE;
+cd->open_caps = NULL;

/* Set up a starting, non-extracting bracket, then compile the expression. On
error, errorcode will be set non-zero, so we don't need to look at the result

Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2009-09-15 10:49:50 UTC (rev 446)
+++ code/trunk/pcre_exec.c    2009-09-15 18:17:54 UTC (rev 447)
@@ -909,8 +909,32 @@
       ecode += 1 + LINK_SIZE;
       }
     break;
+    


+    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
+    to close any currently open capturing brackets. */
+    
+    case OP_CLOSE:
+    number = GET2(ecode, 1); 
+    offset = number << 1;
+      
+#ifdef DEBUG
+      printf("end bracket %d at *ACCEPT", number);
+      printf("\n");
+#endif


+    md->capture_last = number;
+    if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+      {
+      md->offset_vector[offset] =
+        md->offset_vector[md->offset_end - number];
+      md->offset_vector[offset+1] = eptr - md->start_subject;
+      if (offset_top <= offset) offset_top = offset + 2;
+      }
+    ecode += 3;
+    break;   
+
+
     /* End of the pattern, either real or forced. If we are in a top-level
     recursion, we should restore the offsets appropriately and continue from
     after the call. */


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2009-09-15 10:49:50 UTC (rev 446)
+++ code/trunk/pcre_internal.h    2009-09-15 18:17:54 UTC (rev 447)
@@ -1364,10 +1364,11 @@


   OP_FAIL,           /* 109 */
   OP_ACCEPT,         /* 110 */
+  OP_CLOSE,          /* 111 Used before OP_ACCEPT to close open captures */ 


/* This is used to skip a subpattern with a {0} quantifier */

-  OP_SKIPZERO        /* 111 */
+  OP_SKIPZERO        /* 112 */
 };



@@ -1393,7 +1394,7 @@
   "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
   "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero",    \
   "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \
-  "Skip zero"
+  "Close", "Skip zero"



 /* This macro defines the length of fixed length operations in the compiled
@@ -1458,7 +1459,7 @@
   1,                             /* DEF                                    */ \
   1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
   1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \
-  1, 1, 1                        /* FAIL, ACCEPT, SKIPZERO                 */
+  1, 1, 3, 1                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */



/* A magic value for OP_RREF to indicate the "any recursion" condition. */
@@ -1521,6 +1522,15 @@
uschar start_bits[32];
} pcre_study_data;

+/* Structure for building a chain of open capturing subpatterns during 
+compiling, so that instructions to close them can be compiled when (*ACCEPT) is 
+encountered. */
+
+typedef struct open_capitem {
+  struct open_capitem *next;    /* Chain link */
+  pcre_uint16 number;           /* Capture number */
+} open_capitem;    
+
 /* Structure for passing "static" information around between the functions
 doing the compiling, so that they are thread-safe. */


@@ -1533,6 +1543,7 @@
   const uschar *start_code;     /* The start of the compiled code */
   const uschar *start_pattern;  /* The start of the pattern */
   const uschar *end_pattern;    /* The end of the pattern */
+  open_capitem *open_caps;      /* Chain of open capture items */ 
   uschar *hwm;                  /* High watermark of workspace */
   uschar *name_table;           /* The name/number table */
   int  names_found;             /* Number of entries so far */


Modified: code/trunk/pcre_printint.src
===================================================================
--- code/trunk/pcre_printint.src    2009-09-15 10:49:50 UTC (rev 446)
+++ code/trunk/pcre_printint.src    2009-09-15 18:17:54 UTC (rev 447)
@@ -245,6 +245,10 @@
       else fprintf(f, "    ");
     fprintf(f, "%s", OP_names[*code]);
     break;
+    
+    case OP_CLOSE:
+    fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
+    break;   


     case OP_CREF:
     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);