[Pcre-svn] [613] code/trunk: Fix problem with the interaction of (*ACCEPT) in an assertion with PCRE_NOTEMPTY.

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [613] code/trunk: Fix problem with the interaction of (*ACCEPT) in an assertion with PCRE_NOTEMPTY.
Revision: 613
          http://vcs.pcre.org/viewvc?view=rev&revision=613
Author:   ph10
Date:     2011-07-02 17:59:52 +0100 (Sat, 02 Jul 2011)


Log Message:
-----------
Fix problem with the interaction of (*ACCEPT) in an assertion with
PCRE_NOTEMPTY.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_dfa_exec.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/pcre_study.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/ChangeLog    2011-07-02 16:59:52 UTC (rev 613)
@@ -110,6 +110,9 @@
 20. If /S is present twice on a test pattern in pcretest input, it *disables*
     studying, thereby overriding the use of -s on the command line. This is
     necessary for one or two tests to keep the output identical in both cases. 
+    
+21. When (*ACCEPT) was used in an assertion that matched an empty string and
+    PCRE_NOTEMPTY was set, PCRE applied the non-empty test to the assertion. 



Version 8.12 15-Jan-2011

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_compile.c    2011-07-02 16:59:52 UTC (rev 613)
@@ -4931,22 +4931,29 @@
         if (namelen == verbs[i].len &&
             strncmp((char *)name, vn, namelen) == 0)
           {
-          /* Check for open captures before ACCEPT */
+          /* Check for open captures before ACCEPT and convert it to 
+          ASSERT_ACCEPT if in an assertion. */


           if (verbs[i].op == OP_ACCEPT)
             {
             open_capitem *oc;
+            if (arglen != 0)
+              {
+              *errorcodeptr = ERR59;
+              goto FAILED;
+              }   
             cd->had_accept = TRUE;
             for (oc = cd->open_caps; oc != NULL; oc = oc->next)
               {
               *code++ = OP_CLOSE;
               PUT2INC(code, 0, oc->number);
               }
+            *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
             }


-          /* Handle the cases with/without an argument */
+          /* Handle other cases with/without an argument */


-          if (arglen == 0)
+          else if (arglen == 0)
             {
             if (verbs[i].op < 0)   /* Argument is mandatory */
               {
@@ -5235,6 +5242,7 @@
         /* ------------------------------------------------------------ */
         case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
         bravalue = OP_ASSERT;
+        cd->assert_depth += 1; 
         ptr++;
         break;


@@ -5249,6 +5257,7 @@
           continue;
           }
         bravalue = OP_ASSERT_NOT;
+        cd->assert_depth += 1; 
         break;



@@ -5258,11 +5267,13 @@
           {
           case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
           bravalue = OP_ASSERTBACK;
+          cd->assert_depth += 1; 
           ptr += 2;
           break;


           case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
           bravalue = OP_ASSERTBACK_NOT;
+          cd->assert_depth += 1; 
           ptr += 2;
           break;


@@ -5830,6 +5841,9 @@
            &length_prevgroup           /* Pre-compile phase */
          ))
       goto FAILED;
+      
+    if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
+      cd->assert_depth -= 1; 


     /* At the end of compiling, code is still pointing to the start of the
     group, while tempcode has been updated to point past the end of the group
@@ -7152,6 +7166,7 @@
 */


cd->final_bracount = cd->bracount; /* Save for checking forward references */
+cd->assert_depth = 0;
cd->bracount = 0;
cd->names_found = 0;
cd->name_table = (uschar *)re + re->name_table_offset;

Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_dfa_exec.c    2011-07-02 16:59:52 UTC (rev 613)
@@ -170,9 +170,10 @@
   0, 0,                          /* RREF, NRREF                            */
   0,                             /* DEF                                    */
   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
-  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */
-  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */
-  0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
+  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
+  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
+  0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
+  0, 0                           /* CLOSE, SKIPZERO  */
 };


 /* This table identifies those opcodes that inspect a character. It is used to
@@ -237,9 +238,10 @@
   0, 0,                          /* RREF, NRREF                            */
   0,                             /* DEF                                    */
   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
-  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG,                */
-  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG,        */
-  0, 0, 0, 0, 0                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */
+  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
+  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
+  0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
+  0, 0                           /* CLOSE, SKIPZERO                        */
 };


/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,

Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_exec.c    2011-07-02 16:59:52 UTC (rev 613)
@@ -1296,6 +1296,7 @@
     recursion, continue from after the call. */


     case OP_ACCEPT:
+    case OP_ASSERT_ACCEPT: 
     case OP_END:
     if (md->recursive != NULL)
       {
@@ -1311,12 +1312,12 @@
         } 
       }


-    /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
-    set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
-    the subject. In both cases, backtracking will then try other alternatives,
-    if any. */
+    /* Otherwise, if we have matched an empty string, fail if not in an 
+    assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
+    is set and we have matched at the start of the subject. In both cases,
+    backtracking will then try other alternatives, if any. */


-    else if (eptr == mstart &&
+    else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
         (md->notempty ||
           (md->notempty_atstart &&
             mstart == md->start_subject + md->start_offset)))
@@ -5899,12 +5900,17 @@
 md->use_ucp = (re->options & PCRE_UCP) != 0;
 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;


+/* Some options are unpacked into BOOL variables in the hope that testing
+them will be faster than individual option bits. */
+
 md->notbol = (options & PCRE_NOTBOL) != 0;
 md->noteol = (options & PCRE_NOTEOL) != 0;
 md->notempty = (options & PCRE_NOTEMPTY) != 0;
 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
+              
+ 
 md->hitend = FALSE;
 md->mark = NULL;                        /* In case never set */



Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_internal.h    2011-07-02 16:59:52 UTC (rev 613)
@@ -1445,7 +1445,8 @@
   OP_KETRMIN,        /* 116 order. They are for groups the repeat for ever. */
   OP_KETRPOS,        /* 117 Possessive unlimited repeat. */


- /* The assertions must come before BRA, CBRA, ONCE, and COND.*/
+ /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
+ asserts must remain in order. */

   OP_ASSERT,         /* 118 Positive lookahead */
   OP_ASSERT_NOT,     /* 119 Negative lookahead */
@@ -1455,7 +1456,7 @@


/* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come after the assertions,
with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an
- assertion. The POS versions must immediately follow the non-POS versions in
+ assertion. The POS versions must immediately follow the non-POS versions in
each case. */

   OP_ONCE,           /* 123 Atomic group */
@@ -1484,7 +1485,7 @@


   OP_BRAZERO,        /* 139 These two must remain together and in this */
   OP_BRAMINZERO,     /* 140 order. */
-  OP_BRAPOSZERO,     /* 141 */ 
+  OP_BRAPOSZERO,     /* 141 */


/* These are backtracking control verbs */

@@ -1501,11 +1502,12 @@

   OP_FAIL,           /* 150 */
   OP_ACCEPT,         /* 151 */
-  OP_CLOSE,          /* 152 Used before OP_ACCEPT to close open captures */
+  OP_ASSERT_ACCEPT,  /* 152 Used inside assertions */
+  OP_CLOSE,          /* 153 Used before OP_ACCEPT to close open captures */


/* This is used to skip a subpattern with a {0} quantifier */

-  OP_SKIPZERO,       /* 153 */
+  OP_SKIPZERO,       /* 154 */


   /* This is not an opcode, but is used to check that tables indexed by opcode
   are the correct length, in order to catch updating errors - there have been
@@ -1557,7 +1559,8 @@
   "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
   "Brazero", "Braminzero", "Braposzero",                          \
   "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
-  "*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",                \
+  "*THEN", "*THEN", "*COMMIT", "*FAIL",                           \
+  "*ACCEPT", "*ASSERT_ACCEPT",                                    \
   "Close", "Skip zero"



@@ -1639,7 +1642,8 @@
   3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG                 */ \
   1, 3,                          /* SKIP, SKIP_ARG                         */ \
   1+LINK_SIZE, 3+LINK_SIZE,      /* THEN, THEN_ARG                         */ \
-  1, 1, 1, 3, 1                  /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO  */ \
+  1, 1, 1, 1,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */ \
+  3, 1                           /* CLOSE, SKIPZERO  */


 /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
 condition. */
@@ -1737,6 +1741,7 @@
   int  final_bracount;          /* Saved value after first pass */
   int  top_backref;             /* Maximum back reference */
   unsigned int backref_map;     /* Bitmap of low back refs */
+  int  assert_depth;            /* Depth of nested assertions */ 
   int  external_options;        /* External (initial) options */
   int  external_flags;          /* External flag bits to be set */
   int  req_varyopt;             /* "After variable item" flag for reqbyte */
@@ -1793,8 +1798,8 @@
   int    name_entry_size;       /* Size of entry in names table */
   uschar *name_table;           /* Table of names */
   uschar nl[4];                 /* Newline string when fixed */
-  const uschar *lcc;            /* Points to lower casing table */
-  const uschar *ctypes;         /* Points to table of type maps */
+  const  uschar *lcc;           /* Points to lower casing table */
+  const  uschar *ctypes;        /* Points to table of type maps */
   BOOL   offset_overflow;       /* Set if too many extractions */
   BOOL   notbol;                /* NOTBOL flag */
   BOOL   noteol;                /* NOTEOL flag */
@@ -1806,7 +1811,7 @@
   BOOL   notempty_atstart;      /* Empty string match at start not wanted */
   BOOL   hitend;                /* Hit the end of the subject at some point */
   BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */
-  const uschar *start_code;     /* For use when recursing */
+  const  uschar *start_code;    /* For use when recursing */
   USPTR  start_subject;         /* Start of the subject string */
   USPTR  end_subject;           /* End of the subject string */
   USPTR  start_match_ptr;       /* Start of matched string */
@@ -1816,12 +1821,12 @@
   int    end_offset_top;        /* Highwater mark at end of match */
   int    capture_last;          /* Most recent capture number */
   int    start_offset;          /* The start offset value */
-  int    match_function_type;   /* Set for certain special calls of MATCH() */ 
+  int    match_function_type;   /* Set for certain special calls of MATCH() */
   eptrblock *eptrchain;         /* Chain of eptrblocks for tail recursions */
   int    eptrn;                 /* Next free eptrblock */
   recursion_info *recursive;    /* Linked list of recursion data */
   void  *callout_data;          /* To pass back to callouts */
-  const uschar *mark;           /* Mark pointer to pass back */
+  const  uschar *mark;          /* Mark pointer to pass back */
 } match_data;


/* A similar structure is used for the same purpose by the DFA matching

Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_study.c    2011-07-02 16:59:52 UTC (rev 613)
@@ -142,6 +142,7 @@
     counting stops. */


     case OP_ACCEPT: 
+    case OP_ASSERT_ACCEPT: 
     *had_accept_ptr = TRUE;
     /* Fall through */ 
     case OP_ALT:
@@ -715,6 +716,7 @@
       /* Fail for a valid opcode that implies no starting bits. */


       case OP_ACCEPT:
+      case OP_ASSERT_ACCEPT: 
       case OP_ALLANY:
       case OP_ANY:
       case OP_ANYBYTE:


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/testdata/testinput2    2011-07-02 16:59:52 UTC (rev 613)
@@ -3745,4 +3745,16 @@


/-- --/

+"(?=a*(*ACCEPT)b)c"
+    c
+    c\N 
+    
+/(?1)c(?(DEFINE)((*ACCEPT)b))/
+    c
+    c\N  
+    
+/(?>(*ACCEPT)b)c/
+    c
+    c\N  
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/testdata/testoutput2    2011-07-02 16:59:52 UTC (rev 613)
@@ -11877,4 +11877,22 @@


/-- --/

+"(?=a*(*ACCEPT)b)c"
+    c
+ 0: c
+    c\N 
+ 0: c
+    
+/(?1)c(?(DEFINE)((*ACCEPT)b))/
+    c
+ 0: c
+    c\N  
+ 0: c
+    
+/(?>(*ACCEPT)b)c/
+    c
+ 0: 
+    c\N  
+No match
+
 /-- End of testinput2 --/