[Pcre-svn] [614] code/trunk: Fix atomic group and assertion capturing problems.

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [614] code/trunk: Fix atomic group and assertion capturing problems.
Revision: 614
          http://vcs.pcre.org/viewvc?view=rev&revision=614
Author:   ph10
Date:     2011-07-09 11:48:16 +0100 (Sat, 09 Jul 2011)


Log Message:
-----------
Fix atomic group and assertion capturing problems.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_exec.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testoutput1


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-02 16:59:52 UTC (rev 613)
+++ code/trunk/ChangeLog    2011-07-09 10:48:16 UTC (rev 614)
@@ -113,6 +113,15 @@


 21. When (*ACCEPT) was used in an assertion that matched an empty string and
     PCRE_NOTEMPTY was set, PCRE applied the non-empty test to the assertion. 
+    
+22. When an atomic group that contained a capturing parenthesis was 
+    successfully matched, but the branch in which it appeared failed, the 
+    capturing was not being forgotten if a higher numbered group was later 
+    captured. For example, /(?>(a))b|(a)c/ when matching "ac" set capturing
+    group 1 to "a", when in fact it should be unset. This applied to multi-
+    branched capturing and non-capturing groups, repeated or not, and also to 
+    positive assertions (capturing in negative assertions is not well defined 
+    in PCRE) and also to nested atomic groups. 



Version 8.12 15-Jan-2011

Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2011-07-02 16:59:52 UTC (rev 613)
+++ code/trunk/pcre_exec.c    2011-07-09 10:48:16 UTC (rev 614)
@@ -847,6 +847,23 @@
         if (rrc != MATCH_NOMATCH &&
             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
           RRETURN(rrc);
+
+        /* If md->end_offset_top is greater than offset_top, it means that the
+        branch we have just failed to match did manage to match some capturing
+        parentheses within an atomic group or an assertion. Although offset_top
+        reverts to its original value at this level, we must unset the captured
+        values in case a later match sets a higher capturing number. Example:
+        matching /((?>(a))b|(a)c)/ against "ac". This captures 3, but we need
+        to ensure that 2 - which was captured in the atomic matching - is
+        unset. */
+      
+        if (md->end_offset_top > offset_top)   
+          {
+          register int *iptr = md->offset_vector + offset_top;
+          register int *iend = md->offset_vector + md->end_offset_top;
+          while (iptr < iend) *iptr++ = -1;
+          }
+
         md->capture_last = save_capture_last;
         ecode += GET(ecode, 1);
         if (*ecode != OP_ALT) break; 
@@ -892,6 +909,16 @@
       if (rrc != MATCH_NOMATCH &&
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
         RRETURN(rrc);
+
+      /* See explanatory comment above under OP_CBRA. */
+       
+      if (md->end_offset_top > offset_top)   
+        {
+        register int *iptr = md->offset_vector + offset_top;
+        register int *iend = md->offset_vector + md->end_offset_top;
+        while (iptr < iend) *iptr++ = -1;
+        }
+
       ecode += GET(ecode, 1);
       if (*ecode != OP_ALT) break; 
       }
@@ -962,6 +989,16 @@
         if (rrc != MATCH_NOMATCH &&
             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
           RRETURN(rrc);
+
+        /* See explanatory comment above under OP_CBRA. */
+         
+        if (md->end_offset_top > offset_top)   
+          {
+          register int *iptr = md->offset_vector + offset_top;
+          register int *iend = md->offset_vector + md->end_offset_top;
+          while (iptr < iend) *iptr++ = -1;
+          }
+
         md->capture_last = save_capture_last;
         ecode += GET(ecode, 1);
         if (*ecode != OP_ALT) break; 
@@ -1024,6 +1061,16 @@
       if (rrc != MATCH_NOMATCH &&
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
         RRETURN(rrc);
+  
+      /* See explanatory comment above under OP_CBRA. */
+       
+      if (md->end_offset_top > offset_top)   
+        {
+        register int *iptr = md->offset_vector + offset_top;
+        register int *iend = md->offset_vector + md->end_offset_top;
+        while (iptr < iend) *iptr++ = -1;
+        }
+
       ecode += GET(ecode, 1);
       if (*ecode != OP_ALT) break; 
       }
@@ -1366,6 +1413,16 @@
       if (rrc != MATCH_NOMATCH &&
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
         RRETURN(rrc);
+
+      /* See explanatory comment above under OP_CBRA. */
+       
+      if (md->end_offset_top > offset_top)   
+        {
+        register int *iptr = md->offset_vector + offset_top;
+        register int *iend = md->offset_vector + md->end_offset_top;
+        while (iptr < iend) *iptr++ = -1;
+        }
+
       ecode += GET(ecode, 1);
       }
     while (*ecode == OP_ALT);
@@ -1593,6 +1650,16 @@
       if (rrc != MATCH_NOMATCH &&
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
         RRETURN(rrc);
+
+      /* See explanatory comment above under OP_CBRA. */
+       
+      if (md->end_offset_top > offset_top)   
+        {
+        register int *iptr = md->offset_vector + offset_top;
+        register int *iend = md->offset_vector + md->end_offset_top;
+        while (iptr < iend) *iptr++ = -1;
+        }
+
       ecode += GET(ecode,1);
       }
     while (*ecode == OP_ALT);
@@ -1601,8 +1668,8 @@


     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);


-    /* Continue as from after the assertion, updating the offsets high water
-    mark, since extracts may have been taken. */
+    /* Continue after the group, updating the offsets high water mark, since
+    extracts may have been taken. */


     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);


@@ -6298,6 +6365,7 @@
   md->start_used_ptr = start_match;
   md->match_call_count = 0;
   md->match_function_type = 0;
+  md->end_offset_top = 0;
   rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);
   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2011-07-02 16:59:52 UTC (rev 613)
+++ code/trunk/testdata/testinput1    2011-07-09 10:48:16 UTC (rev 614)
@@ -4133,4 +4133,28 @@
 /((a|)+)+Z/
     Z


+/(a)b|(a)c/
+    ac
+
+/(?>(a))b|(a)c/
+    ac
+
+/(?=(a))ab|(a)c/
+    ac
+
+/((?>(a))b|(a)c)/
+    ac
+
+/((?>(a))b|(a)c)++/
+    ac
+
+/(?:(?>(a))b|(a)c)++/
+    ac
+
+/(?=(?>(a))b|(a)c)(..)/
+    ac
+
+/(?>(?>(a))b|(a)c)/
+    ac
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2011-07-02 16:59:52 UTC (rev 613)
+++ code/trunk/testdata/testoutput1    2011-07-09 10:48:16 UTC (rev 614)
@@ -6750,4 +6750,55 @@
  1: 
  2: 


+/(a)b|(a)c/
+    ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/(?>(a))b|(a)c/
+    ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/(?=(a))ab|(a)c/
+    ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/((?>(a))b|(a)c)/
+    ac
+ 0: ac
+ 1: ac
+ 2: <unset>
+ 3: a
+
+/((?>(a))b|(a)c)++/
+    ac
+ 0: ac
+ 1: ac
+ 2: <unset>
+ 3: a
+
+/(?:(?>(a))b|(a)c)++/
+    ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
+/(?=(?>(a))b|(a)c)(..)/
+    ac
+ 0: ac
+ 1: <unset>
+ 2: a
+ 3: ac
+
+/(?>(?>(a))b|(a)c)/
+    ac
+ 0: ac
+ 1: <unset>
+ 2: a
+
 /-- End of testinput1 --/