[Pcre-svn] [618] code/trunk: Re-do atomic group processing t…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [618] code/trunk: Re-do atomic group processing to fix backtrack capture bugs.
Revision: 618
          http://vcs.pcre.org/viewvc?view=rev&revision=618
Author:   ph10
Date:     2011-07-16 18:24:16 +0100 (Sat, 16 Jul 2011)


Log Message:
-----------
Re-do atomic group processing to fix backtrack capture bugs. Recursion is also
re-worked.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/ChangeLog    2011-07-16 17:24:16 UTC (rev 618)
@@ -126,6 +126,13 @@
 23. Add the ++ qualifier feature to pcretest, to show the remainder of the 
     subject after a captured substring (to make it easier to tell which of a 
     number of identical substrings has been captured).
+    
+24. The way atomic groups are processed by pcre_exec() has been changed so that
+    if they are repeated, backtracking one repetition now resets captured 
+    values correctly. For example, if ((?>(a+)b)+aabab) is matched against
+    "aaaabaaabaabab" the value of captured group 2 is now correctly recorded as 
+    "aaa". Previously, it would have been "a". As part of this code 
+    refactoring, the way recursive calls are handled has also been changed.



Version 8.12 15-Jan-2011

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/pcre_compile.c    2011-07-16 17:24:16 UTC (rev 618)
@@ -1694,6 +1694,7 @@
 for (;;)
   {
   register int c = *code;
+
   if (c == OP_END) return NULL;


   /* XCLASS is used for classes that cannot be represented just by a bit
@@ -4726,7 +4727,14 @@
         }


       /* If the maximum is unlimited, set a repeater in the final copy. For
-      ONCE brackets, that's all we need to do.
+      ONCE brackets, that's all we need to do. 
+      
+      (To be done next, after recursion adjusted)
+      However, possessively repeated 
+      ONCE brackets can be converted into non-capturing brackets, as the 
+      behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to 
+      deal with possessive ONCEs specially.
+      (....) 


       Otherwise, if the quantifier was possessive, we convert the BRA code to
       the POS form, and the KET code to KETRPOS. (It turns out to be convenient
@@ -4748,7 +4756,12 @@
         uschar *ketcode = code - 1 - LINK_SIZE;
         uschar *bracode = ketcode - GET(ketcode, 1);


-        if (*bracode == OP_ONCE)
+/****
+        if (*bracode == OP_ONCE && possessive_quantifier)
+          *bracode = OP_BRA; 
+****/
+           
+        if (*bracode == OP_ONCE) 
           *ketcode = OP_KETRMAX + repeat_type;
         else
           {
@@ -5685,7 +5698,7 @@
           /* Insert the recursion/subroutine item, automatically wrapped inside
           "once" brackets. Set up a "previous group" length so that a
           subsequent quantifier will work. */
-
+          
           *code = OP_ONCE;
           PUT(code, 1, 2 + 2*LINK_SIZE);
           code += 1 + LINK_SIZE;


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/pcre_exec.c    2011-07-16 17:24:16 UTC (rev 618)
@@ -76,10 +76,11 @@
 #define MATCH_ACCEPT       (-999)
 #define MATCH_COMMIT       (-998)
 #define MATCH_KETRPOS      (-997)
-#define MATCH_PRUNE        (-996)
-#define MATCH_SKIP         (-995)
-#define MATCH_SKIP_ARG     (-994)
-#define MATCH_THEN         (-993)
+#define MATCH_ONCE         (-996)
+#define MATCH_PRUNE        (-995)
+#define MATCH_SKIP         (-994)
+#define MATCH_SKIP_ARG     (-993)
+#define MATCH_THEN         (-992)


/* This is a convenience macro for code that occurs many times. */

@@ -276,7 +277,7 @@
        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
-       RM61,  RM62, RM63};
+       RM61,  RM62, RM63, RM64, RM65, RM66 };


 /* These versions of the macros use the stack, as normal. There are debugging
 versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -845,6 +846,7 @@
         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; 
         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, 
           eptrb, RM1);
+        if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
         if (rrc != MATCH_NOMATCH &&
             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
           RRETURN(rrc);
@@ -854,13 +856,15 @@
         }


       DPRINTF(("bracket %d failed\n", number));
-
       md->offset_vector[offset] = save_offset1;
       md->offset_vector[offset+1] = save_offset2;
       md->offset_vector[md->offset_end - number] = save_offset3;
+      
+      /* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or 
+      MATCH_THEN. */


       if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
-      RRETURN(MATCH_NOMATCH);
+      RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH));
       }


     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
@@ -874,29 +878,49 @@
     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
     /* VVVVVVVVVVVVVVVVVVVVVVVVV */


-    /* Non-capturing bracket, except for possessive with unlimited repeat. Loop
-    for all the alternatives. When we get to the final alternative within the
-    brackets, we used to return the result of a recursive call to match()
-    whatever happened so it was possible to reduce stack usage by turning this
-    into a tail recursion, except in the case of a possibly empty group. 
-    However, now that there is the possiblity of (*THEN) occurring in the final 
-    alternative, this optimization is no longer possible. */
+    /* Non-capturing or atomic group, except for possessive with unlimited
+    repeat. Loop for all the alternatives. When we get to the final alternative
+    within the brackets, we used to return the result of a recursive call to
+    match() whatever happened so it was possible to reduce stack usage by
+    turning this into a tail recursion, except in the case of a possibly empty
+    group. However, now that there is the possibility of (*THEN) occurring in
+    the final alternative, this optimization is no longer possible. 
+   
+    MATCH_ONCE is returned when the end of an atomic group is successfully 
+    reached, but subsequent matching fails. It passes back up the tree (causing 
+    captured values to be reset) until the original atomic group level is 
+    reached. This is tested by comparing md->once_target with the start of the
+    group. At this point, the return is converted into MATCH_NOMATCH so that
+    previous backup points can be taken. */


+    case OP_ONCE:
     case OP_BRA:
     case OP_SBRA:
     DPRINTF(("start non-capturing bracket\n"));
+
     for (;;)
       {
-      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
+      if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, 
         RM2);
       if (rrc != MATCH_NOMATCH &&
           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
+        {  
+        if (rrc == MATCH_ONCE)
+          {
+          const uschar *scode = ecode;
+          if (*scode != OP_ONCE)           /* If not at start, find it */
+            {
+            while (*scode == OP_ALT) scode += GET(scode, 1);
+            scode -= GET(scode, 1);
+            }   
+          if (md->once_target == scode) rrc = MATCH_NOMATCH;
+          } 
         RRETURN(rrc);
+        } 
       ecode += GET(ecode, 1);
       if (*ecode != OP_ALT) break; 
       }
-
     if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
     RRETURN(MATCH_NOMATCH);


@@ -1299,10 +1323,14 @@
     case OP_ACCEPT:
     case OP_ASSERT_ACCEPT: 
     case OP_END:
+    
+/* 
     if (md->recursive != NULL)
       {
       recursion_info *rec = md->recursive;
+
       md->recursive = rec->prevrec;
+
       memmove(md->offset_vector, rec->offset_save, 
         rec->saved_max * sizeof(int));
       offset_top = rec->save_offset_top;
@@ -1312,13 +1340,16 @@
         break;
         } 
       }
-
+*/
     /* Otherwise, if we have matched an empty string, fail if not in an 
     assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
     is set and we have matched at the start of the subject. In both cases,
     backtracking will then try other alternatives, if any. */


-    else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
+/*    else */ if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
+
+         md->recursive == NULL &&
+
         (md->notempty ||
           (md->notempty_atstart &&
             mstart == md->start_subject + md->start_offset)))
@@ -1481,19 +1512,19 @@
     /* Recursion either matches the current regex, or some subexpression. The
     offset data is the offset to the starting bracket from the start of the
     whole pattern. (This is so that it works from duplicated subpatterns.)
+    
+    The state of the capturing groups is preserved over recursion, and
+    re-instated afterwards. We don't know how many are started and not yet 
+    finished (offset_top records the completed total) so we just have to save
+    all the potential data. There may be up to 65535 such values, which is too
+    large to put on the stack, but using malloc for small numbers seems
+    expensive. As a compromise, the stack is used when there are no more than
+    REC_STACK_SAVE_MAX values to store; otherwise malloc is used.


-    If there are any capturing brackets started but not finished, we have to
-    save their starting points and reinstate them after the recursion. However,
-    we don't know how many such there are (offset_top records the completed
-    total) so we just have to save all the potential data. There may be up to
-    65535 such values, which is too large to put on the stack, but using malloc
-    for small numbers seems expensive. As a compromise, the stack is used when
-    there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
-    is used.
-
     There are also other values that have to be saved. We use a chained
     sequence of blocks that actually live on the stack. Thanks to Robin Houston
-    for the original version of this logic. */
+    for the original version of this logic. It has, however, been hacked around 
+    a lot, so he is not to blame for the current way it works. */


     case OP_RECURSE:
       {
@@ -1506,12 +1537,11 @@
       new_recursive.prevrec = md->recursive;
       md->recursive = &new_recursive;


-      /* Find where to continue from afterwards */
+      /* Where to continue from afterwards */


       ecode += 1 + LINK_SIZE;
-      new_recursive.after_call = ecode;


-      /* Now save the offset data. */
+      /* Now save the offset data */


       new_recursive.saved_max = md->offset_end;
       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
@@ -1522,13 +1552,12 @@
           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
         }
-
       memcpy(new_recursive.offset_save, md->offset_vector,
             new_recursive.saved_max * sizeof(int));
-      new_recursive.save_offset_top = offset_top;


-      /* OK, now we can do the recursion. For each top-level alternative we
-      restore the offset and recursion data. */
+      /* OK, now we can do the recursion. After processing each alternative,
+      restore the offset data. If there were nested recursions, md->recursive 
+      might be changed, so reset it before looping. */


       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
       cbegroup = (*callpat >= OP_SBRA);
@@ -1537,13 +1566,22 @@
         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
           md, eptrb, RM6);
+        memcpy(md->offset_vector, new_recursive.offset_save,
+            new_recursive.saved_max * sizeof(int));
         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
           {
           DPRINTF(("Recursion matched\n"));
           md->recursive = new_recursive.prevrec;
           if (new_recursive.offset_save != stacksave)
             (pcre_free)(new_recursive.offset_save);
-          MRRETURN(MATCH_MATCH);
+
+          /* Set where we got to in the subject, and reset the start in case
+          it was changed by \K. This *is* propagated back out of a recursion, 
+          for Perl compatibility. */ 
+           
+          eptr = md->end_match_ptr;
+          mstart = md->start_match_ptr;
+          goto RECURSION_MATCHED;        /* Exit loop; end processing */
           }
         else if (rrc != MATCH_NOMATCH &&
                 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
@@ -1555,8 +1593,6 @@
           }


         md->recursive = &new_recursive;
-        memcpy(md->offset_vector, new_recursive.offset_save,
-            new_recursive.saved_max * sizeof(int));
         callpat += GET(callpat, 1);
         }
       while (*callpat == OP_ALT);
@@ -1567,79 +1603,10 @@
         (pcre_free)(new_recursive.offset_save);
       MRRETURN(MATCH_NOMATCH);
       }
-    /* Control never reaches here */
+      
+    RECURSION_MATCHED:
+    break;


-    /* "Once" brackets are like assertion brackets except that after a match,
-    the point in the subject string is not moved back. Thus there can never be
-    a move back into the brackets. Friedl calls these "atomic" subpatterns.
-    Check the alternative branches in turn - the matching won't pass the KET
-    for this kind of subpattern. If any one branch matches, we carry on as at
-    the end of a normal bracket, leaving the subject pointer, but resetting
-    the start-of-match value in case it was changed by \K. */
-
-    case OP_ONCE:
-    prev = ecode;
-    saved_eptr = eptr;
-
-    do
-      {
-      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
-      if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
-        {
-        mstart = md->start_match_ptr;
-        break;
-        }
-      if (rrc != MATCH_NOMATCH &&
-          (rrc != MATCH_THEN || md->start_match_ptr != ecode))
-        RRETURN(rrc);
-      ecode += GET(ecode,1);
-      }
-    while (*ecode == OP_ALT);
-
-    /* If hit the end of the group (which could be repeated), fail */
-
-    if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
-
-    /* Continue after the group, updating the offsets high water mark, since
-    extracts may have been taken. */
-
-    do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
-
-    offset_top = md->end_offset_top;
-    eptr = md->end_match_ptr;
-
-    /* For a non-repeating ket, just continue at this level. This also
-    happens for a repeating ket if no characters were matched in the group.
-    This is the forcible breaking of infinite loops as implemented in Perl
-    5.005. */
-
-    if (*ecode == OP_KET || eptr == saved_eptr)
-      {
-      ecode += 1+LINK_SIZE;
-      break;
-      }
-
-    /* The repeating kets try the rest of the pattern or restart from the
-    preceding bracket, in the appropriate order. The second "call" of match()
-    uses tail recursion, to avoid using another stack frame. */
-
-    if (*ecode == OP_KETRMIN)
-      {
-      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);
-      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-      ecode = prev;
-      }
-    else  /* OP_KETRMAX */
-      {
-      md->match_function_type = MATCH_CBEGROUP; 
-      RMATCH(eptr, prev, offset_top, md, eptrb, RM9);
-      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-      ecode += 1 + LINK_SIZE;
-      }
-    goto TAIL_RECURSE;
-
-    /* Control never gets here */
-
     /* An alternation is the end of a branch; scan along to find the end of the
     bracketed group and go to there. */


@@ -1691,26 +1658,25 @@
     case OP_KETRMAX:
     case OP_KETRPOS: 
     prev = ecode - GET(ecode, 1);
-
+    
     /* If this was a group that remembered the subject start, in order to break
     infinite repeats of empty string matches, retrieve the subject start from
     the chain. Otherwise, set it NULL. */


-    if (*prev >= OP_SBRA)
+    if (*prev >= OP_SBRA || *prev == OP_ONCE)
       {
       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
       eptrb = eptrb->epb_prev;              /* Backup to previous group */
       }
     else saved_eptr = NULL;


-    /* If we are at the end of an assertion group or an atomic group, stop
-    matching and return MATCH_MATCH, but record the current high water mark for
-    use by positive assertions. We also need to record the match start in case
-    it was changed by \K. */
+    /* If we are at the end of an assertion group, stop matching and return
+    MATCH_MATCH, but record the current high water mark for use by positive
+    assertions. We also need to record the match start in case it was changed
+    by \K. */


     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
-        *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
-        *prev == OP_ONCE)
+        *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT)
       {
       md->end_match_ptr = eptr;      /* For ONCE */
       md->end_offset_top = offset_top;
@@ -1720,9 +1686,11 @@


     /* For capturing groups we have to check the group number back at the start
     and if necessary complete handling an extraction by setting the offsets and
-    bumping the high water mark. Note that whole-pattern recursion is coded as
-    a recurse into group 0, so it won't be picked up here. Instead, we catch it
-    when the OP_END is reached. Other recursion is handled here. */
+    bumping the high water mark. Whole-pattern recursion is coded as a recurse
+    into group 0, so it won't be picked up here. Instead, we catch it when the
+    OP_END is reached. Other recursion is handled here. We just have to record
+    the current subject position and start match pointer and give a MATCH
+    return. */


     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
@@ -1735,6 +1703,17 @@
       printf("\n");
 #endif


+      /* Handle a recursively called group. */
+
+      if (md->recursive != NULL && md->recursive->group_num == number)
+        {
+        md->end_match_ptr = eptr;
+        md->start_match_ptr = mstart;
+        RRETURN(MATCH_MATCH);
+        }
+
+      /* Deal with capturing */
+
       md->capture_last = number;
       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
         {
@@ -1762,32 +1741,27 @@
         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
         if (offset_top <= offset) offset_top = offset + 2;
         }
-
-      /* Handle a recursively called group. Restore the offsets
-      appropriately and continue from after the call. */
-
-      if (md->recursive != NULL && md->recursive->group_num == number)
-        {
-        recursion_info *rec = md->recursive;
-        DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
-        md->recursive = rec->prevrec;
-        memcpy(md->offset_vector, rec->offset_save,
-          rec->saved_max * sizeof(int));
-        offset_top = rec->save_offset_top;
-        ecode = rec->after_call;
-        break;
-        }
       }


-    /* For a non-repeating ket, just continue at this level. This also
-    happens for a repeating ket if no characters were matched in the group.
-    This is the forcible breaking of infinite loops as implemented in Perl
-    5.005. If there is an options reset, it will get obeyed in the normal
-    course of events. */
+    /* For an ordinary non-repeating ket, just continue at this level. This
+    also happens for a repeating ket if no characters were matched in the
+    group. This is the forcible breaking of infinite loops as implemented in
+    Perl 5.005. For a non-repeating atomic group, establish a backup point by 
+    processing the rest of the pattern at a lower level. If this results in a 
+    NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby 
+    bypassing intermediate backup points, but resetting any captures that 
+    happened along the way. */


     if (*ecode == OP_KET || eptr == saved_eptr)
       {
-      ecode += 1 + LINK_SIZE;
+      if (*prev == OP_ONCE)
+        {
+        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
+        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
+        RRETURN(MATCH_ONCE); 
+        }  
+      ecode += 1 + LINK_SIZE;    /* Carry on at this level */
       break;
       }


@@ -1805,12 +1779,20 @@
     /* The normal repeating kets try the rest of the pattern or restart from
     the preceding bracket, in the appropriate order. In the second case, we can
     use tail recursion to avoid using another stack frame, unless we have an
-    unlimited repeat of a group that can match an empty string. */
+    atomic group or an unlimited repeat of a group that can match an empty
+    string. */


     if (*ecode == OP_KETRMIN)
       {
-      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
+      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      if (*prev == OP_ONCE)
+        {
+        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
+        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
+        RRETURN(MATCH_ONCE); 
+        }  
       if (*prev >= OP_SBRA)    /* Could match an empty string */
         {
         md->match_function_type = MATCH_CBEGROUP; 
@@ -1824,7 +1806,15 @@
       {
       if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; 
       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
+      if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      if (*prev == OP_ONCE)
+        {
+        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
+        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+        md->once_target = prev;
+        RRETURN(MATCH_ONCE); 
+        }  
       ecode += 1 + LINK_SIZE;
       goto TAIL_RECURSE;
       }
@@ -5707,7 +5697,8 @@
   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
-  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63)
+  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
+  LBL(65) LBL(66) 
 #ifdef SUPPORT_UTF8
   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
   LBL(32) LBL(34) LBL(42) LBL(46)


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/pcre_internal.h    2011-07-16 17:24:16 UTC (rev 618)
@@ -1765,11 +1765,9 @@


 typedef struct recursion_info {
   struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
-  int group_num;                /* Number of group that was called */
-  const uschar *after_call;     /* "Return value": points after the call in the expr */
-  int *offset_save;             /* Pointer to start of saved offsets */
-  int saved_max;                /* Number of saved offsets */
-  int save_offset_top;          /* Current value of offset_top */
+  int group_num;                  /* Number of group that was called */
+  int *offset_save;               /* Pointer to start of saved offsets */
+  int saved_max;                  /* Number of saved offsets */
 } recursion_info;


 /* Structure for building a chain of data for holding the values of the subject
@@ -1827,6 +1825,7 @@
   recursion_info *recursive;    /* Linked list of recursion data */
   void  *callout_data;          /* To pass back to callouts */
   const  uschar *mark;          /* Mark pointer to pass back */
+  const  uschar *once_target;   /* Where to back up to for atomic groups */ 
 } match_data;


/* A similar structure is used for the same purpose by the DFA matching

Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/testdata/testinput1    2011-07-16 17:24:16 UTC (rev 618)
@@ -4157,4 +4157,22 @@
 /(?>(?>(a))b|(a)c)/
     ac


+/(?:(?>([ab])))+a=/+
+    =ba=
+
+/(?>([ab]))+a=/+
+    =ba=
+
+/((?>(a+)b)+(aabab))/
+    aaaabaaabaabab
+
+/(?>a+|ab)+?c/
+    aabc
+
+/(?>a+|ab)+c/
+    aabc
+
+/(?:a+|ab)+c/
+    aabc
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/testdata/testinput2    2011-07-16 17:24:16 UTC (rev 618)
@@ -3757,4 +3757,10 @@
     c
     c\N  


+/(?:(?>(a)))+a%/++
+    %aa%
+
+/(a)b|ac/++
+    ac\O3
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/testdata/testoutput1    2011-07-16 17:24:16 UTC (rev 618)
@@ -6801,4 +6801,35 @@
  1: <unset>
  2: a


+/(?:(?>([ab])))+a=/+
+    =ba=
+ 0: ba=
+ 0+ 
+ 1: b
+
+/(?>([ab]))+a=/+
+    =ba=
+ 0: ba=
+ 0+ 
+ 1: b
+
+/((?>(a+)b)+(aabab))/
+    aaaabaaabaabab
+ 0: aaaabaaabaabab
+ 1: aaaabaaabaabab
+ 2: aaa
+ 3: aabab
+
+/(?>a+|ab)+?c/
+    aabc
+No match
+
+/(?>a+|ab)+c/
+    aabc
+No match
+
+/(?:a+|ab)+c/
+    aabc
+ 0: aabc
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2011-07-12 11:00:10 UTC (rev 617)
+++ code/trunk/testdata/testoutput2    2011-07-16 17:24:16 UTC (rev 618)
@@ -11895,4 +11895,17 @@
     c\N  
 No match


+/(?:(?>(a)))+a%/++
+    %aa%
+ 0: aa%
+ 0+ 
+ 1: a
+ 1+ a%
+
+/(a)b|ac/++
+    ac\O3
+Matched, but too many substrings
+ 0: ac
+ 0+ 
+
 /-- End of testinput2 --/