[Pcre-svn] [1525] code/trunk: Fix bug that did not allow zer…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1525] code/trunk: Fix bug that did not allow zero case for (a)*+ when ovector was too small to capture.
Revision: 1525
          http://vcs.pcre.org/viewvc?view=rev&revision=1525
Author:   ph10
Date:     2015-02-11 16:48:35 +0000 (Wed, 11 Feb 2015)


Log Message:
-----------
Fix bug that did not allow zero case for (a)*+ when ovector was too small to
capture.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_exec.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-02-10 16:49:16 UTC (rev 1524)
+++ code/trunk/ChangeLog    2015-02-11 16:48:35 UTC (rev 1525)
@@ -61,7 +61,11 @@
     so the above pattern is faulted by Perl. PCRE has now been changed so that
     it also rejects such patterns.


+12. A possessive capturing group such as (a)*+ with a minimum repeat of zero
+    failed to allow the zero-repeat case if pcre_exec() was called with an
+    ovector too small to capture the group.


+
Version 8.36 26-September-2014
------------------------------


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2015-02-10 16:49:16 UTC (rev 1524)
+++ code/trunk/pcre_exec.c    2015-02-11 16:48:35 UTC (rev 1525)
@@ -1136,94 +1136,82 @@
     printf("\n");
 #endif


-    if (offset < md->offset_max)
-      {
-      matched_once = FALSE;
-      code_offset = (int)(ecode - md->start_code);
+    if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;


-      save_offset1 = md->offset_vector[offset];
-      save_offset2 = md->offset_vector[offset+1];
-      save_offset3 = md->offset_vector[md->offset_end - number];
-      save_capture_last = md->capture_last;
+    matched_once = FALSE;
+    code_offset = (int)(ecode - md->start_code);


-      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+    save_offset1 = md->offset_vector[offset];
+    save_offset2 = md->offset_vector[offset+1];
+    save_offset3 = md->offset_vector[md->offset_end - number];
+    save_capture_last = md->capture_last;


-      /* Each time round the loop, save the current subject position for use
-      when the group matches. For MATCH_MATCH, the group has matched, so we
-      restart it with a new subject starting position, remembering that we had
-      at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
-      usual. If we haven't matched any alternatives in any iteration, check to
-      see if a previous iteration matched. If so, the group has matched;
-      continue from afterwards. Otherwise it has failed; restore the previous
-      capture values before returning NOMATCH. */
+    DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));


-      for (;;)
+    /* Each time round the loop, save the current subject position for use
+    when the group matches. For MATCH_MATCH, the group has matched, so we
+    restart it with a new subject starting position, remembering that we had
+    at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
+    usual. If we haven't matched any alternatives in any iteration, check to
+    see if a previous iteration matched. If so, the group has matched;
+    continue from afterwards. Otherwise it has failed; restore the previous
+    capture values before returning NOMATCH. */
+
+    for (;;)
+      {
+      md->offset_vector[md->offset_end - number] =
+        (int)(eptr - md->start_subject);
+      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
+      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
+        eptrb, RM63);
+      if (rrc == MATCH_KETRPOS)
         {
-        md->offset_vector[md->offset_end - number] =
-          (int)(eptr - md->start_subject);
-        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
-        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
-          eptrb, RM63);
-        if (rrc == MATCH_KETRPOS)
+        offset_top = md->end_offset_top;
+        ecode = md->start_code + code_offset;
+        save_capture_last = md->capture_last;
+        matched_once = TRUE;
+        mstart = md->start_match_ptr;    /* In case \K changed it */
+        if (eptr == md->end_match_ptr)   /* Matched an empty string */
           {
-          offset_top = md->end_offset_top;
-          ecode = md->start_code + code_offset;
-          save_capture_last = md->capture_last;
-          matched_once = TRUE;
-          mstart = md->start_match_ptr;    /* In case \K changed it */
-          if (eptr == md->end_match_ptr)   /* Matched an empty string */
-            {
-            do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
-            break;
-            }
-          eptr = md->end_match_ptr;
-          continue;
+          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+          break;
           }
-
-        /* See comment in the code for capturing groups above about handling
-        THEN. */
-
-        if (rrc == MATCH_THEN)
-          {
-          next = ecode + GET(ecode,1);
-          if (md->start_match_ptr < next &&
-              (*ecode == OP_ALT || *next == OP_ALT))
-            rrc = MATCH_NOMATCH;
-          }
-
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        md->capture_last = save_capture_last;
-        ecode += GET(ecode, 1);
-        if (*ecode != OP_ALT) break;
+        eptr = md->end_match_ptr;
+        continue;
         }


-      if (!matched_once)
-        {
-        md->offset_vector[offset] = save_offset1;
-        md->offset_vector[offset+1] = save_offset2;
-        md->offset_vector[md->offset_end - number] = save_offset3;
-        }
+      /* See comment in the code for capturing groups above about handling
+      THEN. */


-      if (allow_zero || matched_once)
+      if (rrc == MATCH_THEN)
         {
-        ecode += 1 + LINK_SIZE;
-        break;
+        next = ecode + GET(ecode,1);
+        if (md->start_match_ptr < next &&
+            (*ecode == OP_ALT || *next == OP_ALT))
+          rrc = MATCH_NOMATCH;
         }


-      RRETURN(MATCH_NOMATCH);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      md->capture_last = save_capture_last;
+      ecode += GET(ecode, 1);
+      if (*ecode != OP_ALT) break;
       }


-    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
-    as a non-capturing bracket. */
+    if (!matched_once)
+      {
+      md->offset_vector[offset] = save_offset1;
+      md->offset_vector[offset+1] = save_offset2;
+      md->offset_vector[md->offset_end - number] = save_offset3;
+      }


-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+    if (allow_zero || matched_once)
+      {
+      ecode += 1 + LINK_SIZE;
+      break;
+      }


-    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
+    RRETURN(MATCH_NOMATCH);


-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
-
     /* Non-capturing possessive bracket with unlimited repeat. We come here
     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
     without the capturing complication. It is written out separately for speed
@@ -1404,11 +1392,11 @@
         condition = TRUE;


         /* Advance ecode past the assertion to the start of the first branch,
-        but adjust it so that the general choosing code below works. If the 
-        assertion has a quantifier that allows zero repeats we must skip over 
+        but adjust it so that the general choosing code below works. If the
+        assertion has a quantifier that allows zero repeats we must skip over
         the BRAZERO. This is a lunatic thing to do, but somebody did! */
-        
-        if (*ecode == OP_BRAZERO) ecode++; 
+
+        if (*ecode == OP_BRAZERO) ecode++;
         ecode += GET(ecode, 1);
         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
@@ -1840,11 +1828,11 @@
         are defined in a range that can be tested for. */


         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
-          { 
+          {
           if (new_recursive.offset_save != stacksave)
             (PUBL(free))(new_recursive.offset_save);
           RRETURN(MATCH_NOMATCH);
-          } 
+          }


         /* Any return code other than NOMATCH is an error. */



Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-02-10 16:49:16 UTC (rev 1524)
+++ code/trunk/testdata/testinput2    2015-02-11 16:48:35 UTC (rev 1525)
@@ -4113,4 +4113,12 @@
      ** Failers
      356   


+'^(a)*+(\w)'
+    g
+    g\O3
+
+'^(?:a)*+(\w)'
+    g
+    g\O3
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-02-10 16:49:16 UTC (rev 1524)
+++ code/trunk/testdata/testoutput2    2015-02-11 16:48:35 UTC (rev 1525)
@@ -14292,4 +14292,21 @@
      356   
 No match


+'^(a)*+(\w)'
+    g
+ 0: g
+ 1: <unset>
+ 2: g
+    g\O3
+Matched, but too many substrings
+ 0: g
+
+'^(?:a)*+(\w)'
+    g
+ 0: g
+ 1: g
+    g\O3
+Matched, but too many substrings
+ 0: g
+
 /-- End of testinput2 --/