[Pcre-svn] [133] code/trunk: Fix bug for (*ACCEPT) inside a …

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [133] code/trunk: Fix bug for (*ACCEPT) inside a capturing group.
Revision: 133
          http://www.exim.org/viewvc/pcre2?view=rev&revision=133
Author:   ph10
Date:     2014-11-05 16:05:19 +0000 (Wed, 05 Nov 2014)


Log Message:
-----------
Fix bug for (*ACCEPT) inside a capturing group.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_match.c
    code/trunk/src/pcre2test.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testoutput1


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2014-11-03 18:27:56 UTC (rev 132)
+++ code/trunk/ChangeLog    2014-11-05 16:05:19 UTC (rev 133)
@@ -44,4 +44,11 @@
 information. This applied to any pattern with a group that matched no
 characters, for example: /(?:(?=.)|(?<!x))a/.


+7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
+those parentheses to be closed with whatever has been captured so far. However,
+it was failing to mark any other groups between the highest capture so far and
+the current group as "unset". Thus, the ovector for those groups contained
+whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
+matched against "abcd".
+
****

Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c    2014-11-03 18:27:56 UTC (rev 132)
+++ code/trunk/src/pcre2_match.c    2014-11-05 16:05:19 UTC (rev 133)
@@ -1465,7 +1465,18 @@
       mb->ovector[offset] =
         mb->ovector[mb->offset_end - number];
       mb->ovector[offset+1] = eptr - mb->start_subject;
-      if (offset_top <= offset) offset_top = offset + 2;
+
+      /* If this group is at or above the current highwater mark, ensure that
+      any groups between the current high water mark and this group are marked
+      unset and then update the high water mark. */
+
+      if (offset >= offset_top)
+        {
+        register PCRE2_SIZE *iptr = mb->ovector + offset_top;
+        register PCRE2_SIZE *iend = mb->ovector + offset;
+        while (iptr < iend) *iptr++ = PCRE2_UNSET;
+        offset_top = offset + 2;
+        }
       }
     ecode += 1 + IMM2_SIZE;
     break;
@@ -6321,18 +6332,18 @@
 *           Match a Regular Expression           *
 *************************************************/


-/* This function applies a compiled re to a subject string and picks out
+/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.

 Arguments:
-  context         points a PCRE2 context
   code            points to the compiled expression
   subject         points to the subject string
   length          length of subject string (may contain binary zeros)
   start_offset    where to start in the subject string
   options         option bits
   match_data      points to a match_data block
+  mcontext        points to a PCRE2 context


 Returns:          > 0 => success; value is the number of ovector pairs filled
                   = 0 => success, but ovector is not big enough


Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c    2014-11-03 18:27:56 UTC (rev 132)
+++ code/trunk/src/pcre2test.c    2014-11-05 16:05:19 UTC (rev 133)
@@ -163,6 +163,7 @@
 #define CFAIL_UNSET UINT32_MAX  /* Unset value for cfail fields */
 #define DFA_WS_DIMENSION 1000   /* Size of DFA workspace */
 #define DEFAULT_OVECCOUNT 15    /* Default ovector count */
+#define JUNK_OFFSET 0xdeadbeef  /* For initializing ovector */
 #define LOOPREPEAT 500000       /* Default loop count for timing */
 #define VERSION_SIZE 64         /* Size of buffer for the version strings */


@@ -4685,12 +4686,18 @@

for (gmatched = 0;; gmatched++)
{
+ PCRE2_SIZE j;
int capcount;
PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[2];

ovector = FLD(match_data, ovector);

+  /* Fill the ovector with junk to detect elements that do not get set
+  when they should be. */
+    
+  for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
+
   /* When matching is via pcre2_match(), we will detect the use of JIT via the
   stack callback function. */


@@ -4786,7 +4793,7 @@
       {
       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
       }
-
+      
     /* Run a single DFA or NFA match. */


     if ((dat_datctl.control & CTL_DFA) != 0)
@@ -4887,14 +4894,27 @@
         fprintf(outfile, "Start of matched string is beyond its end - "
           "displaying from end to start.\n");
         }
+        
+      fprintf(outfile, "%2d: ", i/2);


-      fprintf(outfile, "%2d: ", i/2);
+      /* Check for an unset group */
+
       if (start == PCRE2_UNSET)
         {
         fprintf(outfile, "<unset>\n");
         continue;
         }


+      /* Check for silly offsets, in particular, values that have not been
+      set when they should have been. */ 
+        
+      if (start > ulen || end > ulen)
+        {
+        fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
+          start, end);
+        continue;    
+        }  
+ 
       /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
       JIT, it is disabled above, with a comment.) When the match is done by the
       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
@@ -4918,7 +4938,6 @@


         if (showallused)
           {
-          PCRE2_SIZE j;
           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
@@ -4944,7 +4963,6 @@
             fprintf(outfile, " (JIT)");
           if (startchar != start)
             {
-            PCRE2_SIZE j;
             fprintf(outfile, "\n    ");
             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
             }


Modified: code/trunk/testdata/testinput1
===================================================================
(Binary files differ)

Modified: code/trunk/testdata/testoutput1
===================================================================
(Binary files differ)