Revision: 133
http://www.exim.org/viewvc/pcre2?view=rev&revision=133
Author: ph10
Date: 2014-11-05 16:05:19 +0000 (Wed, 05 Nov 2014)
Log Message:
-----------
Fix bug for (*ACCEPT) inside a capturing group.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_match.c
code/trunk/src/pcre2test.c
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2014-11-03 18:27:56 UTC (rev 132)
+++ code/trunk/ChangeLog 2014-11-05 16:05:19 UTC (rev 133)
@@ -44,4 +44,11 @@
information. This applied to any pattern with a group that matched no
characters, for example: /(?:(?=.)|(?<!x))a/.
+7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
+those parentheses to be closed with whatever has been captured so far. However,
it was failing to mark any other groups between the highest capture so far and
the current group as "unset". Thus, the ovector for those groups contained
+whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
+matched against "abcd".
+
****
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2014-11-03 18:27:56 UTC (rev 132)
+++ code/trunk/src/pcre2_match.c 2014-11-05 16:05:19 UTC (rev 133)
@@ -1465,7 +1465,18 @@
mb->ovector[offset] =
mb->ovector[mb->offset_end - number];
mb->ovector[offset+1] = eptr - mb->start_subject;
- if (offset_top <= offset) offset_top = offset + 2;
+
+ /* If this group is at or above the current high water mark, ensure that
+ any groups between the current high water mark and this group are marked
+ unset and then update the high water mark. */
+
+ if (offset >= offset_top)
+ {
+ register PCRE2_SIZE *iptr = mb->ovector + offset_top;
+ register PCRE2_SIZE *iend = mb->ovector + offset;
+ while (iptr < iend) *iptr++ = PCRE2_UNSET;
+ offset_top = offset + 2;
+ }
}
ecode += 1 + IMM2_SIZE;
break;
@@ -6321,18 +6332,18 @@
* Match a Regular Expression *
*************************************************/
-/* This function applies a compiled re to a subject string and picks out
+/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.
Arguments:
- context points a PCRE2 context
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
+ mcontext points to a PCRE2 context
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2014-11-03 18:27:56 UTC (rev 132)
+++ code/trunk/src/pcre2test.c 2014-11-05 16:05:19 UTC (rev 133)
@@ -163,6 +163,7 @@
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
+#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOOPREPEAT 500000 /* Default loop count for timing */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
@@ -4685,12 +4686,18 @@
for (gmatched = 0;; gmatched++)
{
+ PCRE2_SIZE j;
int capcount;
PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[2];
ovector = FLD(match_data, ovector);
+ /* Fill the ovector with junk to detect elements that do not get set
+ when they should be. */
+
+ for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
+
/* When matching is via pcre2_match(), we will detect the use of JIT via the
stack callback function. */
@@ -4786,7 +4793,7 @@
{
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
}
-
+
/* Run a single DFA or NFA match. */
if ((dat_datctl.control & CTL_DFA) != 0)
@@ -4887,14 +4894,27 @@
fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n");
}
+
+ fprintf(outfile, "%2d: ", i/2);
- fprintf(outfile, "%2d: ", i/2);
+ /* Check for an unset group */
+
if (start == PCRE2_UNSET)
{
fprintf(outfile, "<unset>\n");
continue;
}
+ /* Check for silly offsets, in particular, values that have not been
+ set when they should have been. */
+
+ if (start > ulen || end > ulen)
+ {
+ fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
+ start, end);
+ continue;
+ }
+
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
JIT, it is disabled above, with a comment.) When the match is done by the
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
@@ -4918,7 +4938,6 @@
if (showallused)
{
- PCRE2_SIZE j;
PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
PCHARS(lmiddle, pp, start, end - start, utf, outfile);
PCHARS(lright, pp, end, rightchar - end, utf, outfile);
@@ -4944,7 +4963,6 @@
fprintf(outfile, " (JIT)");
if (startchar != start)
{
- PCRE2_SIZE j;
fprintf(outfile, "\n ");
for (j = 0; j < lleft; j++) fprintf(outfile, "^");
}
Modified: code/trunk/testdata/testinput1
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/testoutput1
===================================================================
(Binary files differ)