Revision: 613
http://vcs.pcre.org/viewvc?view=rev&revision=613
Author: ph10
Date: 2011-07-02 17:59:52 +0100 (Sat, 02 Jul 2011)
Log Message:
-----------
Fix problem with the interaction of (*ACCEPT) in an assertion with
PCRE_NOTEMPTY.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/pcre_dfa_exec.c
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/pcre_study.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/ChangeLog 2011-07-02 16:59:52 UTC (rev 613)
@@ -110,6 +110,9 @@
20. If /S is present twice on a test pattern in pcretest input, it *disables*
studying, thereby overriding the use of -s on the command line. This is
necessary for one or two tests to keep the output identical in both cases.
+
+21. When (*ACCEPT) was used in an assertion that matched an empty string and
+ PCRE_NOTEMPTY was set, PCRE applied the non-empty test to the assertion.
Version 8.12 15-Jan-2011
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_compile.c 2011-07-02 16:59:52 UTC (rev 613)
@@ -4931,22 +4931,29 @@
if (namelen == verbs[i].len &&
strncmp((char *)name, vn, namelen) == 0)
{
- /* Check for open captures before ACCEPT */
+ /* Check for open captures before ACCEPT and convert it to
+ ASSERT_ACCEPT if in an assertion. */
if (verbs[i].op == OP_ACCEPT)
{
open_capitem *oc;
+ if (arglen != 0)
+ {
+ *errorcodeptr = ERR59;
+ goto FAILED;
+ }
cd->had_accept = TRUE;
for (oc = cd->open_caps; oc != NULL; oc = oc->next)
{
*code++ = OP_CLOSE;
PUT2INC(code, 0, oc->number);
}
+ *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
}
- /* Handle the cases with/without an argument */
+ /* Handle other cases with/without an argument */
- if (arglen == 0)
+ else if (arglen == 0)
{
if (verbs[i].op < 0) /* Argument is mandatory */
{
@@ -5235,6 +5242,7 @@
/* ------------------------------------------------------------ */
case CHAR_EQUALS_SIGN: /* Positive lookahead */
bravalue = OP_ASSERT;
+ cd->assert_depth += 1;
ptr++;
break;
@@ -5249,6 +5257,7 @@
continue;
}
bravalue = OP_ASSERT_NOT;
+ cd->assert_depth += 1;
break;
@@ -5258,11 +5267,13 @@
{
case CHAR_EQUALS_SIGN: /* Positive lookbehind */
bravalue = OP_ASSERTBACK;
+ cd->assert_depth += 1;
ptr += 2;
break;
case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */
bravalue = OP_ASSERTBACK_NOT;
+ cd->assert_depth += 1;
ptr += 2;
break;
@@ -5830,6 +5841,9 @@
&length_prevgroup /* Pre-compile phase */
))
goto FAILED;
+
+ if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
+ cd->assert_depth -= 1;
/* At the end of compiling, code is still pointing to the start of the
group, while tempcode has been updated to point past the end of the group
@@ -7152,6 +7166,7 @@
*/
cd->final_bracount = cd->bracount; /* Save for checking forward references */
+cd->assert_depth = 0;
cd->bracount = 0;
cd->names_found = 0;
cd->name_table = (uschar *)re + re->name_table_offset;
Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_dfa_exec.c 2011-07-02 16:59:52 UTC (rev 613)
@@ -170,9 +170,10 @@
0, 0, /* RREF, NRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
- 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */
- 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */
- 0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */
+ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
+ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
+ 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
+ 0, 0 /* CLOSE, SKIPZERO */
};
/* This table identifies those opcodes that inspect a character. It is used to
@@ -237,9 +238,10 @@
0, 0, /* RREF, NRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
- 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */
- 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */
- 0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */
+ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
+ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
+ 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
+ 0, 0 /* CLOSE, SKIPZERO */
};
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_exec.c 2011-07-02 16:59:52 UTC (rev 613)
@@ -1296,6 +1296,7 @@
recursion, continue from after the call. */
case OP_ACCEPT:
+ case OP_ASSERT_ACCEPT:
case OP_END:
if (md->recursive != NULL)
{
@@ -1311,12 +1312,12 @@
}
}
- /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
- set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
- the subject. In both cases, backtracking will then try other alternatives,
- if any. */
+ /* Otherwise, if we have matched an empty string, fail if not in an
+ assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
+ is set and we have matched at the start of the subject. In both cases,
+ backtracking will then try other alternatives, if any. */
- else if (eptr == mstart &&
+ else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
(md->notempty ||
(md->notempty_atstart &&
mstart == md->start_subject + md->start_offset)))
@@ -5899,12 +5900,17 @@
md->use_ucp = (re->options & PCRE_UCP) != 0;
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
+/* Some options are unpacked into BOOL variables in the hope that testing
+them will be faster than testing individual option bits. */
+
md->notbol = (options & PCRE_NOTBOL) != 0;
md->noteol = (options & PCRE_NOTEOL) != 0;
md->notempty = (options & PCRE_NOTEMPTY) != 0;
md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
+
+
md->hitend = FALSE;
md->mark = NULL; /* In case never set */
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_internal.h 2011-07-02 16:59:52 UTC (rev 613)
@@ -1445,7 +1445,8 @@
OP_KETRMIN, /* 116 order. They are for groups that repeat for ever. */
OP_KETRPOS, /* 117 Possessive unlimited repeat. */
- /* The assertions must come before BRA, CBRA, ONCE, and COND.*/
+ /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
+ asserts must remain in order. */
OP_ASSERT, /* 118 Positive lookahead */
OP_ASSERT_NOT, /* 119 Negative lookahead */
@@ -1455,7 +1456,7 @@
/* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come after the assertions,
with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an
- assertion. The POS versions must immediately follow the non-POS versions in
+ assertion. The POS versions must immediately follow the non-POS versions in
each case. */
OP_ONCE, /* 123 Atomic group */
@@ -1484,7 +1485,7 @@
OP_BRAZERO, /* 139 These two must remain together and in this */
OP_BRAMINZERO, /* 140 order. */
- OP_BRAPOSZERO, /* 141 */
+ OP_BRAPOSZERO, /* 141 */
/* These are backtracking control verbs */
@@ -1501,11 +1502,12 @@
OP_FAIL, /* 150 */
OP_ACCEPT, /* 151 */
- OP_CLOSE, /* 152 Used before OP_ACCEPT to close open captures */
+ OP_ASSERT_ACCEPT, /* 152 Used inside assertions */
+ OP_CLOSE, /* 153 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 153 */
+ OP_SKIPZERO, /* 154 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1557,7 +1559,8 @@
"Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
"Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
- "*THEN", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \
+ "*THEN", "*THEN", "*COMMIT", "*FAIL", \
+ "*ACCEPT", "*ASSERT_ACCEPT", \
"Close", "Skip zero"
@@ -1639,7 +1642,8 @@
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \
1+LINK_SIZE, 3+LINK_SIZE, /* THEN, THEN_ARG */ \
- 1, 1, 1, 3, 1 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */ \
+ 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
+ 3, 1 /* CLOSE, SKIPZERO */
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
condition. */
@@ -1737,6 +1741,7 @@
int final_bracount; /* Saved value after first pass */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
+ int assert_depth; /* Depth of nested assertions */
int external_options; /* External (initial) options */
int external_flags; /* External flag bits to be set */
int req_varyopt; /* "After variable item" flag for reqbyte */
@@ -1793,8 +1798,8 @@
int name_entry_size; /* Size of entry in names table */
uschar *name_table; /* Table of names */
uschar nl[4]; /* Newline string when fixed */
- const uschar *lcc; /* Points to lower casing table */
- const uschar *ctypes; /* Points to table of type maps */
+ const uschar *lcc; /* Points to lower casing table */
+ const uschar *ctypes; /* Points to table of type maps */
BOOL offset_overflow; /* Set if too many extractions */
BOOL notbol; /* NOTBOL flag */
BOOL noteol; /* NOTEOL flag */
@@ -1806,7 +1811,7 @@
BOOL notempty_atstart; /* Empty string match at start not wanted */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
- const uschar *start_code; /* For use when recursing */
+ const uschar *start_code; /* For use when recursing */
USPTR start_subject; /* Start of the subject string */
USPTR end_subject; /* End of the subject string */
USPTR start_match_ptr; /* Start of matched string */
@@ -1816,12 +1821,12 @@
int end_offset_top; /* Highwater mark at end of match */
int capture_last; /* Most recent capture number */
int start_offset; /* The start offset value */
- int match_function_type; /* Set for certain special calls of MATCH() */
+ int match_function_type; /* Set for certain special calls of MATCH() */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
int eptrn; /* Next free eptrblock */
recursion_info *recursive; /* Linked list of recursion data */
void *callout_data; /* To pass back to callouts */
- const uschar *mark; /* Mark pointer to pass back */
+ const uschar *mark; /* Mark pointer to pass back */
} match_data;
/* A similar structure is used for the same purpose by the DFA matching
Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/pcre_study.c 2011-07-02 16:59:52 UTC (rev 613)
@@ -142,6 +142,7 @@
counting stops. */
case OP_ACCEPT:
+ case OP_ASSERT_ACCEPT:
*had_accept_ptr = TRUE;
/* Fall through */
case OP_ALT:
@@ -715,6 +716,7 @@
/* Fail for a valid opcode that implies no starting bits. */
case OP_ACCEPT:
+ case OP_ASSERT_ACCEPT:
case OP_ALLANY:
case OP_ANY:
case OP_ANYBYTE:
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/testdata/testinput2 2011-07-02 16:59:52 UTC (rev 613)
@@ -3745,4 +3745,16 @@
/-- --/
+"(?=a*(*ACCEPT)b)c"
+ c
+ c\N
+
+/(?1)c(?(DEFINE)((*ACCEPT)b))/
+ c
+ c\N
+
+/(?>(*ACCEPT)b)c/
+ c
+ c\N
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-07-02 15:20:59 UTC (rev 612)
+++ code/trunk/testdata/testoutput2 2011-07-02 16:59:52 UTC (rev 613)
@@ -11877,4 +11877,22 @@
/-- --/
+"(?=a*(*ACCEPT)b)c"
+ c
+ 0: c
+ c\N
+ 0: c
+
+/(?1)c(?(DEFINE)((*ACCEPT)b))/
+ c
+ 0: c
+ c\N
+ 0: c
+
+/(?>(*ACCEPT)b)c/
+ c
+ 0:
+ c\N
+No match
+
/-- End of testinput2 --/