Revision: 1274
http://vcs.pcre.org/viewvc?view=rev&revision=1274
Author: ph10
Date: 2013-03-08 11:35:41 +0000 (Fri, 08 Mar 2013)
Log Message:
-----------
Fix the case where there are two or more SKIPs that may have to be ignored.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/ChangeLog 2013-03-08 11:35:41 UTC (rev 1274)
@@ -101,7 +101,10 @@
26. Fix infinite loop when /(?<=(*SKIP)ac)a/ is matched against aa.
+27. Fix the case where there are two or more SKIPs with arguments that may be
+ ignored.
+
Version 8.32 30-November-2012
-----------------------------
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/pcre_exec.c 2013-03-08 11:35:41 UTC (rev 1274)
@@ -821,11 +821,16 @@
RRETURN(MATCH_SKIP);
/* Note that, for Perl compatibility, SKIP with an argument does NOT set
- nomatch_mark. There is a flag that disables this opcode when re-matching a
- pattern that ended with a SKIP for which there was not a matching MARK. */
+ nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
+ not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
+ that failed and any that precede it (either they also failed, or were not
+ triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
+ SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
+ set to the count of the one that failed. */
case OP_SKIP_ARG:
- if (md->ignore_skip_arg)
+ md->skip_arg_count++;
+ if (md->skip_arg_count <= md->ignore_skip_arg)
{
ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
break;
@@ -834,11 +839,11 @@
eptrb, RM57);
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
RRETURN(rrc);
-
+
/* Pass back the current skip name by overloading md->start_match_ptr and
returning the special MATCH_SKIP_ARG return code. This will either be
caught by a matching MARK, or get to the top, where it causes a rematch
- with the md->ignore_skip_arg flag set. */
+ with md->ignore_skip_arg set to the value of md->skip_arg_count. */
md->start_match_ptr = ecode + 2;
RRETURN(MATCH_SKIP_ARG);
@@ -6516,7 +6521,7 @@
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
md->use_ucp = (re->options & PCRE_UCP) != 0;
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-md->ignore_skip_arg = FALSE;
+md->ignore_skip_arg = 0;
/* Some options are unpacked into BOOL variables in the hope that testing
them will be faster than individual option bits. */
@@ -6898,6 +6903,7 @@
md->match_call_count = 0;
md->match_function_type = 0;
md->end_offset_top = 0;
+ md->skip_arg_count = 0;
rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
if (md->hitend && start_partial == NULL)
{
@@ -6916,7 +6922,7 @@
case MATCH_SKIP_ARG:
new_start_match = start_match;
- md->ignore_skip_arg = TRUE;
+ md->ignore_skip_arg = md->skip_arg_count;
break;
/* SKIP passes back the next starting point explicitly, but if it is no
@@ -6931,12 +6937,12 @@
/* Fall through */
/* NOMATCH and PRUNE advance by one character. THEN at this level acts
- exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
+ exactly like PRUNE. Unset ignore SKIP-with-argument. */
case MATCH_NOMATCH:
case MATCH_PRUNE:
case MATCH_THEN:
- md->ignore_skip_arg = FALSE;
+ md->ignore_skip_arg = 0;
new_start_match = start_match + 1;
#ifdef SUPPORT_UTF
if (utf)
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/pcre_internal.h 2013-03-08 11:35:41 UTC (rev 1274)
@@ -2469,6 +2469,8 @@
int nllen; /* Newline string length */
int name_count; /* Number of names in name table */
int name_entry_size; /* Size of entry in names table */
+ unsigned int skip_arg_count; /* For counting SKIP_ARGs */
+ unsigned int ignore_skip_arg; /* For re-run when SKIP arg name not found */
pcre_uchar *name_table; /* Table of names */
pcre_uchar nl[4]; /* Newline string when fixed */
const pcre_uint8 *lcc; /* Points to lower casing table */
@@ -2485,7 +2487,6 @@
BOOL hitend; /* Hit the end of the subject at some point */
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
BOOL hasthen; /* Pattern contains (*THEN) */
- BOOL ignore_skip_arg; /* For re-run when SKIP name not found */
const pcre_uchar *start_code; /* For use when recursing */
PCRE_PUCHAR start_subject; /* Start of the subject string */
PCRE_PUCHAR end_subject; /* End of the subject string */
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/testdata/testinput1 2013-03-08 11:35:41 UTC (rev 1274)
@@ -5315,4 +5315,16 @@
/(?<=(*SKIP)ac)a/
aa
+/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/xK
+ AAAC
+
+/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/K
+ acacd
+
+/A(*SKIP:m)x|A(*SKIP:n)x|AB/K
+ AB
+
+/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K
+ acacd
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/testdata/testoutput1 2013-03-08 11:35:41 UTC (rev 1274)
@@ -8811,4 +8811,22 @@
aa
No match
+/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/xK
+ AAAC
+ 0: AC
+
+/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/K
+ acacd
+ 0: acd
+MK: n
+
+/A(*SKIP:m)x|A(*SKIP:n)x|AB/K
+ AB
+ 0: AB
+
+/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K
+ acacd
+ 0: ac
+MK: n
+
/-- End of testinput1 --/