[Pcre-svn] [1274] code/trunk: Fix the case where there are t…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1274] code/trunk: Fix the case where there are two or more SKIPs that may have to be ignored.
Revision: 1274
          http://vcs.pcre.org/viewvc?view=rev&revision=1274
Author:   ph10
Date:     2013-03-08 11:35:41 +0000 (Fri, 08 Mar 2013)


Log Message:
-----------
Fix the case where there are two or more SKIPs that may have to be ignored.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/testdata/testinput1
    code/trunk/testdata/testoutput1


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/ChangeLog    2013-03-08 11:35:41 UTC (rev 1274)
@@ -101,7 +101,10 @@


26. Fix infinite loop when /(?<=(*SKIP)ac)a/ is matched against aa.

+27. Fix the case where there are two or more SKIPs with arguments that may be 
+    ignored. 


+
Version 8.32 30-November-2012
-----------------------------


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/pcre_exec.c    2013-03-08 11:35:41 UTC (rev 1274)
@@ -821,11 +821,16 @@
     RRETURN(MATCH_SKIP);


     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
-    nomatch_mark. There is a flag that disables this opcode when re-matching a
-    pattern that ended with a SKIP for which there was not a matching MARK. */
+    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was 
+    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
+    that failed and any that precede it (either they also failed, or were not 
+    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a 
+    SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
+    set to the count of the one that failed. */


     case OP_SKIP_ARG:
-    if (md->ignore_skip_arg)
+    md->skip_arg_count++;
+    if (md->skip_arg_count <= md->ignore_skip_arg)
       {
       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
       break;
@@ -834,11 +839,11 @@
       eptrb, RM57);
     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
       RRETURN(rrc);
-
+      
     /* Pass back the current skip name by overloading md->start_match_ptr and
     returning the special MATCH_SKIP_ARG return code. This will either be
     caught by a matching MARK, or get to the top, where it causes a rematch
-    with the md->ignore_skip_arg flag set. */
+    with md->ignore_skip_arg set to the value of md->skip_arg_count. */


     md->start_match_ptr = ecode + 2;
     RRETURN(MATCH_SKIP_ARG);
@@ -6516,7 +6521,7 @@
 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 md->use_ucp = (re->options & PCRE_UCP) != 0;
 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-md->ignore_skip_arg = FALSE;
+md->ignore_skip_arg = 0;


 /* Some options are unpacked into BOOL variables in the hope that testing
 them will be faster than individual option bits. */
@@ -6898,6 +6903,7 @@
   md->match_call_count = 0;
   md->match_function_type = 0;
   md->end_offset_top = 0;
+  md->skip_arg_count = 0;
   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
   if (md->hitend && start_partial == NULL)
     {
@@ -6916,7 +6922,7 @@


     case MATCH_SKIP_ARG:
     new_start_match = start_match;
-    md->ignore_skip_arg = TRUE;
+    md->ignore_skip_arg = md->skip_arg_count;
     break;


     /* SKIP passes back the next starting point explicitly, but if it is no
@@ -6931,12 +6937,12 @@
     /* Fall through */


     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
-    exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
+    exactly like PRUNE. Reset the ignore-SKIP-with-argument count. */


     case MATCH_NOMATCH:
     case MATCH_PRUNE:
     case MATCH_THEN:
-    md->ignore_skip_arg = FALSE;
+    md->ignore_skip_arg = 0;
     new_start_match = start_match + 1;
 #ifdef SUPPORT_UTF
     if (utf)


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/pcre_internal.h    2013-03-08 11:35:41 UTC (rev 1274)
@@ -2469,6 +2469,8 @@
   int    nllen;                   /* Newline string length */
   int    name_count;              /* Number of names in name table */
   int    name_entry_size;         /* Size of entry in names table */
+  unsigned int skip_arg_count;    /* For counting SKIP_ARGs */
+  unsigned int ignore_skip_arg;   /* For re-run when SKIP arg name not found */
   pcre_uchar *name_table;         /* Table of names */
   pcre_uchar nl[4];               /* Newline string when fixed */
   const  pcre_uint8 *lcc;         /* Points to lower casing table */
@@ -2485,7 +2487,6 @@
   BOOL   hitend;                  /* Hit the end of the subject at some point */
   BOOL   bsr_anycrlf;             /* \R is just any CRLF, not full Unicode */
   BOOL   hasthen;                 /* Pattern contains (*THEN) */
-  BOOL   ignore_skip_arg;         /* For re-run when SKIP name not found */
   const  pcre_uchar *start_code;  /* For use when recursing */
   PCRE_PUCHAR start_subject;      /* Start of the subject string */
   PCRE_PUCHAR end_subject;        /* End of the subject string */


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/testdata/testinput1    2013-03-08 11:35:41 UTC (rev 1274)
@@ -5315,4 +5315,16 @@
 /(?<=(*SKIP)ac)a/
     aa


+/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/xK
+    AAAC
+
+/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/K
+    acacd
+
+/A(*SKIP:m)x|A(*SKIP:n)x|AB/K
+    AB
+
+/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K
+    acacd
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2013-03-08 10:25:02 UTC (rev 1273)
+++ code/trunk/testdata/testoutput1    2013-03-08 11:35:41 UTC (rev 1274)
@@ -8811,4 +8811,22 @@
     aa
 No match


+/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/xK
+    AAAC
+ 0: AC
+
+/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/K
+    acacd
+ 0: acd
+MK: n
+
+/A(*SKIP:m)x|A(*SKIP:n)x|AB/K
+    AB
+ 0: AB
+
+/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K
+    acacd
+ 0: ac
+MK: n
+
 /-- End of testinput1 --/