[Pcre-svn] [697] code/trunk: Fix minimum length bug with *ACCEPT.

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [697] code/trunk: Fix minimum length bug with *ACCEPT.
Revision: 697
          http://vcs.pcre.org/viewvc?view=rev&revision=697
Author:   ph10
Date:     2011-09-19 13:28:24 +0100 (Mon, 19 Sep 2011)


Log Message:
-----------
Fix minimum length bug with *ACCEPT.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/configure.ac
    code/trunk/pcre_study.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-09-18 15:09:49 UTC (rev 696)
+++ code/trunk/ChangeLog    2011-09-19 12:28:24 UTC (rev 697)
@@ -41,6 +41,13 @@
    (*PRUNE) or any other control that caused it to give a non-standard return,
    invalid errors such as "Error -26 (nested recursion at the same subject
    position)" or even infinite loops could occur.
+   
+7. If a pattern such as /a(*SKIP)c|b(*ACCEPT)|/ was studied, it stopped 
+   computing the minimum length on reaching *ACCEPT, and so ended up with the 
+   wrong value of 1 rather than 0. Further investigation indicates that
+   computing a minimum subject length in the presence of *ACCEPT is difficult
+   (think back references, subroutine calls), and so I have changed the code so
+   that no minimum is registered for a pattern that contains *ACCEPT.



Version 8.13 16-Aug-2011

Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac    2011-09-18 15:09:49 UTC (rev 696)
+++ code/trunk/configure.ac    2011-09-19 12:28:24 UTC (rev 697)
@@ -10,8 +10,8 @@


m4_define(pcre_major, [8])
m4_define(pcre_minor, [20])
-m4_define(pcre_prerelease, [-RC1])
-m4_define(pcre_date, [2011-09-12])
+m4_define(pcre_prerelease, [-RC2])
+m4_define(pcre_date, [2011-09-18])

# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0])

Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c    2011-09-18 15:09:49 UTC (rev 696)
+++ code/trunk/pcre_study.c    2011-09-19 12:28:24 UTC (rev 697)
@@ -66,21 +66,20 @@
 rather than bytes.


 Arguments:
-  code        pointer to start of group (the bracket)
-  startcode   pointer to start of the whole pattern
-  options     the compiling options
-  had_accept  pointer to flag for (*ACCEPT) encountered
-  int         RECURSE depth
+  code            pointer to start of group (the bracket)
+  startcode       pointer to start of the whole pattern
+  options         the compiling options
+  recurse_depth   RECURSE depth


 Returns:   the minimum length
-           -1 if \C was encountered
+           -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
            -2 internal error (missing capturing bracket)
            -3 internal error (opcode not listed)
 */


 static int
 find_minlength(const uschar *code, const uschar *startcode, int options,
-  BOOL *had_accept_ptr, int recurse_depth)
+  int recurse_depth)
 {
 int length = -1;
 BOOL utf8 = (options & PCRE_UTF8) != 0;
@@ -128,24 +127,25 @@
     case OP_BRAPOS:
     case OP_SBRAPOS:
     case OP_ONCE:
-    d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth);
+    d = find_minlength(cc, startcode, options, recurse_depth);
     if (d < 0) return d;
     branchlength += d;
-    if (*had_accept_ptr) return branchlength;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
     cc += 1 + LINK_SIZE;
     break;


-    /* Reached end of a branch; if it's a ket it is the end of a nested
-    call. If it's ALT it is an alternation in a nested call. If it is END it's
-    the end of the outer call. All can be handled by the same code. If it is
-    ACCEPT, it is essentially the same as END, but we set a flag so that
-    counting stops. */
+    /* ACCEPT makes things far too complicated; we have to give up. */


     case OP_ACCEPT:
     case OP_ASSERT_ACCEPT:
-    *had_accept_ptr = TRUE;
-    /* Fall through */
+    return -1; 
+
+    /* Reached end of a branch; if it's a ket it is the end of a nested
+    call. If it's ALT it is an alternation in a nested call. If it is END it's
+    the end of the outer call. All can be handled by the same code. ACCEPT is
+    not handled here: the OP_ACCEPT and OP_ASSERT_ACCEPT cases above give up
+    and return -1, so no ACCEPT length is tracked or passed back. */
+ 
     case OP_ALT:
     case OP_KET:
     case OP_KETRMAX:
@@ -379,9 +379,7 @@
         }
       else
         {
-        d = find_minlength(cs, startcode, options, had_accept_ptr,
-          recurse_depth);
-        *had_accept_ptr = FALSE;
+        d = find_minlength(cs, startcode, options, recurse_depth);
         }
       }
     else d = 0;
@@ -430,9 +428,7 @@
       had_recurse = TRUE;
     else
       {
-      branchlength += find_minlength(cs, startcode, options, had_accept_ptr,
-        recurse_depth + 1);
-      *had_accept_ptr = FALSE;
+      branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
       }
     cc += 1 + LINK_SIZE;
     break;
@@ -1228,7 +1224,6 @@
 {
 int min;
 BOOL bits_set = FALSE;
-BOOL had_accept = FALSE;
 uschar start_bits[32];
 pcre_extra *extra = NULL;
 pcre_study_data *study;
@@ -1290,13 +1285,13 @@


/* Find the minimum length of subject string. */

-switch(min = find_minlength(code, code, re->options, &had_accept, 0))
+switch(min = find_minlength(code, code, re->options, 0))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
default: break;
}
-
+
/* If a set of starting bytes has been identified, or if the minimum length is
greater than zero, or if JIT optimization has been requested, get a pcre_extra
block and a pcre_study_data block. The study data is put in the latter, which
@@ -1336,10 +1331,14 @@
/* Always set the minlength value in the block, because the JIT compiler
makes use of it. However, don't set the bit unless the length is greater than
zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
- checking this case. */
+ checking the zero case. */

-  study->minlength = min;
-  if (min > 0) study->flags |= PCRE_STUDY_MINLEN;
+  if (min > 0) 
+    {
+    study->flags |= PCRE_STUDY_MINLEN;
+    study->minlength = min;
+    }
+  else study->minlength = 0;    


/* If JIT support was compiled and requested, attempt the JIT compilation.
If no starting bytes were found, and the minimum length is zero, and JIT

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2011-09-18 15:09:49 UTC (rev 696)
+++ code/trunk/testdata/testinput2    2011-09-19 12:28:24 UTC (rev 697)
@@ -3848,4 +3848,15 @@


/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|<input\b.*?\btype\b\W*?\bimage\b|\bonkeyup\b\W*?\=/IS

+/a(*SKIP)c|b(*ACCEPT)|/+SI
+    a
+
+/a(*SKIP)c|b(*ACCEPT)cd(*ACCEPT)|x/SI
+    ax
+
+'a*(*ACCEPT)b'+
+    \N\N
+    abc\N\N
+    bbb\N\N 
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2011-09-18 15:09:49 UTC (rev 696)
+++ code/trunk/testdata/testoutput2    2011-09-19 12:28:24 UTC (rev 697)
@@ -12251,4 +12251,34 @@
 Subject length lower bound = 8
 Starting byte set: < o t u 


+/a(*SKIP)c|b(*ACCEPT)|/+SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Study returned NULL
+    a
+ 0: 
+ 0+ 
+
+/a(*SKIP)c|b(*ACCEPT)cd(*ACCEPT)|x/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = -1
+Starting byte set: a b x 
+    ax
+ 0: x
+
+'a*(*ACCEPT)b'+
+    \N\N
+No match
+    abc\N\N
+ 0: a
+ 0+ bc
+    bbb\N\N 
+ 0: 
+ 0+ bb
+
 /-- End of testinput2 --/