[Pcre-svn] [620] code/trunk: Fix mutual recursion minimum ca…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [620] code/trunk: Fix mutual recursion minimum calculation in study bug.
Revision: 620
          http://vcs.pcre.org/viewvc?view=rev&revision=620
Author:   ph10
Date:     2011-07-17 14:53:14 +0100 (Sun, 17 Jul 2011)


Log Message:
-----------
Fix mutual recursion minimum calculation in study bug.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_study.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/ChangeLog    2011-07-17 13:53:14 UTC (rev 620)
@@ -137,6 +137,13 @@
 24. If an assertion condition captured any substrings, they were not passed 
     back unless some other capturing happened later. For example, if
     (?(?=(a))a) was matched against "a", no capturing was returned.
+    
+25. When studying a pattern that contained subroutine calls or assertions,
+    the code for finding the minimum length of a possible match was handling 
+    direct recursions such as (xxx(?1)|yyy) but not mutual recursions (where 
+    group 1 called group 2 while simultaneously a separate group 2 called group
+    1). A stack overflow occurred in this case. I have fixed this by limiting
+    the recursion depth to 10.



Version 8.12 15-Jan-2011

Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c    2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/pcre_study.c    2011-07-17 13:53:14 UTC (rev 620)
@@ -70,6 +70,7 @@
   startcode   pointer to start of the whole pattern
   options     the compiling options
   had_accept  pointer to flag for (*ACCEPT) encountered
+  int         RECURSE depth 


 Returns:   the minimum length
            -1 if \C was encountered
@@ -79,7 +80,7 @@


 static int
 find_minlength(const uschar *code, const uschar *startcode, int options,
-  BOOL *had_accept_ptr)
+  BOOL *had_accept_ptr, int recurse_depth)
 {
 int length = -1;
 BOOL utf8 = (options & PCRE_UTF8) != 0;
@@ -127,7 +128,7 @@
     case OP_BRAPOS:
     case OP_SBRAPOS:
     case OP_ONCE:
-    d = find_minlength(cc, startcode, options, had_accept_ptr);
+    d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth);
     if (d < 0) return d;
     branchlength += d;
     if (*had_accept_ptr) return branchlength; 
@@ -378,7 +379,8 @@
         }
       else 
         {
-        d = find_minlength(cs, startcode, options, had_accept_ptr);
+        d = find_minlength(cs, startcode, options, had_accept_ptr, 
+          recurse_depth);
         *had_accept_ptr = FALSE; 
         } 
       }
@@ -416,16 +418,20 @@


     branchlength += min * d;
     break;
+    
+    /* We can easily detect direct recursion, but not mutual recursion. This is 
+    caught by a recursion depth count. */ 


     case OP_RECURSE:
     cs = ce = (uschar *)startcode + GET(cc, 1);
     if (cs == NULL) return -2;
     do ce += GET(ce, 1); while (*ce == OP_ALT);
-    if (cc > cs && cc < ce)
+    if ((cc > cs && cc < ce) || recurse_depth > 10)
       had_recurse = TRUE;
     else
       { 
-      branchlength += find_minlength(cs, startcode, options, had_accept_ptr);
+      branchlength += find_minlength(cs, startcode, options, had_accept_ptr,
+        recurse_depth + 1);
       *had_accept_ptr = FALSE;
       }  
     cc += 1 + LINK_SIZE;
@@ -1275,7 +1281,7 @@


/* Find the minimum length of subject string. */

-switch(min = find_minlength(code, code, re->options, &had_accept))
+switch(min = find_minlength(code, code, re->options, &had_accept, 0))
{
case -2: *errorptr = "internal error: missing capturing bracket"; break;
case -3: *errorptr = "internal error: opcode not recognized"; break;

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/testdata/testinput2    2011-07-17 13:53:14 UTC (rev 620)
@@ -3763,4 +3763,12 @@
 /(a)b|ac/++
     ac\O3


+/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/SI
+
+/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/SI
+
+/(a(?2)|b)(b(?1)|a)(?1)(?2)/SI
+
+/(abc)(?1)/SI
+
/-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/testdata/testoutput2    2011-07-17 13:53:14 UTC (rev 620)
@@ -11908,4 +11908,36 @@
  0: ac
  0+ 


+/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/SI
+Capturing subpattern count = 2
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/SI
+Capturing subpattern count = 2
+No options
+No first char
+No need char
+Subject length lower bound = 3
+Starting byte set: a b
+
+/(a(?2)|b)(b(?1)|a)(?1)(?2)/SI
+Capturing subpattern count = 2
+No options
+No first char
+No need char
+Subject length lower bound = 4
+Starting byte set: a b
+
+/(abc)(?1)/SI
+Capturing subpattern count = 1
+No options
+First char = 'a'
+Need char = 'c'
+Subject length lower bound = 6
+No set of starting bytes
+
/-- End of testinput2 --/