Revision: 620
http://vcs.pcre.org/viewvc?view=rev&revision=620
Author: ph10
Date: 2011-07-17 14:53:14 +0100 (Sun, 17 Jul 2011)
Log Message:
-----------
Fix mutual recursion minimum calculation in study bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_study.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/ChangeLog 2011-07-17 13:53:14 UTC (rev 620)
@@ -137,6 +137,13 @@
24. If an assertion condition captured any substrings, they were not passed
back unless some other capturing happened later. For example, if
(?(?=(a))a) was matched against "a", no capturing was returned.
+
+25. When studying a pattern that contained subroutine calls or assertions,
+ the code for finding the minimum length of a possible match was handling
+ direct recursions such as (xxx(?1)|yyy) but not mutual recursions (where
+ group 1 called group 2 while simultaneously a separate group 2 called group
+ 1). A stack overflow occurred in this case. I have fixed this by limiting
+ the recursion depth to 10.
Version 8.12 15-Jan-2011
Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c 2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/pcre_study.c 2011-07-17 13:53:14 UTC (rev 620)
@@ -70,6 +70,7 @@
startcode pointer to start of the whole pattern
options the compiling options
had_accept pointer to flag for (*ACCEPT) encountered
+ int RECURSE depth
Returns: the minimum length
-1 if \C was encountered
@@ -79,7 +80,7 @@
static int
find_minlength(const uschar *code, const uschar *startcode, int options,
- BOOL *had_accept_ptr)
+ BOOL *had_accept_ptr, int recurse_depth)
{
int length = -1;
BOOL utf8 = (options & PCRE_UTF8) != 0;
@@ -127,7 +128,7 @@
case OP_BRAPOS:
case OP_SBRAPOS:
case OP_ONCE:
- d = find_minlength(cc, startcode, options, had_accept_ptr);
+ d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth);
if (d < 0) return d;
branchlength += d;
if (*had_accept_ptr) return branchlength;
@@ -378,7 +379,8 @@
}
else
{
- d = find_minlength(cs, startcode, options, had_accept_ptr);
+ d = find_minlength(cs, startcode, options, had_accept_ptr,
+ recurse_depth);
*had_accept_ptr = FALSE;
}
}
@@ -416,16 +418,20 @@
branchlength += min * d;
break;
+
+ /* We can easily detect direct recursion, but not mutual recursion. This is
+ caught by a recursion depth count. */
case OP_RECURSE:
cs = ce = (uschar *)startcode + GET(cc, 1);
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
- if (cc > cs && cc < ce)
+ if ((cc > cs && cc < ce) || recurse_depth > 10)
had_recurse = TRUE;
else
{
- branchlength += find_minlength(cs, startcode, options, had_accept_ptr);
+ branchlength += find_minlength(cs, startcode, options, had_accept_ptr,
+ recurse_depth + 1);
*had_accept_ptr = FALSE;
}
cc += 1 + LINK_SIZE;
@@ -1275,7 +1281,7 @@
/* Find the minimum length of subject string. */
-switch(min = find_minlength(code, code, re->options, &had_accept))
+switch(min = find_minlength(code, code, re->options, &had_accept, 0))
{
case -2: *errorptr = "internal error: missing capturing bracket"; break;
case -3: *errorptr = "internal error: opcode not recognized"; break;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/testdata/testinput2 2011-07-17 13:53:14 UTC (rev 620)
@@ -3763,4 +3763,12 @@
/(a)b|ac/++
ac\O3
+/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/SI
+
+/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/SI
+
+/(a(?2)|b)(b(?1)|a)(?1)(?2)/SI
+
+/(abc)(?1)/SI
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-07-17 13:23:14 UTC (rev 619)
+++ code/trunk/testdata/testoutput2 2011-07-17 13:53:14 UTC (rev 620)
@@ -11908,4 +11908,36 @@
0: ac
0+
+/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/SI
+Capturing subpattern count = 2
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/SI
+Capturing subpattern count = 2
+No options
+No first char
+No need char
+Subject length lower bound = 3
+Starting byte set: a b
+
+/(a(?2)|b)(b(?1)|a)(?1)(?2)/SI
+Capturing subpattern count = 2
+No options
+No first char
+No need char
+Subject length lower bound = 4
+Starting byte set: a b
+
+/(abc)(?1)/SI
+Capturing subpattern count = 1
+No options
+First char = 'a'
+Need char = 'c'
+Subject length lower bound = 6
+No set of starting bytes
+
/-- End of testinput2 --/