Revision: 503
http://vcs.pcre.org/viewvc?view=rev&revision=503
Author: ph10
Date: 2010-03-07 17:35:52 +0000 (Sun, 07 Mar 2010)
Log Message:
-----------
Fix incorrect compile time error for certain types of recursive patterns.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput11
code/trunk/testdata/testoutput11
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/ChangeLog 2010-03-07 17:35:52 UTC (rev 503)
@@ -51,6 +51,12 @@
implementation of support for (*MARK) will need an extra pointer on the
stack; I have reserved it now, so that the stack frame size does not
decrease.
+
+13. A pattern such as (?P<L1>(?P<L2>0)|(?P>L2)(?P>L1)) in which the only other
+ item in branch that calls a recursion is a subroutine call - as in the
+ second branch in the above example - was incorrectly given the compile-
+ time error "recursive call could loop indefinitely" because pcre_compile()
+ was not correctly checking the subroutine for matching a non-empty string.
Version 8.01 19-Jan-2010
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/pcre_compile.c 2010-03-07 17:35:52 UTC (rev 503)
@@ -1785,12 +1785,14 @@
code points to start of search
endcode points to where to stop
utf8 TRUE if in UTF8 mode
+ cd contains pointers to tables etc.
Returns: TRUE if what is matched could be empty
*/
static BOOL
-could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)
+could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
+ compile_data *cd)
{
register int c;
for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
@@ -1800,7 +1802,7 @@
const uschar *ccode;
c = *code;
-
+
/* Skip over forward assertions; the other assertions are skipped by
first_significant_code() with a TRUE final argument. */
@@ -1820,6 +1822,22 @@
c = *code;
continue;
}
+
+ /* For a recursion/subroutine call, if its end has been reached, which
+ implies a subroutine call, we can scan it. */
+
+ if (c == OP_RECURSE)
+ {
+ const uschar *scode = cd->start_code + GET(code, 1);
+ if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
+ do
+ {
+ if (!could_be_empty_branch(scode, endcode, utf8, cd)) return FALSE;
+ scode += GET(scode, 1);
+ }
+ while (*scode == OP_ALT);
+ continue;
+ }
/* For other groups, scan the branches. */
@@ -1839,7 +1857,7 @@
empty_branch = FALSE;
do
{
- if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
+ if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
empty_branch = TRUE;
code += GET(code, 1);
}
@@ -1973,6 +1991,11 @@
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
break;
#endif
+
+ /* None of the remaining opcodes are required to match a character. */
+
+ default:
+ break;
}
}
@@ -1995,17 +2018,18 @@
endcode points to where to stop (current RECURSE item)
bcptr points to the chain of current (unclosed) branch starts
utf8 TRUE if in UTF-8 mode
+ cd pointers to tables etc
Returns: TRUE if what is matched could be empty
*/
static BOOL
could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
- BOOL utf8)
+ BOOL utf8, compile_data *cd)
{
while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
return FALSE;
bcptr = bcptr->outer;
}
@@ -4363,7 +4387,7 @@
uschar *scode = bracode;
do
{
- if (could_be_empty_branch(scode, ketcode, utf8))
+ if (could_be_empty_branch(scode, ketcode, utf8, cd))
{
*bracode += OP_SBRA - OP_BRA;
break;
@@ -5176,7 +5200,7 @@
recursion that could loop for ever, and diagnose that case. */
else if (GET(called, 1) == 0 &&
- could_be_empty(called, code, bcptr, utf8))
+ could_be_empty(called, code, bcptr, utf8, cd))
{
*errorcodeptr = ERR40;
goto FAILED;
Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11 2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/testdata/testinput11 2010-03-07 17:35:52 UTC (rev 503)
@@ -379,4 +379,14 @@
a(b)c
a(b(c)d)e
+/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
+ 0
+ 00
+ 0000
+
+/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
+ 0
+ 00
+ 0000
+
/-- End of testinput11 --/
Modified: code/trunk/testdata/testoutput11
===================================================================
--- code/trunk/testdata/testoutput11 2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/testdata/testoutput11 2010-03-07 17:35:52 UTC (rev 503)
@@ -776,4 +776,31 @@
0: a(b(c)d)e
1: e
+/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
+ 0
+ 0: 0
+ 1: 0
+ 00
+ 0: 00
+ 1: 00
+ 2: 0
+ 0000
+ 0: 0000
+ 1: 0000
+ 2: 0
+
+/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
+ 0
+ 0: 0
+ 1: 0
+ 2: 0
+ 00
+ 0: 0
+ 1: 0
+ 2: 0
+ 0000
+ 0: 0
+ 1: 0
+ 2: 0
+
/-- End of testinput11 --/