Revision: 241
http://www.exim.org/viewvc/pcre2?view=rev&revision=241
Author: ph10
Date: 2015-04-01 16:37:54 +0100 (Wed, 01 Apr 2015)
Log Message:
-----------
Fix a stack overflow that occurred, instead of an error being diagnosed, for
mutual recursion within a lookbehind assertion.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-04-01 13:56:11 UTC (rev 240)
+++ code/trunk/ChangeLog 2015-04-01 15:37:54 UTC (rev 241)
@@ -63,7 +63,11 @@
it should have been signed. Some other "int" variables, having been checked,
have either been changed to uint32_t or commented as "must be signed".
+16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1)))
+caused a stack overflow instead of the diagnosis of a non-fixed length
+lookbehind assertion. This bug was discovered by the LLVM fuzzer.
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-04-01 13:56:11 UTC (rev 240)
+++ code/trunk/src/pcre2_compile.c 2015-04-01 15:37:54 UTC (rev 241)
@@ -75,8 +75,8 @@
const uint32_t *, unsigned int);
static BOOL
- compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL,
- uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *,
+ compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL,
+ uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *,
branch_chain *, compile_block *, size_t *);
@@ -677,6 +677,15 @@
};
+/* Structure for checking for mutual recursion when scanning compiled code. */
+
+typedef struct recurse_check {
+ struct recurse_check *prev;
+ PCRE2_SPTR group;
+} recurse_check;
+
+
+
/*************************************************
* Free compiled code *
*************************************************/
@@ -785,6 +794,7 @@
utf TRUE in UTF mode
atend TRUE if called when the pattern is complete
cb the "compile data" structure
+ recurses chain of recurse_check to catch mutual recursion
Returns: the fixed length,
or -1 if there is no fixed length,
@@ -794,10 +804,11 @@
*/
static int
-find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb)
+find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb,
+ recurse_check *recurses)
{
int length = -1;
-
+recurse_check this_recurse;
register int branchlength = 0;
register PCRE2_UCHAR *cc = code + 1 + LINK_SIZE;
@@ -822,7 +833,8 @@
case OP_ONCE:
case OP_ONCE_NC:
case OP_COND:
- d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cb);
+ d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cb,
+ recurses);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -853,10 +865,18 @@
case OP_RECURSE:
if (!atend) return -3;
- cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */
+ cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */
do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */
if (cc > cs && cc < ce) return -1; /* Recursion */
- d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cb);
+ else /* Check for mutual recursion */
+ {
+ recurse_check *r = recurses;
+ for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+ if (r != NULL) return -1; /* Mutual recursion */
+ }
+ this_recurse.prev = recurses;
+ this_recurse.group = cs;
+ d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cb, &this_recurse);
if (d < 0) return d;
branchlength += d;
cc += 1 + LINK_SIZE;
@@ -1196,11 +1216,6 @@
Returns: TRUE if what is matched could be empty
*/
-typedef struct recurse_check {
- struct recurse_check *prev;
- PCRE2_SPTR group;
-} recurse_check;
-
static BOOL
could_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf,
compile_block *cb, recurse_check *recurses)
@@ -7037,7 +7052,7 @@
int fixed_length;
*code = OP_END;
fixed_length = find_fixedlength(last_branch, (options & PCRE2_UTF) != 0,
- FALSE, cb);
+ FALSE, cb, NULL);
if (fixed_length == -3)
{
cb->check_lookbehind = TRUE;
@@ -8075,7 +8090,7 @@
PCRE2_UCHAR *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
int end_op = *be;
*be = OP_END;
- fixed_length = find_fixedlength(cc, utf, TRUE, &cb);
+ fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL);
*be = end_op;
if (fixed_length < 0)
{
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-04-01 13:56:11 UTC (rev 240)
+++ code/trunk/testdata/testinput2 2015-04-01 15:37:54 UTC (rev 241)
@@ -4253,4 +4253,6 @@
/(?<=\bABQ(3(?+7)))/
+";(?<=()((?3))((?2)))"
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-04-01 13:56:11 UTC (rev 240)
+++ code/trunk/testdata/testoutput2 2015-04-01 15:37:54 UTC (rev 241)
@@ -14257,4 +14257,7 @@
/(?<=\bABQ(3(?+7)))/
Failed: error 115 at offset 15: reference to non-existent subpattern
+";(?<=()((?3))((?2)))"
+Failed: error 125 at offset 20: lookbehind assertion is not fixed length
+
# End of testinput2