Revision: 232
http://www.exim.org/viewvc/pcre2?view=rev&revision=232
Author: ph10
Date: 2015-03-25 17:01:04 +0000 (Wed, 25 Mar 2015)
Log Message:
-----------
Add recursion limit to auto-possessification code.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_auto_possess.c
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-03-24 10:21:34 UTC (rev 231)
+++ code/trunk/ChangeLog 2015-03-25 17:01:04 UTC (rev 232)
@@ -25,7 +25,12 @@
pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect
error about an unsupported item.
+8. For some types of pattern, for example /Z*(|d*){216}/, the auto-
+possessification code could take exponential time to complete. A recursion
+depth limit of 10000 has been imposed to limit the resources used by this
+optimization. This infelicity was discovered by the LLVM fuzzer.
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/src/pcre2_auto_possess.c
===================================================================
--- code/trunk/src/pcre2_auto_possess.c 2015-03-24 10:21:34 UTC (rev 231)
+++ code/trunk/src/pcre2_auto_possess.c 2015-03-25 17:01:04 UTC (rev 232)
@@ -561,13 +561,15 @@
utf TRUE in UTF mode
cb compile data block
base_list the data list of the base opcode
+ base_end the end of the data list
+ rec_limit points to recursion depth counter
Returns: TRUE if the auto-possessification is possible
*/
static BOOL
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
- const uint32_t *base_list, PCRE2_SPTR base_end)
+ const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
{
PCRE2_UCHAR c;
uint32_t list[8];
@@ -584,6 +586,8 @@
BOOL accepted, invert_bits;
BOOL entered_a_group = FALSE;
+if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */
+
/* Note: the base_list[1] contains whether the current opcode has a greedy
(represented by a non-zero value) quantifier. This is a different from
other character type lists, which store here that the character iterator
@@ -660,7 +664,8 @@
while (*next_code == OP_ALT)
{
- if (!compare_opcodes(code, utf, cb, base_list, base_end)) return FALSE;
+ if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
+ return FALSE;
code = next_code + 1 + LINK_SIZE;
next_code += GET(next_code, 1);
}
@@ -680,7 +685,7 @@
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE;
- if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
+ if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
return FALSE;
code += PRIV(OP_lengths)[c];
@@ -1116,6 +1121,7 @@
PCRE2_SPTR end;
PCRE2_UCHAR *repeat_opcode;
uint32_t list[8];
+int rec_limit;
for (;;)
{
@@ -1130,7 +1136,8 @@
get_chr_property_list(code, utf, cb->fcc, list) : NULL;
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
- if (end != NULL && compare_opcodes(end, utf, cb, list, end))
+ rec_limit = 10000;
+ if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
{
switch(c)
{
@@ -1186,7 +1193,8 @@
list[1] = (c & 1) == 0;
- if (compare_opcodes(end, utf, cb, list, end))
+ rec_limit = 10000;
+ if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
{
switch (c)
{
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2015-03-24 10:21:34 UTC (rev 231)
+++ code/trunk/testdata/testinput1 2015-03-25 17:01:04 UTC (rev 232)
@@ -5710,4 +5710,6 @@
/(\2)(\1)/
+"Z*(|d*){216}"
+
# End of testinput1
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2015-03-24 10:21:34 UTC (rev 231)
+++ code/trunk/testdata/testoutput1 2015-03-25 17:01:04 UTC (rev 232)
@@ -9420,4 +9420,6 @@
/(\2)(\1)/
+"Z*(|d*){216}"
+
# End of testinput1