Revision: 211
http://www.exim.org/viewvc/pcre2?view=rev&revision=211
Author: ph10
Date: 2015-02-28 11:31:51 +0000 (Sat, 28 Feb 2015)
Log Message:
-----------
Fix "internal error" bug caused by patterns like "((?2){0,1999}())?".
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-02-26 17:36:29 UTC (rev 210)
+++ code/trunk/ChangeLog 2015-02-28 11:31:51 UTC (rev 211)
@@ -97,7 +97,15 @@
21. "make distclean" was not removing config.h, a file that is created for use
with CMake.
+22. A pattern such as "((?2){0,1999}())?", which has a group containing a
+forward reference repeated a large (but limited) number of times within a
+repeated outer group that has a zero minimum quantifier, caused incorrect code
+to be compiled, leading to the error "internal error: previously-checked
+referenced subpattern not found" when an incorrect memory address was read.
+This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's
+FortiGuard Labs.
+
Version 10.00 05-January-2015
-----------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-02-26 17:36:29 UTC (rev 210)
+++ code/trunk/src/pcre2_compile.c 2015-02-28 11:31:51 UTC (rev 211)
@@ -2586,18 +2586,18 @@
value in the reference (which is a group number).
Arguments:
- group points to the start of the group
- adjust the amount by which the group is to be moved
- utf TRUE in UTF mode
- cb compile data
- save_hwm the hwm forward reference pointer at the start of the group
+ group points to the start of the group
+ adjust the amount by which the group is to be moved
+ utf TRUE in UTF mode
+ cb compile data
+ save_hwm_offset the hwm forward reference offset at the start of the group
Returns: nothing
*/
static void
adjust_recurse(PCRE2_UCHAR *group, int adjust, BOOL utf, compile_block *cb,
- PCRE2_UCHAR *save_hwm)
+ size_t save_hwm_offset)
{
PCRE2_UCHAR *ptr = group;
@@ -2609,7 +2609,8 @@
/* See if this recursion is on the forward reference list. If so, adjust the
reference. */
- for (hc = save_hwm; hc < cb->hwm; hc += LINK_SIZE)
+ for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; hc < cb->hwm;
+ hc += LINK_SIZE)
{
offset = (int)GET(hc, 0);
if (cb->start_code + offset == ptr + 1)
@@ -3093,7 +3094,7 @@
PCRE2_SPTR nestptr = NULL;
PCRE2_UCHAR *previous = NULL;
PCRE2_UCHAR *previous_callout = NULL;
-PCRE2_UCHAR *save_hwm = NULL;
+size_t save_hwm_offset = 0;
uint8_t classbits[32];
/* We can fish out the UTF setting once and for all into a BOOL, but we must
@@ -4565,7 +4566,7 @@
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
{
*code = OP_END;
- adjust_recurse(previous, 1, utf, cb, save_hwm);
+ adjust_recurse(previous, 1, utf, cb, save_hwm_offset);
memmove(previous + 1, previous, CU2BYTES(len));
code++;
if (repeat_max == 0)
@@ -4589,7 +4590,7 @@
{
int offset;
*code = OP_END;
- adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, save_hwm);
+ adjust_recurse(previous, 2 + LINK_SIZE, utf, cb, save_hwm_offset);
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type;
@@ -4652,26 +4653,25 @@
for (i = 1; i < repeat_min; i++)
{
PCRE2_UCHAR *hc;
- PCRE2_UCHAR *this_hwm = cb->hwm;
+ size_t this_hwm_offset = cb->hwm - cb->start_workspace;
memcpy(code, previous, CU2BYTES(len));
while (cb->hwm > cb->start_workspace + cb->workspace_size -
- WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+ WORK_SIZE_SAFETY_MARGIN -
+ (this_hwm_offset - save_hwm_offset))
{
- size_t save_offset = save_hwm - cb->start_workspace;
- size_t this_offset = this_hwm - cb->start_workspace;
*errorcodeptr = expand_workspace(cb);
if (*errorcodeptr != 0) goto FAILED;
- save_hwm = (PCRE2_UCHAR *)cb->start_workspace + save_offset;
- this_hwm = (PCRE2_UCHAR *)cb->start_workspace + this_offset;
}
- for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+ for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset;
+ hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset;
+ hc += LINK_SIZE)
{
PUT(cb->hwm, 0, GET(hc, 0) + len);
cb->hwm += LINK_SIZE;
}
- save_hwm = this_hwm;
+ save_hwm_offset = this_hwm_offset;
code += len;
}
}
@@ -4716,7 +4716,7 @@
else for (i = repeat_max - 1; i >= 0; i--)
{
PCRE2_UCHAR *hc;
- PCRE2_UCHAR *this_hwm = cb->hwm;
+ size_t this_hwm_offset = cb->hwm - cb->start_workspace;
*code++ = OP_BRAZERO + repeat_type;
@@ -4738,22 +4738,21 @@
copying them. */
while (cb->hwm > cb->start_workspace + cb->workspace_size -
- WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+ WORK_SIZE_SAFETY_MARGIN -
+ (this_hwm_offset - save_hwm_offset))
{
- size_t save_offset = save_hwm - cb->start_workspace;
- size_t this_offset = this_hwm - cb->start_workspace;
*errorcodeptr = expand_workspace(cb);
if (*errorcodeptr != 0) goto FAILED;
- save_hwm = (PCRE2_UCHAR *)cb->start_workspace + save_offset;
- this_hwm = (PCRE2_UCHAR *)cb->start_workspace + this_offset;
}
- for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+ for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset;
+ hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset;
+ hc += LINK_SIZE)
{
PUT(cb->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
cb->hwm += LINK_SIZE;
}
- save_hwm = this_hwm;
+ save_hwm_offset = this_hwm_offset;
code += len;
}
@@ -4849,7 +4848,7 @@
{
int nlen = (int)(code - bracode);
*code = OP_END;
- adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, save_hwm);
+ adjust_recurse(bracode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE;
@@ -4984,7 +4983,7 @@
else
{
*code = OP_END;
- adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, save_hwm);
+ adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cb, save_hwm_offset);
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
@@ -5009,13 +5008,17 @@
/* ===================================================================*/
/* Start of nested parenthesized sub-expression, or comment or lookahead or
lookbehind or option setting or condition or all the other extended
- parenthesis forms. */
+ parenthesis forms. We must save the current high-water-mark for the
+ forward reference list so that we know where they start for this group.
+ However, because the list may be extended when there are very many forward
+ references (usually the result of a replicated inner group), we must use
+ an offset rather than an absolute address. */
case CHAR_LEFT_PARENTHESIS:
newoptions = options;
skipbytes = 0;
bravalue = OP_CBRA;
- save_hwm = cb->hwm;
+ save_hwm_offset = cb->hwm - cb->start_workspace;
reset_bracount = FALSE;
/* First deal with various "verbs" that can be introduced by '*'. */
@@ -5972,7 +5975,8 @@
/* Fudge the value of "called" so that when it is inserted as an
offset below, what it actually inserted is the reference number
- of the group. Then remember the forward reference. */
+ of the group. Then remember the forward reference, expanding the
+ working space where the list is kept if necessary. */
called = cb->start_code + recno;
if (cb->hwm >= cb->start_workspace + cb->workspace_size -
@@ -6395,7 +6399,9 @@
PCRE2_SPTR p;
uint32_t cf;
- save_hwm = cb->hwm; /* Normally this is set when '(' is read */
+ /* Normally save_hwm_offset is set when '(' is read */
+
+ save_hwm_offset = cb->hwm - cb->start_workspace;
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
@@ -6933,7 +6939,7 @@
{
*code = OP_END;
adjust_recurse(start_bracket, 1 + LINK_SIZE,
- (options & PCRE2_UTF) != 0, cb, cb->hwm);
+ (options & PCRE2_UTF) != 0, cb, cb->hwm - cb->start_workspace);
memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
CU2BYTES(code - start_bracket));
*start_bracket = OP_ONCE;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-02-26 17:36:29 UTC (rev 210)
+++ code/trunk/testdata/testinput2 2015-02-28 11:31:51 UTC (rev 211)
@@ -4171,5 +4171,9 @@
'^(?:a)*+(\w)'
g
g\=ovector=1
+
+# This pattern showed up a compile-time bug
+"((?2){0,1999}())?"
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-02-26 17:36:29 UTC (rev 210)
+++ code/trunk/testdata/testoutput2 2015-02-28 11:31:51 UTC (rev 211)
@@ -13949,5 +13949,9 @@
g\=ovector=1
Matched, but too many substrings
0: g
+
+# This pattern showed up a compile-time bug
+"((?2){0,1999}())?"
+
# End of testinput2