Revision: 599
http://www.exim.org/viewvc/pcre2?view=rev&revision=599
Author: ph10
Date: 2016-11-18 18:59:37 +0000 (Fri, 18 Nov 2016)
Log Message:
-----------
Fix overrun bug caused by conditional with assertion using too much memory.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/HACKING
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput18
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/ChangeLog 2016-11-18 18:59:37 UTC (rev 599)
@@ -82,6 +82,13 @@
(i) An insufficient memory size was being computed for compiling with
PCRE2_AUTO_CALLOUT.
+
+ (j) A conditional group with an assertion condition used more memory than was
+ allowed for it during parsing, so too many of them could therefore
+ overrun a buffer.
+
+ (k) If parsing a pattern exactly filled the buffer, the internal test for
+ overrun did not check when the final META_END item was added.
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
Modified: code/trunk/HACKING
===================================================================
--- code/trunk/HACKING 2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/HACKING 2016-11-18 18:59:37 UTC (rev 599)
@@ -186,6 +186,7 @@
META_CLASS_END ] end of non-empty class
META_CLASS_NOT [^ start non-empty negative class
META_COMMIT (*COMMIT)
+META_COND_ASSERT (?(?assertion)
META_DOLLAR $ metacharacter
META_DOT . metacharacter
META_END End of pattern (this value is 0x80000000)
@@ -274,9 +275,8 @@
META_COND_NUMBER (?([+-]digits)
-The following are followed just by an offset, for use in error messages:
+The following is followed just by an offset, for use in error messages:
-META_COND_ASSERT (?(?assertion)
META_COND_DEFINE (?(DEFINE)
In fact, META_COND_ASSERT is used for any group starting (?( that does not
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/src/pcre2_compile.c 2016-11-18 18:59:37 UTC (rev 599)
@@ -856,6 +856,7 @@
case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
+ case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break;
case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
case META_DOT: fprintf(stderr, "META_DOT"); break;
case META_ASTERISK: fprintf(stderr, "META *"); break;
@@ -949,12 +950,6 @@
fprintf(stderr, "%zd", offset);
break;
- case META_COND_ASSERT:
- fprintf(stderr, "META_COND_ASSERT offset=");
- GETOFFSET(offset, pptr);
- fprintf(stderr, "%zd", offset);
- break;
-
case META_COND_VERSION:
fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">=");
fprintf(stderr, "%d.", *pptr++);
@@ -2189,12 +2184,12 @@
PCRE2_SPTR tempptr;
PCRE2_SPTR thisptr;
PCRE2_SIZE offset;
-
+
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
- goto FAILED;
- }
+ goto FAILED;
+ }
if (nest_depth > cb->cx->parens_nest_limit)
{
@@ -2368,6 +2363,42 @@
parsed_pattern, cb);
}
+ /* If expect_cond_assert is 2, we have just passed (?( and are expecting an
+ assertion, possibly preceded by a callout. If the value is 1, we have just
+ had the callout and expect an assertion. There must be at least 3 more
+ characters in all cases. We know that the current character is an opening
+ parenthesis, as otherwise we wouldn't be here. Note that expect_cond_assert
+ may be negative, since all callouts just decrement it. */
+
+ if (expect_cond_assert > 0)
+ {
+ BOOL ok = ptrend - ptr >= 3 && ptr[0] == CHAR_QUESTION_MARK;
+ if (ok) switch(ptr[1])
+ {
+ case CHAR_C:
+ ok = expect_cond_assert == 2;
+ break;
+
+ case CHAR_EQUALS_SIGN:
+ case CHAR_EXCLAMATION_MARK:
+ break;
+
+ case CHAR_LESS_THAN_SIGN:
+ ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
+ break;
+
+ default:
+ ok = FALSE;
+ }
+
+ if (!ok)
+ {
+ ptr--; /* Adjust error offset */
+ errorcode = ERR28;
+ goto FAILED;
+ }
+ }
+
/* Remember whether we are expecting a conditional assertion, and set the
default for this item. */
@@ -3443,7 +3474,7 @@
ptr = startptr; /* To give a more useful message */
goto FAILED;
}
- if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
+ if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
break;
}
@@ -3519,18 +3550,16 @@
nest_depth++;
/* If the next character is ? there must be an assertion next (optionally
- preceded by a callout). We do not check this here, but instead we just
- preserve the offset so that the later check can give a sensible error
- message. Pull back the pointer to the start of the assertion (or
- callout), and set expect_cond_assert to 2. If this is still greater than
- zero (callouts decrement it) when the next assertion is read, it will be
- marked as a condition that must not be repeated. */
+ preceded by a callout). We do not check this here, but instead we set
+ expect_cond_assert to 2. If this is still greater than zero (callouts
+ decrement it) when the next assertion is read, it will be marked as a
+ condition that must not be repeated. A value greater than zero also
+ causes checking that an assertion (possibly with callout) follows. */
if (*ptr == CHAR_QUESTION_MARK)
{
*parsed_pattern++ = META_COND_ASSERT;
- offset = (PCRE2_SIZE)(--ptr - cb->start_pattern - 2);
- PUTOFFSET(offset, parsed_pattern);
+ ptr--; /* Pull pointer back to the opening parenthesis. */
expect_cond_assert = 2;
break; /* End of conditional */
}
@@ -3902,6 +3931,11 @@
/* Terminate the parsed pattern, then return success if all groups are closed.
Otherwise we have unclosed parentheses. */
+if (parsed_pattern >= parsed_pattern_end)
+ {
+ errorcode = ERR63; /* Internal error (parsed pattern overflow) */
+ goto FAILED;
+ }
*parsed_pattern = META_END;
if (nest_depth == 0) return 0;
@@ -5806,23 +5840,10 @@
pptr += 3;
goto GROUP_PROCESS;
- /* The condition is alleged to be an assertion, possibly preceded by a
- callout, because it's not one of the others, and began with (?(?. This is
- where the check for the next thing being an assertion (with optional
- callout) is done. */
+ /* The condition is an assertion, possibly preceded by a callout. */
case META_COND_ASSERT:
bravalue = OP_COND;
- GETPLUSOFFSET(offset, pptr);
- i = (pptr[1] == META_CALLOUT_NUMBER)? 5 :
- (pptr[1] == META_CALLOUT_STRING)? (5 + SIZEOFFSET) : 1;
- if (META_CODE(pptr[i]) < META_LOOKAHEAD ||
- META_CODE(pptr[i]) > META_LOOKBEHINDNOT)
- {
- *errorcodeptr = ERR28; /* Assertion expected */
- cb->erroroffset = offset + 2; /* Point after initial (? */
- return FALSE;
- }
goto GROUP_PROCESS;
@@ -8563,7 +8584,7 @@
goto CHECK_GROUP;
case META_COND_ASSERT:
- pptr += 1 + SIZEOFFSET;
+ pptr += 1;
goto CHECK_GROUP;
case META_COND_VERSION:
@@ -8733,6 +8754,7 @@
case META_CLASS_END:
case META_CLASS_NOT:
case META_COMMIT:
+ case META_COND_ASSERT:
case META_DOLLAR:
case META_DOT:
case META_FAIL:
@@ -8753,7 +8775,6 @@
case META_THEN:
break;
- case META_COND_ASSERT:
case META_RECURSE:
pptr += SIZEOFFSET;
break;
@@ -9178,8 +9199,8 @@
}
cb.parsed_pattern = heap_parsed_pattern;
}
-cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1;
-
+cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1;
+
/* Do the parsing scan. */
errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb);
@@ -9569,7 +9590,7 @@
goto HAD_CB_ERROR;
}
-/* Control ends up here in all cases. When running under valgrind, make a
+/* Control ends up here in all cases. When running under valgrind, make a
pattern's terminating zero defined again. If memory was obtained for the parsed
version of the pattern, free it before returning. Also free the list of named
groups if a larger one had to be obtained, and likewise the group information
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/testdata/testinput2 2016-11-18 18:59:37 UTC (rev 599)
@@ -4912,4 +4912,8 @@
\=get=i00000000000000000000000000000000
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
+"(?(?C))"
+
+/(?(?(?(?(?(?))))))/
+
# End of testinput2
Modified: code/trunk/testdata/testoutput18
===================================================================
--- code/trunk/testdata/testoutput18 2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/testdata/testoutput18 2016-11-18 18:59:37 UTC (rev 599)
@@ -145,7 +145,7 @@
Failed: POSIX code 11: unbalanced () at offset 6
"(?(?C))"
-Failed: POSIX code 3: pattern error at offset 2
+Failed: POSIX code 3: pattern error at offset 6
/abcd/substitute_extended
** Ignored with POSIX interface: substitute_extended
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/testdata/testoutput2 2016-11-18 18:59:37 UTC (rev 599)
@@ -556,7 +556,7 @@
Failed: error 115 at offset 3: reference to non-existent subpattern
/(?(?<ab))/
-Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
+Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
/((?s)blah)\s+\1/I
Capturing subpattern count = 1
@@ -15361,6 +15361,12 @@
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
** Too many characters in named 'get' modifiers
+"(?(?C))"
+Failed: error 128 at offset 6: assertion expected after (?( or (?(?C)
+
+/(?(?(?(?(?(?))))))/
+Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
+
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data