Revision: 311
http://www.exim.org/viewvc/pcre2?view=rev&revision=311
Author: ph10
Date: 2015-07-16 18:47:20 +0100 (Thu, 16 Jul 2015)
Log Message:
-----------
Fix crash due to incorrect pre-pass handling of callouts with string arguments,
and tighten up the callout handling.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-07-16 16:15:52 UTC (rev 310)
+++ code/trunk/ChangeLog 2015-07-16 17:47:20 UTC (rev 311)
@@ -30,7 +30,14 @@
own source module to avoid a circular dependency between src/pcre2_compile.c
and src/pcre2_study.c
+8. A callout with a string argument containing an opening square bracket, for
+example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer
+overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer.
+9. The handling of callouts during the pre-pass for named group identification
+has been tightened up.
+
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-07-16 16:15:52 UTC (rev 310)
+++ code/trunk/src/pcre2_compile.c 2015-07-16 17:47:20 UTC (rev 311)
@@ -2988,6 +2988,7 @@
compile_block *cb)
{
uint32_t c;
+uint32_t delimiter;
uint32_t nest_depth = 0;
uint32_t set, unset, *optset;
int errorcode = 0;
@@ -2999,6 +3000,7 @@
BOOL utf = (options & PCRE2_UTF) != 0;
BOOL negate_class;
PCRE2_SPTR name;
+PCRE2_SPTR start;
PCRE2_SPTR ptr = *ptrptr;
named_group *ng;
nest_save *top_nest = NULL;
@@ -3176,7 +3178,6 @@
default:
ptr += 2;
if (ptr[0] == CHAR_R || /* (?R) */
- ptr[0] == CHAR_C || /* (?C) */
IS_DIGIT(ptr[0]) || /* (?n) */
(ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) break; /* (?-n) */
@@ -3252,7 +3253,57 @@
else top_nest->nest_depth = nest_depth;
}
break;
+
+ /* Skip over a numerical or string argument for a callout. */
+
+ case CHAR_C:
+ ptr += 2;
+ if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break;
+ if (IS_DIGIT(ptr[1]))
+ {
+ while (IS_DIGIT(ptr[1])) ptr++;
+ if (ptr[1] != CHAR_RIGHT_PARENTHESIS)
+ {
+ errorcode = ERR39;
+ ptr++;
+ goto FAILED;
+ }
+ break;
+ }
+ /* Handle a string argument */
+
+ ptr++;
+ delimiter = 0;
+ for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
+ {
+ if (*ptr == PRIV(callout_start_delims)[i])
+ {
+ delimiter = PRIV(callout_end_delims)[i];
+ break;
+ }
+ }
+
+ if (delimiter == 0)
+ {
+ errorcode = ERR82;
+ goto FAILED;
+ }
+
+ start = ptr;
+ do
+ {
+ if (++ptr >= cb->end_pattern)
+ {
+ errorcode = ERR81;
+ ptr = start; /* To give a more useful message */
+ goto FAILED;
+ }
+ if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
+ }
+ while (ptr[0] != delimiter);
+ break;
+
case CHAR_NUMBER_SIGN:
ptr += 3;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
@@ -6062,7 +6113,9 @@
}
/* During the pre-compile phase, we parse the string and update the
- length. There is no need to generate any code. */
+ length. There is no need to generate any code. (In fact, the string
+ has already been parsed in the pre-pass that looks for named
+ parentheses, but it does no harm to leave this code in.) */
if (lengthptr != NULL) /* Only check the string */
{
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-07-16 16:15:52 UTC (rev 310)
+++ code/trunk/testdata/testinput2 2015-07-16 17:47:20 UTC (rev 311)
@@ -4331,4 +4331,8 @@
/((?x)(*:0))#(?'/
+/(?C$[$)(?<]/
+
+/(?C$)$)(?<]/
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-07-16 16:15:52 UTC (rev 310)
+++ code/trunk/testdata/testoutput2 2015-07-16 17:47:20 UTC (rev 311)
@@ -14476,4 +14476,10 @@
/((?x)(*:0))#(?'/
Failed: error 124 at offset 15: unrecognized character after (?<
+/(?C$[$)(?<]/
+Failed: error 124 at offset 10: unrecognized character after (?<
+
+/(?C$)$)(?<]/
+Failed: error 124 at offset 10: unrecognized character after (?<
+
# End of testinput2