Revision: 317
http://www.exim.org/viewvc/pcre2?view=rev&revision=317
Author: ph10
Date: 2015-07-17 17:25:21 +0100 (Fri, 17 Jul 2015)
Log Message:
-----------
Fix a bug where an empty comment (?#) in a pattern was incorrectly processed and could provoke a buffer overflow
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/ChangeLog 2015-07-17 16:25:21 UTC (rev 317)
@@ -46,7 +46,11 @@
12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian
Persch).
+13. An empty comment (?#) in a pattern was incorrectly processed and could
+provoke a buffer overflow. This bug was discovered by Karl Skomski with the
+LLVM fuzzer.
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/src/pcre2_compile.c 2015-07-17 16:25:21 UTC (rev 317)
@@ -2997,6 +2997,7 @@
int i;
BOOL inescq = FALSE;
BOOL isdupname;
+BOOL skiptoket = FALSE;
BOOL utf = (options & PCRE2_UTF) != 0;
BOOL negate_class;
PCRE2_SPTR name;
@@ -3009,6 +3010,16 @@
for (; ptr < cb->end_pattern; ptr++)
{
c = *ptr;
+
+ /* Parenthesized groups set skiptoket when all following characters up to the
+ next closing parenthesis must be ignored. The parenthesis itself must be
+ processed (to end the nested parenthesized item). */
+
+ if (skiptoket)
+ {
+ if (c != CHAR_RIGHT_PARENTHESIS) continue;
+ skiptoket = FALSE;
+ }
/* Skip over literals */
@@ -3177,9 +3188,14 @@
{
default:
ptr += 2;
- if (ptr[0] == CHAR_R || /* (?R) */
- IS_DIGIT(ptr[0]) || /* (?n) */
- (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) break; /* (?-n) */
+ if (ptr[0] == CHAR_R || /* (?R) */
+ ptr[0] == CHAR_NUMBER_SIGN || /* (?#) */
+ IS_DIGIT(ptr[0]) || /* (?n) */
+ (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) /* (?-n) */
+ {
+ skiptoket = TRUE;
+ break;
+ }
/* Handle (?| and (?imsxJU: which are the only other valid forms. Both
need a new block on the nest stack. */
@@ -3304,16 +3320,6 @@
while (ptr[0] != delimiter);
break;
- case CHAR_NUMBER_SIGN:
- ptr += 3;
- while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
- if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- errorcode = ERR18;
- goto FAILED;
- }
- break;
-
case CHAR_LEFT_PARENTHESIS:
nest_depth++;
/* Fall through */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/testdata/testinput2 2015-07-17 16:25:21 UTC (rev 317)
@@ -4342,4 +4342,8 @@
/(?(R))*+/B
abcd
+/((?x)(?#))#(?'/
+
+/((?x)(?#))#(?'abc')/I
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/testdata/testoutput2 2015-07-17 16:25:21 UTC (rev 317)
@@ -14514,4 +14514,14 @@
abcd
0:
+/((?x)(?#))#(?'/
+Failed: error 124 at offset 14: unrecognized character after (?<
+
+/((?x)(?#))#(?'abc')/I
+Capturing subpattern count = 2
+Named capturing subpatterns:
+ abc 2
+First code unit = '#'
+Subject length lower bound = 1
+
# End of testinput2