[Pcre-svn] [317] code/trunk: Fix empty comment (?#) bug

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [317] code/trunk: Fix empty comment (?#) bug
Revision: 317
          http://www.exim.org/viewvc/pcre2?view=rev&revision=317
Author:   ph10
Date:     2015-07-17 17:25:21 +0100 (Fri, 17 Jul 2015)
Log Message:
-----------
Fix empty comment (?#) bug


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/ChangeLog    2015-07-17 16:25:21 UTC (rev 317)
@@ -46,7 +46,11 @@
 12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian 
 Persch).


+13. An empty comment (?#) in a pattern was incorrectly processed and could
+provoke a buffer overflow. This bug was discovered by Karl Skomski with the
+LLVM fuzzer.

+
 Version 10.20 30-June-2015
 --------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/src/pcre2_compile.c    2015-07-17 16:25:21 UTC (rev 317)
@@ -2997,6 +2997,7 @@
 int i;
 BOOL inescq = FALSE;
 BOOL isdupname;
+BOOL skiptoket = FALSE;
 BOOL utf = (options & PCRE2_UTF) != 0;
 BOOL negate_class;
 PCRE2_SPTR name;
@@ -3009,6 +3010,16 @@
 for (; ptr < cb->end_pattern; ptr++)
   {
   c = *ptr;
+  
+  /* Parenthesized groups set skiptoket when all following characters up to the 
+  next closing parenthesis must be ignored. The parenthesis itself must be 
+  processed (to end the nested parenthesized item). */ 
+  
+  if (skiptoket)
+    {
+    if (c != CHAR_RIGHT_PARENTHESIS) continue;
+    skiptoket = FALSE;
+    }  


 /* Skip over literals */

@@ -3177,9 +3188,14 @@
       {
       default:
       ptr += 2;
-      if (ptr[0] == CHAR_R ||                                 /* (?R) */
-          IS_DIGIT(ptr[0]) ||                                 /* (?n) */
-          (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) break;  /* (?-n) */
+      if (ptr[0] == CHAR_R ||                           /* (?R) */
+          ptr[0] == CHAR_NUMBER_SIGN ||                 /* (?#) */ 
+          IS_DIGIT(ptr[0]) ||                           /* (?n) */
+          (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1])))   /* (?-n) */
+        {
+        skiptoket = TRUE;
+        break;
+        }      


       /* Handle (?| and (?imsxJU: which are the only other valid forms. Both
       need a new block on the nest stack. */
@@ -3304,16 +3320,6 @@
       while (ptr[0] != delimiter);
       break;


-      case CHAR_NUMBER_SIGN:
-      ptr += 3;
-      while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
-      if (*ptr != CHAR_RIGHT_PARENTHESIS)
-        {
-        errorcode = ERR18;
-        goto FAILED;
-        }
-      break;
-
       case CHAR_LEFT_PARENTHESIS:
       nest_depth++;
       /* Fall through */


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/testdata/testinput2    2015-07-17 16:25:21 UTC (rev 317)
@@ -4342,4 +4342,8 @@
 /(?(R))*+/B
     abcd


+/((?x)(?#))#(?'/
+
+/((?x)(?#))#(?'abc')/I
+
 # End of testinput2

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-07-17 15:44:51 UTC (rev 316)
+++ code/trunk/testdata/testoutput2    2015-07-17 16:25:21 UTC (rev 317)
@@ -14514,4 +14514,14 @@
     abcd
  0: 


+/((?x)(?#))#(?'/
+Failed: error 124 at offset 14: unrecognized character after (?<
+
+/((?x)(?#))#(?'abc')/I
+Capturing subpattern count = 2
+Named capturing subpatterns:
+ abc 2
+First code unit = '#'
+Subject length lower bound = 1
+
 # End of testinput2