[Pcre-svn] [599] code/trunk: Fix overrun bug caused by condi…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [599] code/trunk: Fix overrun bug caused by conditional with assertion using too much memory.
Revision: 599
          http://www.exim.org/viewvc/pcre2?view=rev&revision=599
Author:   ph10
Date:     2016-11-18 18:59:37 +0000 (Fri, 18 Nov 2016)
Log Message:
-----------
Fix overrun bug caused by conditional with assertion using too much memory.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/HACKING
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput18
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/ChangeLog    2016-11-18 18:59:37 UTC (rev 599)
@@ -82,6 +82,13 @@


   (i) An insufficient memory size was being computed for compiling with 
       PCRE2_AUTO_CALLOUT. 
+      
+  (j) A conditional group with an assertion condition used more memory than was 
+      allowed for it during parsing, so too many of them could therefore 
+      overrun a buffer.
+      
+  (k) If parsing a pattern exactly filled the buffer, the internal test for 
+      overrun did not check when the final META_END item was added.


4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the

Modified: code/trunk/HACKING
===================================================================
--- code/trunk/HACKING    2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/HACKING    2016-11-18 18:59:37 UTC (rev 599)
@@ -186,6 +186,7 @@
 META_CLASS_END        ] end of non-empty class
 META_CLASS_NOT        [^ start non-empty negative class
 META_COMMIT           (*COMMIT)
+META_COND_ASSERT      (?(?assertion)
 META_DOLLAR           $ metacharacter
 META_DOT              . metacharacter
 META_END              End of pattern (this value is 0x80000000)
@@ -274,9 +275,8 @@


 META_COND_NUMBER       (?([+-]digits)


-The following are followed just by an offset, for use in error messages:
+The following is followed just by an offset, for use in error messages:

-META_COND_ASSERT      (?(?assertion)
 META_COND_DEFINE      (?(DEFINE)


In fact, META_COND_ASSERT is used for any group starting (?( that does not

Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/src/pcre2_compile.c    2016-11-18 18:59:37 UTC (rev 599)
@@ -856,6 +856,7 @@


     case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
     case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
+    case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break;
     case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
     case META_DOT: fprintf(stderr, "META_DOT"); break;
     case META_ASTERISK: fprintf(stderr, "META *"); break;
@@ -949,12 +950,6 @@
     fprintf(stderr, "%zd", offset);
     break;


-    case META_COND_ASSERT:
-    fprintf(stderr, "META_COND_ASSERT offset=");
-    GETOFFSET(offset, pptr);
-    fprintf(stderr, "%zd", offset);
-    break;
-
     case META_COND_VERSION:
     fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">=");
     fprintf(stderr, "%d.", *pptr++);
@@ -2189,12 +2184,12 @@
   PCRE2_SPTR tempptr;
   PCRE2_SPTR thisptr;
   PCRE2_SIZE offset;
-  
+
   if (parsed_pattern >= parsed_pattern_end)
     {
     errorcode = ERR63;  /* Internal error (parsed pattern overflow) */
-    goto FAILED; 
-    }   
+    goto FAILED;
+    }


   if (nest_depth > cb->cx->parens_nest_limit)
     {
@@ -2368,6 +2363,42 @@
         parsed_pattern, cb);
     }


+  /* If expect_cond_assert is 2, we have just passed (?( and are expecting an
+  assertion, possibly preceded by a callout. If the value is 1, we have just
+  had the callout and expect an assertion. There must be at least 3 more
+  characters in all cases. We know that the current character is an opening
+  parenthesis, as otherwise we wouldn't be here. Note that expect_cond_assert 
+  may be negative, since all callouts just decrement it. */
+
+  if (expect_cond_assert > 0)
+    {
+    BOOL ok = ptrend - ptr >= 3 && ptr[0] == CHAR_QUESTION_MARK;
+    if (ok) switch(ptr[1])
+      {
+      case CHAR_C:
+      ok = expect_cond_assert == 2;
+      break;  
+ 
+      case CHAR_EQUALS_SIGN:
+      case CHAR_EXCLAMATION_MARK:
+      break;
+      
+      case CHAR_LESS_THAN_SIGN:
+      ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
+      break;
+      
+      default:
+      ok = FALSE;       
+      }   
+
+    if (!ok)
+      {
+      ptr--;   /* Adjust error offset */ 
+      errorcode = ERR28;
+      goto FAILED;
+      }
+    }
+
   /* Remember whether we are expecting a conditional assertion, and set the
   default for this item. */


@@ -3443,7 +3474,7 @@
             ptr = startptr;   /* To give a more useful message */
             goto FAILED;
             }
-          if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter)) 
+          if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
             break;
           }


@@ -3519,18 +3550,16 @@
       nest_depth++;


       /* If the next character is ? there must be an assertion next (optionally
-      preceded by a callout). We do not check this here, but instead we just
-      preserve the offset so that the later check can give a sensible error
-      message. Pull back the pointer to the start of the assertion (or
-      callout), and set expect_cond_assert to 2. If this is still greater than
-      zero (callouts decrement it) when the next assertion is read, it will be
-      marked as a condition that must not be repeated. */
+      preceded by a callout). We do not check this here, but instead we set
+      expect_cond_assert to 2. If this is still greater than zero (callouts
+      decrement it) when the next assertion is read, it will be marked as a
+      condition that must not be repeated. A value greater than zero also
+      causes checking that an assertion (possibly with callout) follows. */


       if (*ptr == CHAR_QUESTION_MARK)
         {
         *parsed_pattern++ = META_COND_ASSERT;
-        offset = (PCRE2_SIZE)(--ptr - cb->start_pattern - 2);
-        PUTOFFSET(offset, parsed_pattern);
+        ptr--;   /* Pull pointer back to the opening parenthesis. */ 
         expect_cond_assert = 2;
         break;  /* End of conditional */
         }
@@ -3902,6 +3931,11 @@
 /* Terminate the parsed pattern, then return success if all groups are closed.
 Otherwise we have unclosed parentheses. */


+if (parsed_pattern >= parsed_pattern_end)
+ {
+ errorcode = ERR63; /* Internal error (parsed pattern overflow) */
+ goto FAILED;
+ }
*parsed_pattern = META_END;
if (nest_depth == 0) return 0;

@@ -5806,23 +5840,10 @@
     pptr += 3;
     goto GROUP_PROCESS;


-    /* The condition is alleged to be an assertion, possibly preceded by a
-    callout, because it's not one of the others, and began with (?(?. This is
-    where the check for the next thing being an assertion (with optional
-    callout) is done. */
+    /* The condition is an assertion, possibly preceded by a callout. */


     case META_COND_ASSERT:
     bravalue = OP_COND;
-    GETPLUSOFFSET(offset, pptr);
-    i = (pptr[1] == META_CALLOUT_NUMBER)? 5 :
-        (pptr[1] == META_CALLOUT_STRING)? (5 + SIZEOFFSET) : 1;
-    if (META_CODE(pptr[i]) < META_LOOKAHEAD ||
-        META_CODE(pptr[i]) > META_LOOKBEHINDNOT)
-      {
-      *errorcodeptr = ERR28;         /* Assertion expected */
-      cb->erroroffset = offset + 2;  /* Point after initial (? */
-      return FALSE;
-      }
     goto GROUP_PROCESS;



@@ -8563,7 +8584,7 @@
     goto CHECK_GROUP;


     case META_COND_ASSERT:
-    pptr += 1 + SIZEOFFSET;
+    pptr += 1;
     goto CHECK_GROUP;


     case META_COND_VERSION:
@@ -8733,6 +8754,7 @@
     case META_CLASS_END:
     case META_CLASS_NOT:
     case META_COMMIT:
+    case META_COND_ASSERT:
     case META_DOLLAR:
     case META_DOT:
     case META_FAIL:
@@ -8753,7 +8775,6 @@
     case META_THEN:
     break;


-    case META_COND_ASSERT:
     case META_RECURSE:
     pptr += SIZEOFFSET;
     break;
@@ -9178,8 +9199,8 @@
     }
   cb.parsed_pattern = heap_parsed_pattern;
   }
-cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1; 
-  
+cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1;
+
 /* Do the parsing scan. */


errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb);
@@ -9569,7 +9590,7 @@
goto HAD_CB_ERROR;
}

-/* Control ends up here in all cases. When running under valgrind, make a
+/* Control ends up here in all cases. When running under valgrind, make a
pattern's terminating zero defined again. If memory was obtained for the parsed
version of the pattern, free it before returning. Also free the list of named
groups if a larger one had to be obtained, and likewise the group information

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/testdata/testinput2    2016-11-18 18:59:37 UTC (rev 599)
@@ -4912,4 +4912,8 @@
 \=get=i00000000000000000000000000000000
 \=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012


+"(?(?C))"
+
+/(?(?(?(?(?(?))))))/
+
# End of testinput2

Modified: code/trunk/testdata/testoutput18
===================================================================
--- code/trunk/testdata/testoutput18    2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/testdata/testoutput18    2016-11-18 18:59:37 UTC (rev 599)
@@ -145,7 +145,7 @@
 Failed: POSIX code 11: unbalanced () at offset 6     


 "(?(?C))"
-Failed: POSIX code 3: pattern error at offset 2     
+Failed: POSIX code 3: pattern error at offset 6     


/abcd/substitute_extended
** Ignored with POSIX interface: substitute_extended

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2016-11-10 17:08:27 UTC (rev 598)
+++ code/trunk/testdata/testoutput2    2016-11-18 18:59:37 UTC (rev 599)
@@ -556,7 +556,7 @@
 Failed: error 115 at offset 3: reference to non-existent subpattern


/(?(?<ab))/
-Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
+Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)

/((?s)blah)\s+\1/I
Capturing subpattern count = 1
@@ -15361,6 +15361,12 @@
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
** Too many characters in named 'get' modifiers

+"(?(?C))"
+Failed: error 128 at offset 6: assertion expected after (?( or (?(?C)
+
+/(?(?(?(?(?(?))))))/
+Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
+
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data