[Pcre-svn] [674] code/trunk: Fix crash for pattern with very many captures.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [674] code/trunk: Fix crash for pattern with very many captures.
Revision: 674
          http://www.exim.org/viewvc/pcre2?view=rev&revision=674
Author:   ph10
Date:     2017-03-10 16:34:54 +0000 (Fri, 10 Mar 2017)
Log Message:
-----------
Fix crash for pattern with very many captures. Fixes oss-fuzz issue 783.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_match.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/ChangeLog    2017-03-10 16:34:54 UTC (rev 674)
@@ -24,11 +24,17 @@
       a match, because the external block was being set from non-existent
       internal ovector fields. Fixes oss-fuzz issue 781.


+  (b) A pattern with very many capturing parentheses (when the internal frame 
+      size was greater than the initial frame vector on the stack) caused a 
+      crash. A vector on the heap is now set up at the start of matching if the 
+      vector on the stack is not big enough to handle at least 10 frames. 
+      Fixes oss-fuzz issue 783. 
+       
 2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:


   (a) Check for malloc failures when getting memory for the ovector (POSIX) or 
       the match data block (non-POSIX). 
-       
+      
 3. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
 for a character with a code point greater than 0x10ffff (the Unicode maximum)
 caused a crash.


Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c    2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/src/pcre2_match.c    2017-03-10 16:34:54 UTC (rev 674)
@@ -816,9 +816,9 @@


     ovector[0] = Fstart_match - mb->start_subject;
     ovector[1] = Feptr - mb->start_subject;
-    
+
     /* Set i to the smaller of the sizes of the external and frame ovectors. */
-    
+
     i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
     memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
     while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
@@ -5231,7 +5231,7 @@
     /* The variable Flength will be added to Fecode when the condition is
     false, to get to the second branch. Setting it to the offset to the ALT or
     KET, then incrementing Fecode achieves this effect. However, if the second
-    branch is non-existent, we must point to the KET so that the end of the 
+    branch is non-existent, we must point to the KET so that the end of the
     group is correctly processed. We now have Fecode pointing to the condition
     or callout. */


@@ -5478,8 +5478,8 @@

       /* If we are at the end of an assertion that is a condition, return a
       match, discarding any intermediate backtracking points. Copy back the
-      captures into the frame before N so that they are set on return. Doing 
-      this for all assertions, both positive and negative, seems to match what 
+      captures into the frame before N so that they are set on return. Doing
+      this for all assertions, both positive and negative, seems to match what
       Perl does. */


       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
@@ -5545,7 +5545,7 @@
       case OP_SCBRA:
       case OP_SCBRAPOS:
       number = GET2(bracode, 1+LINK_SIZE);
-      
+
       /* Handle a recursively called group. We reinstate the previous set of
       captures and then carry on. */


@@ -6197,45 +6197,6 @@
 mb->name_entry_size = re->name_entry_size;
 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;

-/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
-vector at the end, whose size depends on the number of capturing parentheses in
-the pattern. It is not used at all if there are no capturing parentheses.
-
-  frame_size             is the total size of each frame
-  mb->frame_vector_size  is the total usable size of the vector (rounded down
-                           to a whole number of frames)
-
-The last of these may be changed if the frame vector has to be expanded. We
-therefore put it into the match block so that it is correct when calling
-match() more than once for non-anchored patterns. */
-
-frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
-mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
-
-/* Set up the initial frame set. Write to the ovector within the first frame to
-mark every capture unset and to avoid uninitialized memory read errors when it
-is copied to a new frame. */
-
-memset((char *)(mb->stack_frames) + offsetof(heapframe,ovector), 0xff,
-  re->top_bracket * 2 * sizeof(PCRE2_SIZE));
-mb->match_frames = mb->stack_frames;
-mb->match_frames_top =
-  (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
-
-/* Limits set in the pattern override the match context only if they are
-smaller. */
-
-mb->match_limit = (mcontext->match_limit < re->limit_match)?
-                  mcontext->match_limit : re->limit_match;
-mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
-                            mcontext->recursion_limit : re->limit_recursion;
-
-/* Pointers to the individual character tables */
-
-mb->lcc = re->tables + lcc_offset;
-mb->fcc = re->tables + fcc_offset;
-mb->ctypes = re->tables + ctypes_offset;
-
 /* Process the \R and newline settings. */


 mb->bsr_convention = re->bsr_convention;
@@ -6269,6 +6230,60 @@
default: return PCRE2_ERROR_INTERNAL;
}

+/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
+vector at the end, whose size depends on the number of capturing parentheses in
+the pattern. It is not used at all if there are no capturing parentheses.
+
+  frame_size             is the total size of each frame
+  mb->frame_vector_size  is the total usable size of the vector (rounded down
+                           to a whole number of frames)
+
+The last of these is changed within the match() function if the frame vector
+has to be expanded. We therefore put it into the match block so that it is
+correct when calling match() more than once for non-anchored patterns. */
+
+frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
+
+/* If a pattern has very many capturing parentheses, the frame size may be very
+large. Ensure that there are at least 10 available frames by getting an initial 
+vector on the heap if necessary. */
+
+if (frame_size <= START_FRAMES_SIZE/10)
+  {
+  mb->match_frames = mb->stack_frames;   /* Initial frame vector on the stack */
+  mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
+  }
+else
+  {
+  mb->frame_vector_size = frame_size * 10;
+  mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
+    mb->memctl.memory_data);
+  if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
+  }
+
+mb->match_frames_top =
+  (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
+
+/* Write to the ovector within the first frame to mark every capture unset and
+to avoid uninitialized memory read errors when it is copied to a new frame. */
+
+memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
+  re->top_bracket * 2 * sizeof(PCRE2_SIZE));
+
+/* Limits set in the pattern override the match context only if they are
+smaller. */
+
+mb->match_limit = (mcontext->match_limit < re->limit_match)?
+                  mcontext->match_limit : re->limit_match;
+mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
+                            mcontext->recursion_limit : re->limit_recursion;
+
+/* Pointers to the individual character tables */
+
+mb->lcc = re->tables + lcc_offset;
+mb->fcc = re->tables + fcc_offset;
+mb->ctypes = re->tables + ctypes_offset;
+
 /* Set up the first code unit to match, if available. The first_codeunit value
 is never set for an anchored regular expression, but the anchoring may be
 forced at run time, so we have to test for anchoring. The first code unit may


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/testdata/testinput2    2017-03-10 16:34:54 UTC (rev 674)
@@ -5009,4 +5009,10 @@
 '(?:a(*:aa))b|ac' mark
     ac


+/(R?){65}/
+    (R?){65}
+
+/\[(a)]{60}/expand
+    aaaa
+
 # End of testinput2 


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/testdata/testoutput2    2017-03-10 16:34:54 UTC (rev 674)
@@ -15559,6 +15559,15 @@
     ac
  0: ac


+/(R?){65}/
+    (R?){65}
+ 0: 
+ 1: 
+
+/\[(a)]{60}/expand
+    aaaa
+No match
+
 # End of testinput2 
 Error -63: PCRE2_ERROR_BADDATA (unknown error number)
 Error -62: bad serialized data