Revision: 674
http://www.exim.org/viewvc/pcre2?view=rev&revision=674
Author: ph10
Date: 2017-03-10 16:34:54 +0000 (Fri, 10 Mar 2017)
Log Message:
-----------
Fix crash for pattern with very many captures. Fixes oss-fuzz issue 783.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_match.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/ChangeLog 2017-03-10 16:34:54 UTC (rev 674)
@@ -24,11 +24,17 @@
a match, because the external block was being set from non-existent
internal ovector fields. Fixes oss-fuzz issue 781.
+ (b) A pattern with very many capturing parentheses (when the internal frame
+ size was greater than the initial frame vector on the stack) caused a
+ crash. A vector on the heap is now set up at the start of matching if the
+ vector on the stack is not big enough to handle at least 10 frames.
+ Fixes oss-fuzz issue 783.
+
2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
the match data block (non-POSIX).
-
+
3. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
for a character with a code point greater than 0x10ffff (the Unicode maximum)
caused a crash.
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/src/pcre2_match.c 2017-03-10 16:34:54 UTC (rev 674)
@@ -816,9 +816,9 @@
ovector[0] = Fstart_match - mb->start_subject;
ovector[1] = Feptr - mb->start_subject;
-
+
/* Set i to the smaller of the sizes of the external and frame ovectors. */
-
+
i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
@@ -5231,7 +5231,7 @@
/* The variable Flength will be added to Fecode when the condition is
false, to get to the second branch. Setting it to the offset to the ALT or
KET, then incrementing Fecode achieves this effect. However, if the second
- branch is non-existent, we must point to the KET so that the end of the
+ branch is non-existent, we must point to the KET so that the end of the
group is correctly processed. We now have Fecode pointing to the condition
or callout. */
@@ -5478,8 +5478,8 @@
/* If we are at the end of an assertion that is a condition, return a
match, discarding any intermediate backtracking points. Copy back the
- captures into the frame before N so that they are set on return. Doing
- this for all assertions, both positive and negative, seems to match what
+ captures into the frame before N so that they are set on return. Doing
+ this for all assertions, both positive and negative, seems to match what
Perl does. */
if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
@@ -5545,7 +5545,7 @@
case OP_SCBRA:
case OP_SCBRAPOS:
number = GET2(bracode, 1+LINK_SIZE);
-
+
/* Handle a recursively called group. We reinstate the previous set of
captures and then carry on. */
@@ -6197,45 +6197,6 @@
mb->name_entry_size = re->name_entry_size;
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
-/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
-vector at the end, whose size depends on the number of capturing parentheses in
-the pattern. It is not used at all if there are no capturing parentheses.
-
- frame_size is the total size of each frame
- mb->frame_vector_size is the total usable size of the vector (rounded down
- to a whole number of frames)
-
-The last of these may be changed if the frame vector has to be expanded. We
-therefore put it into the match block so that it is correct when calling
-match() more than once for non-anchored patterns. */
-
-frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
-mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
-
-/* Set up the initial frame set. Write to the ovector within the first frame to
-mark every capture unset and to avoid uninitialized memory read errors when it
-is copied to a new frame. */
-
-memset((char *)(mb->stack_frames) + offsetof(heapframe,ovector), 0xff,
- re->top_bracket * 2 * sizeof(PCRE2_SIZE));
-mb->match_frames = mb->stack_frames;
-mb->match_frames_top =
- (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
-
-/* Limits set in the pattern override the match context only if they are
-smaller. */
-
-mb->match_limit = (mcontext->match_limit < re->limit_match)?
- mcontext->match_limit : re->limit_match;
-mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
- mcontext->recursion_limit : re->limit_recursion;
-
-/* Pointers to the individual character tables */
-
-mb->lcc = re->tables + lcc_offset;
-mb->fcc = re->tables + fcc_offset;
-mb->ctypes = re->tables + ctypes_offset;
-
/* Process the \R and newline settings. */
mb->bsr_convention = re->bsr_convention;
@@ -6269,6 +6230,60 @@
default: return PCRE2_ERROR_INTERNAL;
}
+/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
+vector at the end, whose size depends on the number of capturing parentheses in
+the pattern. It is not used at all if there are no capturing parentheses.
+
+ frame_size is the total size of each frame
+ mb->frame_vector_size is the total usable size of the vector (rounded down
+ to a whole number of frames)
+
+The last of these is changed within the match() function if the frame vector
+has to be expanded. We therefore put it into the match block so that it is
+correct when calling match() more than once for non-anchored patterns. */
+
+frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
+
+/* If a pattern has very many capturing parentheses, the frame size may be very
+large. Ensure that there are at least 10 available frames by getting an initial
+vector on the heap if necessary. */
+
+if (frame_size <= START_FRAMES_SIZE/10)
+ {
+ mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
+ mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
+ }
+else
+ {
+ mb->frame_vector_size = frame_size * 10;
+ mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
+ mb->memctl.memory_data);
+ if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
+ }
+
+mb->match_frames_top =
+ (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
+
+/* Write to the ovector within the first frame to mark every capture unset and
+to avoid uninitialized memory read errors when it is copied to a new frame. */
+
+memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
+ re->top_bracket * 2 * sizeof(PCRE2_SIZE));
+
+/* Limits set in the pattern override the match context only if they are
+smaller. */
+
+mb->match_limit = (mcontext->match_limit < re->limit_match)?
+ mcontext->match_limit : re->limit_match;
+mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
+ mcontext->recursion_limit : re->limit_recursion;
+
+/* Pointers to the individual character tables */
+
+mb->lcc = re->tables + lcc_offset;
+mb->fcc = re->tables + fcc_offset;
+mb->ctypes = re->tables + ctypes_offset;
+
/* Set up the first code unit to match, if available. The first_codeunit value
is never set for an anchored regular expression, but the anchoring may be
forced at run time, so we have to test for anchoring. The first code unit may
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/testdata/testinput2 2017-03-10 16:34:54 UTC (rev 674)
@@ -5009,4 +5009,10 @@
'(?:a(*:aa))b|ac' mark
ac
+/(R?){65}/
+ (R?){65}
+
+/\[(a)]{60}/expand
+ aaaa
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2017-03-10 15:53:49 UTC (rev 673)
+++ code/trunk/testdata/testoutput2 2017-03-10 16:34:54 UTC (rev 674)
@@ -15559,6 +15559,15 @@
ac
0: ac
+/(R?){65}/
+ (R?){65}
+ 0:
+ 1:
+
+/\[(a)]{60}/expand
+ aaaa
+No match
+
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data