Revision: 588
http://www.exim.org/viewvc/pcre2?view=rev&revision=588
Author: ph10
Date: 2016-11-01 17:45:54 +0000 (Tue, 01 Nov 2016)
Log Message:
-----------
Fix callout string read overrun; do better with catching these when using
zero-terminated patterns under valgrind.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-11-01 15:58:28 UTC (rev 587)
+++ code/trunk/ChangeLog 2016-11-01 17:45:54 UTC (rev 588)
@@ -21,14 +21,22 @@
(a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead
of giving an invalid quantifier error.
+
(b) {0} can now be used after a group in a lookbehind assertion; previously
this caused an "assertion is not fixed length" error.
+
(c) Perl always treats (?(DEFINE) as a "define" group, even if a group with
the name "DEFINE" exists. PCRE2 now does likewise.
+
(d) A recursion condition test such as (?(R2)...) must now refer to an
existing subpattern.
+
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a
group whose name began with "R".
+
+ (f) When testing zero-terminated patterns under valgrind, the terminating
+ zero is now marked "no access". This catches bugs that would otherwise
+ show up only with non-zero-terminated patterns.
One effect of the refactoring is that some error numbers and messages have
changed, and the pattern offset given for compiling errors is not always the
@@ -63,6 +71,9 @@
(f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
"{2,2") could cause reading beyond the pattern.
+
+ (g) When reading a callout string, if the end delimiter was at the end of the
+ pattern one further code unit was read.
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2016-11-01 15:58:28 UTC (rev 587)
+++ code/trunk/src/pcre2_compile.c 2016-11-01 17:45:54 UTC (rev 588)
@@ -3436,7 +3436,8 @@
ptr = startptr; /* To give a more useful message */
goto FAILED;
}
- if (*ptr == delimiter && (++ptr > ptrend || *ptr != delimiter)) break;
+ if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
+ break;
}
calloutlength = (PCRE2_SIZE)(ptr - startptr);
@@ -7653,7 +7654,7 @@
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
+ if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
@@ -7678,7 +7679,7 @@
else if (op == OP_COND)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
+ if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
@@ -8825,36 +8826,37 @@
pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{
-BOOL utf; /* Set TRUE for UTF mode */
-BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
-pcre2_real_code *re = NULL; /* What we will return */
-compile_block cb; /* "Static" compile-time data */
-const uint8_t *tables; /* Char tables base pointer */
+BOOL utf; /* Set TRUE for UTF mode */
+BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
+BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
+pcre2_real_code *re = NULL; /* What we will return */
+compile_block cb; /* "Static" compile-time data */
+const uint8_t *tables; /* Char tables base pointer */
-PCRE2_UCHAR *code; /* Current pointer in compiled code */
-PCRE2_SPTR codestart; /* Start of compiled code */
-PCRE2_SPTR ptr; /* Current pointer in pattern */
-uint32_t *pptr; /* Current pointer in parsed pattern */
+PCRE2_UCHAR *code; /* Current pointer in compiled code */
+PCRE2_SPTR codestart; /* Start of compiled code */
+PCRE2_SPTR ptr; /* Current pointer in pattern */
+uint32_t *pptr; /* Current pointer in parsed pattern */
-PCRE2_SIZE length = 1; /* Allow for final END opcode */
-PCRE2_SIZE usedlength; /* Actual length used */
-PCRE2_SIZE re_blocksize; /* Size of memory block */
-PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
-PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
+PCRE2_SIZE length = 1; /* Allow for final END opcode */
+PCRE2_SIZE usedlength; /* Actual length used */
+PCRE2_SIZE re_blocksize; /* Size of memory block */
+PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
+PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
-int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
-uint32_t firstcu, reqcu; /* Value of first/req code unit */
-uint32_t setflags = 0; /* NL and BSR set flags */
+int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
+uint32_t firstcu, reqcu; /* Value of first/req code unit */
+uint32_t setflags = 0; /* NL and BSR set flags */
-uint32_t skipatstart; /* When checking (*UTF) etc */
-uint32_t limit_match = UINT32_MAX; /* Unset match limits */
+uint32_t skipatstart; /* When checking (*UTF) etc */
+uint32_t limit_match = UINT32_MAX; /* Unset match limits */
uint32_t limit_recursion = UINT32_MAX;
-int newline = 0; /* Unset; can be set by the pattern */
-int bsr = 0; /* Unset; can be set by the pattern */
-int errorcode = 0; /* Initialize to avoid compiler warn */
+int newline = 0; /* Unset; can be set by the pattern */
+int bsr = 0; /* Unset; can be set by the pattern */
+int errorcode = 0; /* Initialize to avoid compiler warn */
-uint32_t i; /* Local loop counter */
+uint32_t i; /* Local loop counter */
/* Comments at the head of this file explain about these variables. */
@@ -8901,7 +8903,9 @@
/* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */
-if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern);
+if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED)))
+ patlen = PRIV(strlen)(pattern);
+
if (patlen > ccontext->max_pattern_length)
{
*errorptr = ERR88;
@@ -8908,6 +8912,10 @@
return NULL;
}
+/* From here on, all returns from this function should end up going via the
+EXIT label. */
+
+
/* ------------ Initialize the "static" compile data -------------- */
tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
@@ -8966,8 +8974,14 @@
/* --------------- Start looking at the pattern --------------- */
/* Check for global one-time option settings at the start of the pattern, and
-remember the offset to the actual regex. */
+remember the offset to the actual regex. With valgrind support, make the
+terminator of a zero-terminated pattern inaccessible. This catches bugs that
+would otherwise only show up for non-zero-terminated patterns. */
+#ifdef SUPPORT_VALGRIND
+if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
+#endif
+
ptr = pattern;
skipatstart = 0;
@@ -9148,14 +9162,14 @@
if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
{
- cb.parsed_pattern = ccontext->memctl.malloc(
- (parsed_size_needed + 1) * sizeof(uint32_t),
- ccontext->memctl.memory_data);
- if (cb.parsed_pattern == NULL)
+ uint32_t *heap_parsed_pattern = ccontext->memctl.malloc(
+ (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data);
+ if (heap_parsed_pattern == NULL)
{
*errorptr = ERR21;
- return NULL;
+ goto EXIT;
}
+ cb.parsed_pattern = heap_parsed_pattern;
}
/* Do the parsing scan. */
@@ -9547,12 +9561,16 @@
goto HAD_CB_ERROR;
}
-/* Control ends up here in all cases. If memory was obtained for the parsed
+/* Control ends up here in all cases. When running under valgrind, make a
+pattern's terminating zero defined again. If memory was obtained for the parsed
version of the pattern, free it before returning. Also free the list of named
groups if a larger one had to be obtained, and likewise the group information
vector. */
EXIT:
+#ifdef SUPPORT_VALGRIND
+if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1));
+#endif
if (cb.parsed_pattern != stack_parsed_pattern)
ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data);
if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)