[Pcre-svn] [588] code/trunk: Fix callout string read overrun…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [588] code/trunk: Fix callout string read overrun; do better with catching these when using
Revision: 588
          http://www.exim.org/viewvc/pcre2?view=rev&revision=588
Author:   ph10
Date:     2016-11-01 17:45:54 +0000 (Tue, 01 Nov 2016)
Log Message:
-----------
Fix callout string read overrun; do better with catching these when using 
zero-terminated patterns under valgrind.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-11-01 15:58:28 UTC (rev 587)
+++ code/trunk/ChangeLog    2016-11-01 17:45:54 UTC (rev 588)
@@ -21,14 +21,22 @@


   (a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead
       of giving an invalid quantifier error.
+
   (b) {0} can now be used after a group in a lookbehind assertion; previously
       this caused an "assertion is not fixed length" error.
+
   (c) Perl always treats (?(DEFINE) as a "define" group, even if a group with 
       the name "DEFINE" exists. PCRE2 now does likewise.
+
   (d) A recursion condition test such as (?(R2)...) must now refer to an 
       existing subpattern.
+
   (e) A conditional recursion test such as (?(R)...) misbehaved if there was a 
       group whose name began with "R".
+
+  (f) When testing zero-terminated patterns under valgrind, the terminating 
+      zero is now marked "no access". This catches bugs that would otherwise
+      show up only with non-zero-terminated patterns.


One effect of the refactoring is that some error numbers and messages have
changed, and the pattern offset given for compiling errors is not always the
@@ -63,6 +71,9 @@

   (f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
       "{2,2") could cause reading beyond the pattern.
+      
+  (g) When reading a callout string, if the end delimiter was at the end of the 
+      pattern one further code unit was read.


4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the

Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2016-11-01 15:58:28 UTC (rev 587)
+++ code/trunk/src/pcre2_compile.c    2016-11-01 17:45:54 UTC (rev 588)
@@ -3436,7 +3436,8 @@
             ptr = startptr;   /* To give a more useful message */
             goto FAILED;
             }
-          if (*ptr == delimiter && (++ptr > ptrend || *ptr != delimiter)) break;
+          if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter)) 
+            break;
           }


         calloutlength = (PCRE2_SIZE)(ptr - startptr);
@@ -7653,7 +7654,7 @@
    if (op == OP_BRA  || op == OP_BRAPOS ||
        op == OP_SBRA || op == OP_SBRAPOS)
      {
-     if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) 
+     if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
        return FALSE;
      }


@@ -7678,7 +7679,7 @@

    else if (op == OP_COND)
      {
-     if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) 
+     if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
        return FALSE;
      }


@@ -8825,36 +8826,37 @@
 pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
    int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
 {
-BOOL utf;                               /* Set TRUE for UTF mode */
-BOOL has_lookbehind;                    /* Set TRUE if a lookbehind is found */
-pcre2_real_code *re = NULL;             /* What we will return */
-compile_block cb;                       /* "Static" compile-time data */
-const uint8_t *tables;                  /* Char tables base pointer */
+BOOL utf;                             /* Set TRUE for UTF mode */
+BOOL has_lookbehind;                  /* Set TRUE if a lookbehind is found */
+BOOL zero_terminated;                 /* Set TRUE for zero-terminated pattern */
+pcre2_real_code *re = NULL;           /* What we will return */
+compile_block cb;                     /* "Static" compile-time data */
+const uint8_t *tables;                /* Char tables base pointer */


-PCRE2_UCHAR *code;                      /* Current pointer in compiled code */
-PCRE2_SPTR codestart;                   /* Start of compiled code */
-PCRE2_SPTR ptr;                         /* Current pointer in pattern */
-uint32_t *pptr;                         /* Current pointer in parsed pattern */
+PCRE2_UCHAR *code;                    /* Current pointer in compiled code */
+PCRE2_SPTR codestart;                 /* Start of compiled code */
+PCRE2_SPTR ptr;                       /* Current pointer in pattern */
+uint32_t *pptr;                       /* Current pointer in parsed pattern */


-PCRE2_SIZE length = 1;                  /* Allow for final END opcode */
-PCRE2_SIZE usedlength;                  /* Actual length used */
-PCRE2_SIZE re_blocksize;                /* Size of memory block */
-PCRE2_SIZE big32count = 0;              /* 32-bit literals >= 0x80000000 */
-PCRE2_SIZE parsed_size_needed;          /* Needed for parsed pattern */
+PCRE2_SIZE length = 1;                /* Allow for final END opcode */
+PCRE2_SIZE usedlength;                /* Actual length used */
+PCRE2_SIZE re_blocksize;              /* Size of memory block */
+PCRE2_SIZE big32count = 0;            /* 32-bit literals >= 0x80000000 */
+PCRE2_SIZE parsed_size_needed;        /* Needed for parsed pattern */


-int32_t firstcuflags, reqcuflags;       /* Type of first/req code unit */
-uint32_t firstcu, reqcu;                /* Value of first/req code unit */
-uint32_t setflags = 0;                  /* NL and BSR set flags */
+int32_t firstcuflags, reqcuflags;     /* Type of first/req code unit */
+uint32_t firstcu, reqcu;              /* Value of first/req code unit */
+uint32_t setflags = 0;                /* NL and BSR set flags */


-uint32_t skipatstart;                   /* When checking (*UTF) etc */
-uint32_t limit_match = UINT32_MAX;      /* Unset match limits */
+uint32_t skipatstart;                 /* When checking (*UTF) etc */
+uint32_t limit_match = UINT32_MAX;    /* Unset match limits */
 uint32_t limit_recursion = UINT32_MAX;


-int newline = 0;                        /* Unset; can be set by the pattern */
-int bsr = 0;                            /* Unset; can be set by the pattern */
-int errorcode = 0;                      /* Initialize to avoid compiler warn */
+int newline = 0;                      /* Unset; can be set by the pattern */
+int bsr = 0;                          /* Unset; can be set by the pattern */
+int errorcode = 0;                    /* Initialize to avoid compiler warn */


-uint32_t i;                             /* Local loop counter */
+uint32_t i;                           /* Local loop counter */


/* Comments at the head of this file explain about these variables. */

@@ -8901,7 +8903,9 @@
/* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */

-if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern);
+if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED)))
+ patlen = PRIV(strlen)(pattern);
+
if (patlen > ccontext->max_pattern_length)
{
*errorptr = ERR88;
@@ -8908,6 +8912,10 @@
return NULL;
}

+/* From here on, all returns from this function should end up going via the
+EXIT label. */
+
+
/* ------------ Initialize the "static" compile data -------------- */

tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
@@ -8966,8 +8974,14 @@
/* --------------- Start looking at the pattern --------------- */

/* Check for global one-time option settings at the start of the pattern, and
-remember the offset to the actual regex. */
+remember the offset to the actual regex. With valgrind support, make the
+terminator of a zero-terminated pattern inaccessible. This catches bugs that
+would otherwise only show up for non-zero-terminated patterns. */

+#ifdef SUPPORT_VALGRIND
+if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
+#endif
+
ptr = pattern;
skipatstart = 0;

@@ -9148,14 +9162,14 @@

 if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
   {
-  cb.parsed_pattern = ccontext->memctl.malloc(
-    (parsed_size_needed + 1) * sizeof(uint32_t),
-      ccontext->memctl.memory_data);
-  if (cb.parsed_pattern == NULL)
+  uint32_t *heap_parsed_pattern = ccontext->memctl.malloc(
+    (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data);
+  if (heap_parsed_pattern == NULL)
     {
     *errorptr = ERR21;
-    return NULL;
+    goto EXIT;
     }
+  cb.parsed_pattern = heap_parsed_pattern;
   }


/* Do the parsing scan. */
@@ -9547,12 +9561,16 @@
goto HAD_CB_ERROR;
}

-/* Control ends up here in all cases. If memory was obtained for the parsed
+/* Control ends up here in all cases. When running under valgrind, make a
+pattern's terminating zero defined again. If memory was obtained for the parsed
version of the pattern, free it before returning. Also free the list of named
groups if a larger one had to be obtained, and likewise the group information
vector. */

EXIT:
+#ifdef SUPPORT_VALGRIND
+if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1));
+#endif
if (cb.parsed_pattern != stack_parsed_pattern)
ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data);
if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)