Revision: 188
http://www.exim.org/viewvc/pcre2?view=rev&revision=188
Author: ph10
Date: 2015-01-27 17:21:32 +0000 (Tue, 27 Jan 2015)
Log Message:
-----------
Fix incorrect size calculation when a reference to a duplicate name occurs
in a part of the pattern where PCRE2_DUPNAMES is not set.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_intmodedep.h
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-01-26 14:57:27 UTC (rev 187)
+++ code/trunk/ChangeLog 2015-01-27 17:21:32 UTC (rev 188)
@@ -31,7 +31,16 @@
5. The (*NO_JIT) feature is implemented.
+6. If a bug that caused pcre2_compile() to use more memory than allocated was
+triggered when using valgrind, the code in (3) above passed a stupidly large
+value to valgrind. This caused a crash instead of an "internal error" return.
+7. A reference to a duplicated named group (either a back reference or a test
for being set in a conditional) that occurred in a part of the pattern where
PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern
to be incorrectly calculated, leading to memory beyond the allocated block
being overwritten.
+
+
Version 10.00 05-January-2015
-----------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-01-26 14:57:27 UTC (rev 187)
+++ code/trunk/src/pcre2_compile.c 2015-01-27 17:21:32 UTC (rev 188)
@@ -5296,9 +5296,10 @@
}
/* Otherwise we expect to read a name; anything else is an error. When
- a name is one of a number of duplicates, a different opcode is used and
- it needs more memory. Unfortunately we cannot tell whether a name is a
- duplicate in the first pass, so we have to allow for more memory. */
+ the referenced name is one of a number of duplicates, a different
+ opcode is used and it needs more memory. Unfortunately we cannot tell
+ whether this is the case in the first pass, so we always have to allow
+ for more memory. */
else
{
@@ -5318,8 +5319,7 @@
ptr++;
}
namelen = (int)(ptr - name);
- if (lengthptr != NULL && (options & PCRE2_DUPNAMES) != 0)
- *lengthptr += IMM2_SIZE;
+ if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
}
/* Check the terminator */
@@ -5736,15 +5736,15 @@
}
recno = (i < cb->names_found)? ng->number : 0;
- /* Count named back references. */
-
- if (!is_recurse) cb->namedrefcount++;
-
/* If duplicate names are permitted, we have to allow for a named
reference to a duplicated name (this cannot be determined until the
- second pass). This needs an extra 16-bit data item. */
+ second pass). This needs an extra data item. Counting named back
+ references and incrementing the count at the end does not work
+ because it does not account for duplication of groups containing such
+ references. Nor does checking for PCRE2_DUPNAMES because that need
+ not be set at the point of reference. */
- if ((options & PCRE2_DUPNAMES) != 0) *lengthptr += IMM2_SIZE;
+ *lengthptr += IMM2_SIZE;
}
/* In the real compile, search the name table. We check the name
@@ -6874,7 +6874,7 @@
}
/* Fill in the ket */
-
+
*code = OP_KET;
PUT(code, 1, (int)(code - start_bracket));
code += 1 + LINK_SIZE;
@@ -7479,7 +7479,6 @@
cb.name_table = NULL;
cb.named_groups = named_groups;
cb.named_group_list_size = NAMED_GROUP_LIST_SIZE;
-cb.namedrefcount = 0;
cb.names_found = 0;
cb.open_caps = NULL;
cb.parens_depth = 0;
@@ -7668,13 +7667,6 @@
goto HAD_ERROR;
}
-/* If there are groups with duplicate names and there are also references by
-name, we must allow for the possibility of named references to duplicated
-groups. These require an extra data item each. */
-
-if (cb.dupnames && cb.namedrefcount > 0)
- length += cb.namedrefcount * IMM2_SIZE * sizeof(PCRE2_UCHAR);
-
/* Compute the size of, and then get and initialize, the data block for storing
the compiled pattern and names table. Integer overflow should no longer be
possible because nowadays we limit the maximum value of cb.names_found and
@@ -7689,7 +7681,7 @@
errorcode = ERR21;
goto HAD_ERROR;
}
-
+
re->memctl = ccontext->memctl;
re->tables = tables;
re->executable_jit = NULL;
@@ -7775,22 +7767,22 @@
}
/* If we have not reached end of pattern after a successful compile, there's an
-excess bracket. Otherwise, fill in the final opcode and check for disastrous
-overflow. */
+excess bracket. Fill in the final opcode and check for disastrous overflow.
+If no overflow, but the estimated length exceeds the really used length, adjust
+the value of re->blocksize, and if valgrind support is configured, mark the
+extra allocated memory as unaddressable, so that any out-of-bound reads can be
+detected. */
if (errorcode == 0 && ptr < cb.end_pattern) errorcode = ERR22;
*code++ = OP_END;
usedlength = code - codestart;
-if (usedlength > length) errorcode = ERR23;
-
-/* If the estimated length exceeds the really used length, adjust the value of
-re->blocksize, and if valgrind support is configured, mark the extra allocated
-memory as unaddressable, so that any out-of-bound reads can be detected. */
-
-re->blocksize -= CU2BYTES(length - usedlength);
+if (usedlength > length) errorcode = ERR23; else
+ {
+ re->blocksize -= CU2BYTES(length - usedlength);
#ifdef SUPPORT_VALGRIND
-VALGRIND_MAKE_MEM_NOACCESS(code, CU2BYTES(length - usedlength));
+ VALGRIND_MAKE_MEM_NOACCESS(code, CU2BYTES(length - usedlength));
#endif
+ }
/* Fill in any forward references that are required. There may be repeated
references; optimize for them, as searching a large regex takes time. */
Modified: code/trunk/src/pcre2_intmodedep.h
===================================================================
--- code/trunk/src/pcre2_intmodedep.h 2015-01-26 14:57:27 UTC (rev 187)
+++ code/trunk/src/pcre2_intmodedep.h 2015-01-27 17:21:32 UTC (rev 188)
@@ -677,7 +677,6 @@
uint32_t final_bracount; /* Saved value after first pass */
uint32_t top_backref; /* Maximum back reference */
uint32_t backref_map; /* Bitmap of low back refs */
- uint32_t namedrefcount; /* Number of backreferences by name */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-01-26 14:57:27 UTC (rev 187)
+++ code/trunk/testdata/testinput2 2015-01-27 17:21:32 UTC (rev 188)
@@ -4134,4 +4134,8 @@
aa123\=ovector=2
aa123\=ovector=3
+/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
+
+/(?<N>(?J)(?<N>))(?-J)\k<N>/
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-01-26 14:57:27 UTC (rev 187)
+++ code/trunk/testdata/testoutput2 2015-01-27 17:21:32 UTC (rev 188)
@@ -13893,4 +13893,8 @@
1: <unset>
2: a
+/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/
+
+/(?<N>(?J)(?<N>))(?-J)\k<N>/
+
# End of testinput2