Revision: 233
http://www.exim.org/viewvc/pcre2?view=rev&revision=233
Author: ph10
Date: 2015-03-25 19:26:27 +0000 (Wed, 25 Mar 2015)
Log Message:
-----------
Fix bad memory computation for "(*UTF)[\S\V\H]" (a pattern with a negative
class (\S) and explicit wide characters).
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput4
code/trunk/testdata/testoutput4
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-03-25 17:01:04 UTC (rev 232)
+++ code/trunk/ChangeLog 2015-03-25 19:26:27 UTC (rev 233)
@@ -30,7 +30,14 @@
depth limit of 10000 has been imposed to limit the resources used by this
optimization. This infelicity was discovered by the LLVM fuzzer.
+9. In a pattern such as /(*UTF)[\S\V\H]/, which contains a negated special
+class such as \S in non-UCP mode, explicit wide characters (> 255) can be
+ignored because \S ensures they are all in the class. The code for doing this
+was interacting badly with the code for computing the amount of space needed
+to compile the pattern, leading to a buffer overflow. This bug was discovered
+by the LLVM fuzzer.
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-03-25 17:01:04 UTC (rev 232)
+++ code/trunk/src/pcre2_compile.c 2015-03-25 19:26:27 UTC (rev 233)
@@ -3556,20 +3556,6 @@
}
#endif
-#ifdef SUPPORT_WIDE_CHARS
- /* In the pre-compile phase, accumulate the length of any wide characters
- and reset the pointer. This is so that very large classes that contain a
- zillion wide characters no longer overwrite the work space (which is on
- the stack). We have to remember that there was XCLASS data, however. */
-
- if (lengthptr != NULL && class_uchardata > class_uchardata_base)
- {
- xclass = TRUE;
- *lengthptr += class_uchardata - class_uchardata_base;
- class_uchardata = class_uchardata_base;
- }
-#endif
-
/* Inside \Q...\E everything is literal except \E */
if (inescq)
@@ -4074,21 +4060,29 @@
nestptr = NULL;
c = *(++ptr);
}
- if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
- } /* End of main class-processing loop */
- /* We will need an XCLASS if data has been placed in class_uchardata. In
- the second phase this is a sufficient test. However, in the pre-compile
- phase, class_uchardata gets emptied to prevent workspace overflow, so it
- only if the very last character in the class needs XCLASS will it contain
- anything at this point. For this reason, xclass gets set TRUE above when
- class_uchardata is emptied, and that's why this code is the way it is here
- instead of just doing a test on class_uchardata below. */
-
#ifdef SUPPORT_WIDE_CHARS
- if (class_uchardata > class_uchardata_base) xclass = TRUE;
+ /* If any wide characters have been encountered, set xclass = TRUE. Then,
+ in the pre-compile phase, accumulate the length of the wide characters
+ and reset the pointer. This is so that very large classes that contain a
+ zillion wide characters do not overwrite the work space (which is on the
+ stack). */
+
+ if (class_uchardata > class_uchardata_base)
+ {
+ xclass = TRUE;
+ if (lengthptr != NULL)
+ {
+ *lengthptr += class_uchardata - class_uchardata_base;
+ class_uchardata = class_uchardata_base;
+ }
+ }
#endif
+ /* An unescaped ] ends the class */
+ if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
+ } /* End of main class-processing loop */
+
/* If this is the first thing in the branch, there can be no first char
setting, whatever the repeat count. Any reqcu setting must remain
unchanged after any kind of repeat. */
@@ -4107,12 +4101,12 @@
be listed) there are no characters < 256, we can omit the bitmap in the
actual compiled code. */
+#ifdef SUPPORT_WIDE_CHARS
#ifdef SUPPORT_UNICODE
if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
#elif PCRE2_CODE_UNIT_WIDTH != 8
if (xclass && !should_flip_negation)
#endif
-#ifdef SUPPORT_WIDE_CHARS
{
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS;
Modified: code/trunk/testdata/testinput4
===================================================================
--- code/trunk/testdata/testinput4 2015-03-25 17:01:04 UTC (rev 232)
+++ code/trunk/testdata/testinput4 2015-03-25 19:26:27 UTC (rev 233)
@@ -2219,4 +2219,6 @@
/[A-`]/i,utf
abcdefghijklmno
+"[\S\V\H]"utf
+
# End of testinput4
Modified: code/trunk/testdata/testoutput4
===================================================================
--- code/trunk/testdata/testoutput4 2015-03-25 17:01:04 UTC (rev 232)
+++ code/trunk/testdata/testoutput4 2015-03-25 19:26:27 UTC (rev 233)
@@ -3739,4 +3739,6 @@
abcdefghijklmno
0: a
+"[\S\V\H]"utf
+
# End of testinput4