Revision: 579
http://www.exim.org/viewvc/pcre2?view=rev&revision=579
Author: ph10
Date: 2016-10-27 18:42:14 +0100 (Thu, 27 Oct 2016)
Log Message:
-----------
Fix out-of-bounds lookup in a global table for wide characters (code greater
than 255) in extended mode and in *VERB names.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_intmodedep.h
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-10-26 16:59:22 UTC (rev 578)
+++ code/trunk/ChangeLog 2016-10-27 17:42:14 UTC (rev 579)
@@ -48,6 +48,10 @@
(b) In utf mode, the length of a *MARK (or other verb) name was being checked
in characters instead of code units, which could lead to bad code being
compiled, leading to unpredictable behaviour.
+
+ (c) In extended /x mode, characters whose code was greater than 255 caused
+ a lookup outside one of the global tables. A similar bug existed for wide
+ characters in *VERB names.
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2016-10-26 16:59:22 UTC (rev 578)
+++ code/trunk/src/pcre2_compile.c 2016-10-27 17:42:14 UTC (rev 579)
@@ -2226,7 +2226,9 @@
and \E and escaped characters are allowed (no character types such as \d). If
PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do
this by not entering the special (*VERB:NAME) processing - they are then
- picked up below. */
+ picked up below. Note that c is a character, not a code unit, so we must not
+ use MAX_255 to test its size because MAX_255 tests code units and is assumed
+ TRUE in 8-bit mode. */
if (inverbname &&
(
@@ -2234,7 +2236,7 @@
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
/* OR: character > 255 */
- !MAX_255(c) ||
+ c > 255 ||
/* OR: not a # comment or white space */
(c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
))
@@ -2306,11 +2308,13 @@
}
}
- /* Skip over whitespace and # comments in extended mode. */
+ /* Skip over whitespace and # comments in extended mode. Note that c is a
+ character, not a code unit, so we must not use MAX_255 to test its size
+ because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
if ((options & PCRE2_EXTENDED) != 0)
{
- if (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) continue;
+ if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
@@ -8866,7 +8870,7 @@
*errorptr = ERR16;
return NULL;
}
-
+
/* Check that all undefined public option bits are zero. */
if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
Modified: code/trunk/src/pcre2_intmodedep.h
===================================================================
--- code/trunk/src/pcre2_intmodedep.h 2016-10-26 16:59:22 UTC (rev 578)
+++ code/trunk/src/pcre2_intmodedep.h 2016-10-27 17:42:14 UTC (rev 579)
@@ -200,11 +200,11 @@
#endif
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
-whether its argument is less than 256. The maximum length of a MARK name must
-fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
-is used to access elements of tables containing exactly 256 items. When code
-points can be greater than 255, a check is needed before accessing these
-tables. */
+whether its argument, which is assumed to be one code unit, is less than 256.
+The maximum length of a MARK name must fit in one code unit; currently it is
+set to 255 or 65535. The TABLE_GET macro is used to access elements of tables
+containing exactly 256 items. When code points can be greater than 255, a check
+is needed before accessing these tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2016-10-26 16:59:22 UTC (rev 578)
+++ code/trunk/testdata/testinput5 2016-10-27 17:42:14 UTC (rev 579)
@@ -1740,4 +1740,8 @@
/../utf,auto_callout
\n\x{123}\x{123}\x{123}\x{123}
+# This tests processing wide characters in extended mode.
+
+/XȀ/x,utf
+
# End of testinput5
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2016-10-26 16:59:22 UTC (rev 578)
+++ code/trunk/testdata/testoutput5 2016-10-27 17:42:14 UTC (rev 579)
@@ -4184,4 +4184,8 @@
+2 ^ ^
0: \x{123}\x{123}
+# This tests processing wide characters in extended mode.
+
+/XȀ/x,utf
+
# End of testinput5