Revision: 670
http://www.exim.org/viewvc/pcre2?view=rev&revision=670
Author: ph10
Date: 2017-02-24 18:25:32 +0000 (Fri, 24 Feb 2017)
Log Message:
-----------
Fix 32-bit non-UTF property test crash.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/maint/MultiStage2.py
code/trunk/src/pcre2_internal.h
code/trunk/src/pcre2_ucd.c
code/trunk/testdata/testinput12
code/trunk/testdata/testoutput12-16
code/trunk/testdata/testoutput12-32
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2017-02-23 17:05:43 UTC (rev 669)
+++ code/trunk/ChangeLog 2017-02-24 18:25:32 UTC (rev 670)
@@ -9,6 +9,10 @@
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
the match data block (non-POSIX).
+
+2. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
+for a character with a code point greater than 0x10ffff (the Unicode maximum)
+caused a crash.
Version 10.23 14-February-2017
Modified: code/trunk/maint/MultiStage2.py
===================================================================
--- code/trunk/maint/MultiStage2.py 2017-02-23 17:05:43 UTC (rev 669)
+++ code/trunk/maint/MultiStage2.py 2017-02-24 18:25:32 UTC (rev 670)
@@ -236,7 +236,8 @@
fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */"
mult = MAX_UNICODE / len(table)
for i in range(0, len(table), ELEMS_PER_LINE):
- print(fmt % (table[i:i+ELEMS_PER_LINE] + (i * mult,)))
+ print(fmt % (table[i:i+ELEMS_PER_LINE] +
+ (int(i * mult),)))
else:
if block_size > ELEMS_PER_LINE:
el = ELEMS_PER_LINE
@@ -485,6 +486,20 @@
print()
print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version))
print()
+print("/* If the 32-bit library is run in non-32-bit mode, character values")
+print("greater than 0x10ffff may be encountered. For these we set up a")
+print("special record. */")
+print()
+print("#if PCRE2_CODE_UNIT_WIDTH == 32")
+print("const ucd_record PRIV(dummy_ucd_record)[] = {{")
+print(" ucp_Common, /* script */")
+print(" ucp_Cn, /* type unassigned */")
+print(" ucp_gbOther, /* grapheme break property */")
+print(" 0, /* case set */")
+print(" 0, /* other case */")
+print(" }};")
+print("#endif")
+print()
print(record_struct)
# --- Added by PH: output the table of caseless character sets ---
Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h 2017-02-23 17:05:43 UTC (rev 669)
+++ code/trunk/src/pcre2_internal.h 2017-02-24 18:25:32 UTC (rev 670)
@@ -1774,10 +1774,17 @@
/* UCD access macros */
#define UCD_BLOCK_SIZE 128
-#define GET_UCD(ch) (PRIV(ucd_records) + \
+#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define GET_UCD(ch) ((ch > MAX_UTF_CODE_POINT)? \
+ PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
+#else
+#define GET_UCD(ch) REAL_GET_UCD(ch)
+#endif
+
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
@@ -1834,6 +1841,9 @@
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
+#endif
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
@@ -1858,6 +1868,9 @@
extern const uint32_t PRIV(vspace_list)[];
extern const uint32_t PRIV(ucd_caseless_sets)[];
extern const ucd_record PRIV(ucd_records)[];
+#if PCRE2_CODE_UNIT_WIDTH == 32
+extern const ucd_record PRIV(dummy_ucd_record)[];
+#endif
extern const uint8_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[];
Modified: code/trunk/src/pcre2_ucd.c
===================================================================
--- code/trunk/src/pcre2_ucd.c 2017-02-23 17:05:43 UTC (rev 669)
+++ code/trunk/src/pcre2_ucd.c 2017-02-24 18:25:32 UTC (rev 670)
@@ -41,6 +41,20 @@
const char *PRIV(unicode_version) = "8.0.0";
+/* If the 32-bit library is run in non-UTF mode, character values
+greater than 0x10ffff may be encountered. For these we set up a
+special record. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+const ucd_record PRIV(dummy_ucd_record)[] = {{
+ ucp_Common, /* script */
+ ucp_Cn, /* type unassigned */
+ ucp_gbOther, /* grapheme break property */
+ 0, /* case set */
+ 0, /* other case */
+ }};
+#endif
+
/* When recompiling tables with a new Unicode version, please check the
types in this structure definition from pcre2_internal.h (the actual
field names will be different):
Modified: code/trunk/testdata/testinput12
===================================================================
--- code/trunk/testdata/testinput12 2017-02-23 17:05:43 UTC (rev 669)
+++ code/trunk/testdata/testinput12 2017-02-24 18:25:32 UTC (rev 670)
@@ -360,4 +360,7 @@
/[\s[:^ascii:]]/B,ucp
+/\pP/ucp
+ \x{7fffffff}\=no_jit
+
# End of testinput12
Modified: code/trunk/testdata/testoutput12-16
===================================================================
--- code/trunk/testdata/testoutput12-16 2017-02-23 17:05:43 UTC (rev 669)
+++ code/trunk/testdata/testoutput12-16 2017-02-24 18:25:32 UTC (rev 670)
@@ -1415,4 +1415,10 @@
End
------------------------------------------------------------------
+/\pP/ucp
+ \x{7fffffff}\=no_jit
+** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
# End of testinput12
Modified: code/trunk/testdata/testoutput12-32
===================================================================
--- code/trunk/testdata/testoutput12-32 2017-02-23 17:05:43 UTC (rev 669)
+++ code/trunk/testdata/testoutput12-32 2017-02-24 18:25:32 UTC (rev 670)
@@ -1409,4 +1409,8 @@
End
------------------------------------------------------------------
+/\pP/ucp
+ \x{7fffffff}\=no_jit
+No match
+
# End of testinput12