Revision: 720
http://www.exim.org/viewvc/pcre2?view=rev&revision=720
Author: zherczeg
Date: 2017-03-31 06:40:37 +0100 (Fri, 31 Mar 2017)
Log Message:
-----------
Fix character type detection in JIT when 32-bit and UCP are enabled but UTF is not.
Modified Paths:
--------------
code/trunk/src/pcre2_jit_compile.c
code/trunk/testdata/testinput12
code/trunk/testdata/testoutput12-16
code/trunk/testdata/testoutput12-32
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2017-03-30 17:43:50 UTC (rev 719)
+++ code/trunk/src/pcre2_jit_compile.c 2017-03-31 05:40:37 UTC (rev 720)
@@ -588,6 +588,8 @@
#define READ_CHAR_MAX 0x7fffffff
+#define INVALID_UTF_CHAR 888
+
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
@@ -3558,10 +3560,30 @@
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2. */
DEFINE_COMPILER;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+struct sljit_jump *jump;
+#endif
+#if defined SLJIT_DEBUG && SLJIT_DEBUG
+/* dummy_ucd_record */
+const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
+SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
+SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
+#endif
+
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (!common->utf)
+ {
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
+ JUMPHERE(jump);
+ }
+#endif
+
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
@@ -5969,6 +5991,15 @@
if (needschar && !charsaved)
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (!common->utf)
+ {
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
+ JUMPHERE(jump);
+ }
+#endif
+
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
Modified: code/trunk/testdata/testinput12
===================================================================
--- code/trunk/testdata/testinput12 2017-03-30 17:43:50 UTC (rev 719)
+++ code/trunk/testdata/testinput12 2017-03-31 05:40:37 UTC (rev 720)
@@ -361,6 +361,6 @@
/[\s[:^ascii:]]/B,ucp
/\pP/ucp
- \x{7fffffff}\=no_jit
+ \x{7fffffff}
# End of testinput12
Modified: code/trunk/testdata/testoutput12-16
===================================================================
--- code/trunk/testdata/testoutput12-16 2017-03-30 17:43:50 UTC (rev 719)
+++ code/trunk/testdata/testoutput12-16 2017-03-31 05:40:37 UTC (rev 720)
@@ -1416,7 +1416,7 @@
------------------------------------------------------------------
/\pP/ucp
- \x{7fffffff}\=no_jit
+ \x{7fffffff}
** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
** Truncation will probably give the wrong result.
No match
Modified: code/trunk/testdata/testoutput12-32
===================================================================
--- code/trunk/testdata/testoutput12-32 2017-03-30 17:43:50 UTC (rev 719)
+++ code/trunk/testdata/testoutput12-32 2017-03-31 05:40:37 UTC (rev 720)
@@ -1410,7 +1410,7 @@
------------------------------------------------------------------
/\pP/ucp
- \x{7fffffff}\=no_jit
+ \x{7fffffff}
No match
# End of testinput12