Revision: 1232
http://www.exim.org/viewvc/pcre2?view=rev&revision=1232
Author: zherczeg
Date: 2020-02-27 08:35:14 +0000 (Thu, 27 Feb 2020)
Log Message:
-----------
Support more accelerated repeat cases in JIT.
Modified Paths:
--------------
code/trunk/src/pcre2_jit_compile.c
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2020-02-26 16:53:39 UTC (rev 1231)
+++ code/trunk/src/pcre2_jit_compile.c 2020-02-27 08:35:14 UTC (rev 1232)
@@ -1270,6 +1270,7 @@
cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif
+ /* Only these types are supported. */
switch(*cc)
{
case OP_CRSTAR:
@@ -1315,8 +1316,10 @@
break;
end = cc + GET(cc, 1);
+ /* Iterated brackets are skipped. */
if (*end != OP_KET || PRIVATE_DATA(end) != 0)
return FALSE;
+
if (*cc == OP_CBRA)
{
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
@@ -1336,67 +1339,138 @@
return FALSE;
}
-static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
+static void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
{
- PCRE2_SPTR next_alt;
+PCRE2_SPTR next_alt;
+PCRE2_SPTR end;
- SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
+SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
+SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
- if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- return;
+do
+ {
+ next_alt = cc + GET(cc, 1);
- next_alt = bracketend(cc) - (1 + LINK_SIZE);
- if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
- return;
+ cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
- do
+ while (TRUE)
{
- next_alt = cc + GET(cc, 1);
+ switch(*cc)
+ {
+ case OP_SOD:
+ case OP_SOM:
+ case OP_SET_SOM:
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_NOT_DIGIT:
+ case OP_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ case OP_WORDCHAR:
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYBYTE:
+ case OP_ANYNL:
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ cc++;
+ continue;
- cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
+ case OP_NOTPROP:
+ case OP_PROP:
+ cc += 1 + 2;
+ continue;
- while (TRUE)
- {
- switch(*cc)
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ cc += 2;
+#ifdef SUPPORT_UNICODE
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ continue;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
+ case OP_XCLASS:
+ end = cc + ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
+#else
+ end = cc + (1 + (32 / sizeof(PCRE2_UCHAR)));
+#endif
+
+ if (*end >= OP_CRSTAR && *end <= OP_CRPOSRANGE)
+ break;
+
+ cc = end;
+ continue;
+
+ case OP_BRA:
+ case OP_CBRA:
+ end = cc + GET(cc, 1);
+
+ if (*end == OP_KET && PRIVATE_DATA(end) == 0)
{
- case OP_SOD:
- case OP_SOM:
- case OP_SET_SOM:
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
- /* Zero width assertions. */
- cc++;
+ if (*cc == OP_CBRA)
+ {
+ if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ break;
+ cc += IMM2_SIZE;
+ }
+
+ cc += 1 + LINK_SIZE;
continue;
}
- break;
- }
- if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
- detect_fast_fail(common, cc, private_data_start, depth - 1);
+ if (depth == 0)
+ break;
- if (is_accelerated_repeat(cc))
- {
- common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
+ end = bracketend(cc) - (1 + LINK_SIZE);
+ if (*end != OP_KET || PRIVATE_DATA(end) != 0)
+ break;
- if (common->fast_fail_start_ptr == 0)
- common->fast_fail_start_ptr = *private_data_start;
+ if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ break;
- *private_data_start += sizeof(sljit_sw);
- common->fast_fail_end_ptr = *private_data_start;
+ detect_fast_fail(common, cc, private_data_start, depth - 1);
+ break;
- if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
- return;
+ case OP_KET:
+ SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
+ if (cc >= next_alt)
+ break;
+ cc += 1 + LINK_SIZE;
+ continue;
}
+ break;
+ }
- cc = next_alt;
+ if (is_accelerated_repeat(cc))
+ {
+ common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
+
+ if (common->fast_fail_start_ptr == 0)
+ common->fast_fail_start_ptr = *private_data_start;
+
+ *private_data_start += sizeof(sljit_sw);
+ common->fast_fail_end_ptr = *private_data_start;
+
+ if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
+ return;
}
- while (*cc == OP_ALT);
+ cc = next_alt;
+ }
+while (*cc == OP_ALT);
}
static int get_class_iterator_size(PCRE2_SPTR cc)