Revision: 1303
http://www.exim.org/viewvc/pcre2?view=rev&revision=1303
Author: zherczeg
Date: 2021-02-19 11:58:36 +0000 (Fri, 19 Feb 2021)
Log Message:
-----------
Prefer single character optimization over bracket repetition in JIT.
Modified Paths:
--------------
code/trunk/src/pcre2_jit_compile.c
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2021-02-19 09:20:37 UTC (rev 1302)
+++ code/trunk/src/pcre2_jit_compile.c 2021-02-19 11:58:36 UTC (rev 1303)
@@ -1238,6 +1238,7 @@
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
{
+PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
@@ -1475,31 +1476,19 @@
case OP_CBRA:
end = cc + GET(cc, 1);
- if (*end == OP_KET && PRIVATE_DATA(end) == 0)
- {
- if (*cc == OP_CBRA)
- {
- if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- break;
- cc += IMM2_SIZE;
- }
-
- cc += 1 + LINK_SIZE;
- continue;
- }
-
fast_forward_allowed = FALSE;
if (depth >= 4)
break;
end = bracketend(cc) - (1 + LINK_SIZE);
- if (*end != OP_KET || PRIVATE_DATA(end) != 0)
+ if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
break;
- if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- break;
+ count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
- count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
+ if (PRIVATE_DATA(cc) != 0)
+ common->private_data_ptrs[begin - common->start] = 1;
+
if (count < EARLY_FAIL_ENHANCE_MAX)
{
cc = end + (1 + LINK_SIZE);
@@ -1555,6 +1544,8 @@
return EARLY_FAIL_ENHANCE_MAX;
}
+ /* Cannot be part of a repeat. */
+ common->private_data_ptrs[begin - common->start] = 1;
count++;
if (count < EARLY_FAIL_ENHANCE_MAX)
@@ -1620,11 +1611,12 @@
sljit_s32 min, max, i;
/* Detect fixed iterations first. */
-if (end[-(1 + LINK_SIZE)] != OP_KET)
+if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
return FALSE;
-/* Already detected repeat. */
-if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
+/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
+ * Skip the check of the second part. */
+if (PRIVATE_DATA(end - LINK_SIZE) == 0)
return TRUE;
next = end;
@@ -1763,6 +1755,7 @@
if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
break;
+ /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
{
if (detect_repeat(common, cc))
@@ -1813,6 +1806,7 @@
case OP_COND:
/* Might be a hidden SCOND. */
+ common->private_data_ptrs[cc - common->start] = 0;
alternative = cc + GET(cc, 1);
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
{
@@ -13661,10 +13655,12 @@
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
-set_private_data_ptrs(common, &private_data_size, ccend);
+
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
detect_early_fail(common, common->start, &private_data_size, 0, 0);
+set_private_data_ptrs(common, &private_data_size, ccend);
+
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
if (private_data_size > SLJIT_MAX_LOCAL_SIZE)