[Pcre-svn] [718] code/trunk/src: Support (*ACCEPT) inside re…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [718] code/trunk/src: Support (*ACCEPT) inside recurse in JIT.
Revision: 718
          http://www.exim.org/viewvc/pcre2?view=rev&revision=718
Author:   zherczeg
Date:     2017-03-30 14:25:20 +0100 (Thu, 30 Mar 2017)
Log Message:
-----------
Support (*ACCEPT) inside recurse in JIT.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/pcre2_jit_test.c


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2017-03-29 18:10:55 UTC (rev 717)
+++ code/trunk/src/pcre2_jit_compile.c    2017-03-30 13:25:20 UTC (rev 718)
@@ -1862,11 +1862,14 @@


#undef RECURSE_TMP_REG_COUNT

-static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL *needs_control_head)
+static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
+ BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int length = 1;
int size;
PCRE2_SPTR alternative;
+BOOL quit_found = FALSE;
+BOOL accept_found = FALSE;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL capture_last_found = FALSE;
@@ -1875,7 +1878,6 @@
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
-length++;
#endif

 /* Calculate the sum of the private machine words. */
@@ -1886,30 +1888,17 @@
     {
     case OP_SET_SOM:
     SLJIT_ASSERT(common->has_set_som);
-    if (!setsom_found)
-      {
-      length++;
-      setsom_found = TRUE;
-      }
+    setsom_found = TRUE;
     cc += 1;
     break;


     case OP_RECURSE:
-    if (common->has_set_som && !setsom_found)
-      {
-      length++;
+    if (common->has_set_som)
       setsom_found = TRUE;
-      }
-    if (common->mark_ptr != 0 && !setmark_found)
-      {
-      length++;
+    if (common->mark_ptr != 0)
       setmark_found = TRUE;
-      }
-    if (common->capture_last_ptr != 0 && !capture_last_found)
-      {
-      length++;
+    if (common->capture_last_ptr != 0)
       capture_last_found = TRUE;
-      }
     cc += 1 + LINK_SIZE;
     break;


@@ -1940,11 +1929,8 @@
     case OP_CBRA:
     case OP_SCBRA:
     length += 2;
-    if (common->capture_last_ptr != 0 && !capture_last_found)
-      {
-      length++;
+    if (common->capture_last_ptr != 0)
       capture_last_found = TRUE;
-      }
     if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
       length++;
     cc += 1 + LINK_SIZE + IMM2_SIZE;
@@ -1953,11 +1939,8 @@
     case OP_CBRAPOS:
     case OP_SCBRAPOS:
     length += 2 + 2;
-    if (common->capture_last_ptr != 0 && !capture_last_found)
-      {
-      length++;
+    if (common->capture_last_ptr != 0)
       capture_last_found = TRUE;
-      }
     cc += 1 + LINK_SIZE + IMM2_SIZE;
     break;


@@ -2032,28 +2015,41 @@
     case OP_THEN_ARG:
     SLJIT_ASSERT(common->mark_ptr != 0);
     if (!setmark_found)
-      {
-      length++;
       setmark_found = TRUE;
-      }
-    if (common->control_head_ptr != 0 && !control_head_found)
-      {
-      length++;
+    if (common->control_head_ptr != 0)
       control_head_found = TRUE;
-      }
+    if (*cc != OP_MARK)
+      quit_found = TRUE;
+
     cc += 1 + 2 + cc[1];
     break;


+    case OP_PRUNE:
+    case OP_SKIP:
+    case OP_COMMIT:
+    quit_found = TRUE;
+    cc++;
+    break;
+
+    case OP_SKIP_ARG:
+    quit_found = TRUE;
+    cc += 1 + 2 + cc[1];
+    break;
+
     case OP_THEN:
     SLJIT_ASSERT(common->control_head_ptr != 0);
+    quit_found = TRUE;
     if (!control_head_found)
-      {
-      length++;
       control_head_found = TRUE;
-      }
     cc++;
     break;


+    case OP_ACCEPT:
+    case OP_ASSERT_ACCEPT:
+    accept_found = TRUE;
+    cc++;
+    break;
+
     default:
     cc = next_opcode(common, cc);
     SLJIT_ASSERT(cc != NULL);
@@ -2062,7 +2058,21 @@
   }
 SLJIT_ASSERT(cc == ccend);


+if (control_head_found)
+  length++;
+if (capture_last_found)
+  length++;
+if (quit_found)
+  {
+  if (setsom_found)
+    length++;
+  if (setmark_found)
+    length++;
+  }
+
 *needs_control_head = control_head_found;
+*has_quit = quit_found;
+*has_accept = accept_found;
 return length;
 }


@@ -2070,11 +2080,12 @@
recurse_copy_from_global,
recurse_copy_private_to_global,
recurse_copy_shared_to_global,
+ recurse_copy_kept_shared_to_global,
recurse_swap_global
};

static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
- int type, int stackptr, int stacktop)
+ int type, int stackptr, int stacktop, BOOL has_quit)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
@@ -2102,6 +2113,7 @@

case recurse_copy_private_to_global:
case recurse_copy_shared_to_global:
+ case recurse_copy_kept_shared_to_global:
from_sp = FALSE;
base_reg = STACK_TOP;
break;
@@ -2109,7 +2121,7 @@
default:
SLJIT_ASSERT(type == recurse_swap_global);
from_sp = FALSE;
- base_reg = TMP1;
+ base_reg = TMP2;
break;
}

@@ -2116,13 +2128,13 @@
stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

-status.tmp_regs[0] = TMP2;
-status.saved_tmp_regs[0] = TMP2;
+status.tmp_regs[0] = TMP1;
+status.saved_tmp_regs[0] = TMP1;

-if (base_reg != TMP1)
+if (base_reg != TMP2)
{
- status.tmp_regs[1] = TMP1;
- status.saved_tmp_regs[1] = TMP1;
+ status.tmp_regs[1] = TMP2;
+ status.saved_tmp_regs[1] = TMP2;
}
else
{
@@ -2141,8 +2153,10 @@

delayed_mem_copy_init(&status, common);

-if (type != recurse_copy_shared_to_global)
+if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
   {
+  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
+
   if (!from_sp)
     delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);


@@ -2175,7 +2189,7 @@
     {
     case OP_SET_SOM:
     SLJIT_ASSERT(common->has_set_som);
-    if (!setsom_found)
+    if (has_quit && !setsom_found)
       {
       kept_shared_srcw[0] = OVECTOR(0);
       kept_shared_count = 1;
@@ -2185,18 +2199,21 @@
     break;


     case OP_RECURSE:
-    if (common->has_set_som && !setsom_found)
+    if (has_quit)
       {
-      kept_shared_srcw[0] = OVECTOR(0);
-      kept_shared_count = 1;
-      setsom_found = TRUE;
+      if (common->has_set_som && !setsom_found)
+        {
+        kept_shared_srcw[0] = OVECTOR(0);
+        kept_shared_count = 1;
+        setsom_found = TRUE;
+        }
+      if (common->mark_ptr != 0 && !setmark_found)
+        {
+        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
+        kept_shared_count++;
+        setmark_found = TRUE;
+        }
       }
-    if (common->mark_ptr != 0 && !setmark_found)
-      {
-      kept_shared_srcw[shared_count] = common->mark_ptr;
-      kept_shared_count++;
-      setmark_found = TRUE;
-      }
     if (common->capture_last_ptr != 0 && !capture_last_found)
       {
       shared_srcw[0] = common->capture_last_ptr;
@@ -2384,7 +2401,7 @@
     case OP_PRUNE_ARG:
     case OP_THEN_ARG:
     SLJIT_ASSERT(common->mark_ptr != 0);
-    if (!setmark_found)
+    if (has_quit && !setmark_found)
       {
       kept_shared_srcw[0] = common->mark_ptr;
       kept_shared_count = 1;
@@ -2416,8 +2433,10 @@
     break;
     }


-  if (type != recurse_copy_shared_to_global)
+  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
     {
+    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
+
     for (i = 0; i < private_count; i++)
       {
       SLJIT_ASSERT(private_srcw[i] != 0);
@@ -2434,8 +2453,10 @@
   else
     stackptr += sizeof(sljit_sw) * private_count;


-  if (type != recurse_copy_private_to_global)
+  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
     {
+    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
+
     for (i = 0; i < shared_count; i++)
       {
       SLJIT_ASSERT(shared_srcw[i] != 0);
@@ -2452,8 +2473,10 @@
   else
     stackptr += sizeof(sljit_sw) * shared_count;


-  if (type == recurse_copy_from_global || type == recurse_copy_shared_to_global)
+  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
     {
+    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
+
     for (i = 0; i < kept_shared_count; i++)
       {
       SLJIT_ASSERT(kept_shared_srcw[i] != 0);
@@ -11001,7 +11024,9 @@
 PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
 PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
 BOOL needs_control_head;
-int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head);
+BOOL has_quit;
+BOOL has_accept;
+int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
 int alt_count, alt_max, local_size;
 backtrack_common altbacktrack;
 jump_list *match = NULL;
@@ -11008,6 +11033,7 @@
 sljit_uw *next_update_addr = NULL;
 struct sljit_jump *alt1 = NULL;
 struct sljit_jump *alt2 = NULL;
+struct sljit_jump *accept_exit = NULL;
 struct sljit_label *quit;


/* Recurse captures then. */
@@ -11028,13 +11054,14 @@

local_size = (alt_max > 1) ? 2 : 1;

-/* Stack layout: [private data][return address][alternative index] */
+/* (Reversed) stack layout:
+ [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

-copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size);
+copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);

 /* This variable is saved and restored all time when we enter or exit from a recursive context. */
 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
@@ -11064,14 +11091,10 @@
   if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
     return;


- /* TODO: this needs to be improved in the future. */
- set_jumps(common->accept, LABEL());
- common->accept_label = NULL;
+ allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

-  allocate_stack(common, (alt_max > 1) ? 2 : 1);
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
-
-  if (alt_max > 1)
+  if (alt_max > 1 || has_accept)
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);


   add_jump(compiler, &match, JUMP(SLJIT_JUMP));
@@ -11083,13 +11106,16 @@
     common->currententry->backtrack_label = LABEL();
     set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);


-    sljit_emit_fast_enter(compiler, TMP2, 0);
+    sljit_emit_fast_enter(compiler, TMP1, 0);


-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+    if (has_accept)
+      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
+
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     /* Save return address. */
-    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), STACK(local_size - 1), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);


-    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size);
+    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);


     if (alt_max > 1)
       {
@@ -11113,7 +11139,7 @@
         }
       }
     else
-      free_stack(common, 1);
+      free_stack(common, has_accept ? 2 : 1);
     }
   else if (alt_max > 4)
     add_label_addr(common, next_update_addr++);
@@ -11151,7 +11177,7 @@


quit = LABEL();

-copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size);
+copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
@@ -11160,19 +11186,50 @@

if (common->quit != NULL)
{
+ SLJIT_ASSERT(has_quit);
+
set_jumps(common->quit, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
- copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size);
+ copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
JUMPTO(SLJIT_JUMP, quit);
}

+if (has_accept)
+ {
+ JUMPHERE(accept_exit);
+ free_stack(common, 2);
+
+ /* Save return address. */
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
+
+ copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
+
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
+ free_stack(common, private_data_size + local_size);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+ sljit_emit_fast_return(compiler, TMP2, 0);
+ }
+
+if (common->accept != NULL)
+ {
+ SLJIT_ASSERT(has_accept);
+
+ set_jumps(common->accept, LABEL());
+
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
+ OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
+
+ allocate_stack(common, 2);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
+ }
+
set_jumps(match, LABEL());

-OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

-copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size);
+copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);

-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), STACK(local_size - 1));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
sljit_emit_fast_return(compiler, TMP2, 0);
}

Modified: code/trunk/src/pcre2_jit_test.c
===================================================================
--- code/trunk/src/pcre2_jit_test.c    2017-03-29 18:10:55 UTC (rev 717)
+++ code/trunk/src/pcre2_jit_test.c    2017-03-30 13:25:20 UTC (rev 718)
@@ -724,6 +724,8 @@
     { MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
     { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
     { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
+    { MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
+    { MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },


     /* 16 bit specific tests. */
     { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
@@ -842,6 +844,16 @@
     { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
     { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },


+    /* Recurse and control verbs. */
+    { MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
+    { MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
+    { MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
+    { MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
+    { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
+    { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
+    { MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
+    { MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
+
     /* Deep recursion. */
     { MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
     { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },