[Pcre-svn] [437] code/trunk/src: Support offset limit in JIT…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [437] code/trunk/src: Support offset limit in JIT.
Revision: 437
          http://www.exim.org/viewvc/pcre2?view=rev&revision=437
Author:   zherczeg
Date:     2015-11-15 05:05:53 +0000 (Sun, 15 Nov 2015)
Log Message:
-----------
Support offset limit in JIT.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/pcre2_jit_match.c
    code/trunk/src/pcre2_match.c


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2015-11-14 17:28:19 UTC (rev 436)
+++ code/trunk/src/pcre2_jit_compile.c    2015-11-15 05:05:53 UTC (rev 437)
@@ -186,6 +186,7 @@
   int (*callout)(pcre2_callout_block *, void *);
   void *callout_data;
   /* Everything else after. */
+  sljit_uw offset_limit;
   sljit_ui limit_match;
   uint32_t oveccount;
   uint32_t options;
@@ -373,8 +374,8 @@
   sljit_si start_used_ptr;
   /* Starting pointer for partial soft matches. */
   sljit_si hit_start;
-  /* End pointer of the first line. */
-  sljit_si first_line_end;
+  /* Pointer of the match end position. */
+  sljit_si match_end_ptr;
   /* Points to the marked string. */
   sljit_si mark_ptr;
   /* Recursive control verb management chain. */
@@ -3298,7 +3299,7 @@


#endif /* SUPPORT_UNICODE */

-static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
+static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, uint32_t overall_options)
{
DEFINE_COMPILER;
struct sljit_label *mainloop;
@@ -3305,7 +3306,7 @@
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
-struct sljit_jump *nl = NULL;
+struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *singlechar;
#endif
@@ -3313,14 +3314,14 @@
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

-if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
-    common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
+if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
+    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
   newlinecheck = TRUE;


-if (firstline)
+if ((overall_options & PCRE2_FIRSTLINE) != 0)
{
/* Search for the end of the first line. */
- SLJIT_ASSERT(common->first_line_end != 0);
+ SLJIT_ASSERT(common->match_end_ptr != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

   if (common->nltype == NLTYPE_FIXED && common->newline > 255)
@@ -3333,7 +3334,7 @@
     CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
     CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
     JUMPHERE(end);
-    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
     }
   else
     {
@@ -3340,18 +3341,41 @@
     end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
     mainloop = LABEL();
     /* Continual stores does not cause data dependency. */
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
     read_char_range(common, common->nlmin, common->nlmax, TRUE);
     check_newlinechar(common, common->nltype, &newline, TRUE);
     CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
     JUMPHERE(end);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
     set_jumps(newline, LABEL());
     }


OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
}
+else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
+ {
+ /* Check whether offset limit is set and valid. */
+ SLJIT_ASSERT(common->match_end_ptr != 0);

+ OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, offset_limit));
+ OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
+ end = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
+#if PCRE2_CODE_UNIT_WIDTH == 16
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+#endif
+ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+ end2 = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_END, 0);
+ OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
+ JUMPHERE(end2);
+ JUMPHERE(end);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP1, 0);
+ }
+
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
@@ -3366,7 +3390,7 @@
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- nl = JUMP(SLJIT_JUMP);
+ end2 = JUMP(SLJIT_JUMP);
}

mainloop = LABEL();
@@ -3411,7 +3435,7 @@
if (newlinecheck)
{
JUMPHERE(end);
- JUMPHERE(nl);
+ JUMPHERE(end2);
}

return mainloop;
@@ -4128,7 +4152,7 @@

#endif

-static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_si offset, BOOL firstline)
+static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_si offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
@@ -4139,16 +4163,16 @@
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
+BOOL has_match_end = (common->match_end_ptr != 0);

if (offset > 0)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

-if (firstline)
+if (has_match_end)
{
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

-  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
+  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
   if (sljit_x86_is_cmov_available())
     {
@@ -4210,16 +4234,16 @@
   else if (sljit_x86_is_cmov_available())
     {
     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
-    sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
+    sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
     }
   else
     {
     quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
-    OP1(SLJIT_MOV, STR_PTR, 0, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
+    OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
     JUMPHERE(quit);
     }


-  if (firstline)
+  if (has_match_end)
     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
   return;
   }
@@ -4282,10 +4306,10 @@


JUMPHERE(quit);

-if (firstline)
+if (has_match_end)
   {
   quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
-  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
   if (offset > 0)
     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
   JUMPHERE(quit);
@@ -4296,7 +4320,7 @@
   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
 }


-static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
+static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
@@ -4403,7 +4427,7 @@
SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
/* Works regardless the value is 1 or 2. */
mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
- fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset, firstline);
+ fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
return TRUE;
}

@@ -4414,10 +4438,9 @@

max -= 1;
SLJIT_ASSERT(max > 0);
-if (firstline)
+if (common->match_end_ptr != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
@@ -4503,10 +4526,10 @@

JUMPHERE(quit);

-if (firstline)
+if (common->match_end_ptr != 0)
   {
   if (range_right >= 0)
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
   if (range_right >= 0)
     {
@@ -4523,7 +4546,7 @@
 #undef MAX_N_CHARS
 #undef MAX_N_BYTES


-static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless, BOOL firstline)
+static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless)
{
PCRE2_UCHAR oc;

@@ -4537,10 +4560,10 @@
#endif
}

-fast_forward_first_char2(common, first_char, oc, 0, firstline);
+fast_forward_first_char2(common, first_char, oc, 0);
}

-static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
+static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
@@ -4551,11 +4574,10 @@
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

-if (firstline)
+if (common->match_end_ptr != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
}

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
@@ -4586,7 +4608,7 @@
JUMPHERE(firstchar);
JUMPHERE(lastchar);

-  if (firstline)
+  if (common->match_end_ptr != 0)
     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
   return;
   }
@@ -4624,13 +4646,13 @@
 JUMPHERE(lastchar);
 JUMPHERE(firstchar);


-if (firstline)
+if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}

static BOOL check_class_ranges(compiler_common *common, const sljit_ub *bits, BOOL nclass, BOOL invert, jump_list **backtracks);

-static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_ub *start_bits, BOOL firstline)
+static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_ub *start_bits)
{
DEFINE_COMPILER;
struct sljit_label *start;
@@ -4641,11 +4663,10 @@
struct sljit_jump *jump;
#endif

-if (firstline)
+if (common->match_end_ptr != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
}

start = LABEL();
@@ -4703,7 +4724,7 @@
set_jumps(matches, LABEL());
JUMPHERE(quit);

-if (firstline)
+if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}

@@ -10941,9 +10962,9 @@
     common->ovector_start += sizeof(sljit_sw);
     }
   }
-if ((re->overall_options & PCRE2_FIRSTLINE) != 0)
+if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
   {
-  common->first_line_end = common->ovector_start;
+  common->match_end_ptr = common->ovector_start;
   common->ovector_start += sizeof(sljit_sw);
   }
 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
@@ -11049,19 +11070,19 @@
 /* Main part of the matching */
 if ((re->overall_options & PCRE2_ANCHORED) == 0)
   {
-  mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, (re->overall_options & PCRE2_FIRSTLINE) != 0);
+  mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, re->overall_options);
   continue_match_label = LABEL();
   /* Forward search if possible. */
   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
     {
-    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common, (re->overall_options & PCRE2_FIRSTLINE) != 0))
+    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
       ;
     else if ((re->flags & PCRE2_FIRSTSET) != 0)
-      fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0, (re->overall_options & PCRE2_FIRSTLINE) != 0);
+      fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0);
     else if ((re->flags & PCRE2_STARTLINE) != 0)
-      fast_forward_newline(common, (re->overall_options & PCRE2_FIRSTLINE) != 0);
+      fast_forward_newline(common);
     else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
-      fast_forward_start_bits(common, re->start_bitmap, (re->overall_options & PCRE2_FIRSTLINE) != 0);
+      fast_forward_start_bits(common, re->start_bitmap);
     }
   }
 else
@@ -11162,10 +11183,9 @@
   }


/* Check we have remaining characters. */
-if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_FIRSTLINE) != 0)
+if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
{
- SLJIT_ASSERT(common->first_line_end != 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
}

 if (common->fast_forward_bc_ptr != NULL)
@@ -11177,16 +11197,15 @@
   {
   if (common->ff_newline_shortcut != NULL)
     {
+    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
     if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
-      CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
-    /* There cannot be more newlines here. */
+      {
+      CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, common->ff_newline_shortcut);
+      }
     }
   else
     {
-    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
-      CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
-    else
-      CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
     }
   }



Modified: code/trunk/src/pcre2_jit_match.c
===================================================================
--- code/trunk/src/pcre2_jit_match.c    2015-11-14 17:28:19 UTC (rev 436)
+++ code/trunk/src/pcre2_jit_match.c    2015-11-15 05:05:53 UTC (rev 437)
@@ -129,10 +129,12 @@
 arguments.startchar_ptr = subject;
 arguments.mark_ptr = NULL;
 arguments.options = options;
+
 if (mcontext != NULL)
   {
   arguments.callout = mcontext->callout;
   arguments.callout_data = mcontext->callout_data;
+  arguments.offset_limit = mcontext->offset_limit;
   arguments.limit_match = (mcontext->match_limit < re->limit_match)?
     mcontext->match_limit : re->limit_match;
   if (mcontext->jit_callback != NULL)
@@ -144,6 +146,7 @@
   {
   arguments.callout = NULL;
   arguments.callout_data = NULL;
+  arguments.offset_limit = PCRE2_UNSET;
   arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
     MATCH_LIMIT : re->limit_match;
   jit_stack = NULL;


Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c    2015-11-14 17:28:19 UTC (rev 436)
+++ code/trunk/src/pcre2_match.c    2015-11-15 05:05:53 UTC (rev 437)
@@ -6605,13 +6605,6 @@
 selected normal or partial matching mode was not compiled). */


#ifdef SUPPORT_JIT
-
-/* +++ TEMPORARY: JIT does not yet support offset_limit. */
-
-if (mcontext->offset_limit == PCRE2_UNSET)
-
-/* +++ */
-
if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
{
rc = pcre2_jit_match(code, subject, length, start_offset, options,