[Pcre-svn] [351] code/trunk/src: Continue working on SSE2 support.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [351] code/trunk/src: Continue working on SSE2 support.
Revision: 351
          http://www.exim.org/viewvc/pcre2?view=rev&revision=351
Author:   zherczeg
Date:     2015-08-23 10:47:08 +0100 (Sun, 23 Aug 2015)
Log Message:
-----------
Continue working on SSE2 support.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/pcre2_jit_test.c


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2015-08-23 01:54:04 UTC (rev 350)
+++ code/trunk/src/pcre2_jit_compile.c    2015-08-23 09:47:08 UTC (rev 351)
@@ -3453,9 +3453,10 @@
 static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *chars, int max_chars, uint32_t *rec_count)
 {
 /* Recursive function, which scans prefix literals. */
-BOOL last, any, caseless;
+BOOL last, any, class, caseless;
 int len, repeat, len_save, consumed = 0;
 sljit_ui chr; /* Any unicode character. */
+sljit_ub *bytes, *bytes_end, byte;
 PCRE2_SPTR alternative, cc_save, oc;
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
 PCRE2_UCHAR othercase[8];
@@ -3474,6 +3475,7 @@


last = TRUE;
any = FALSE;
+ class = FALSE;
caseless = FALSE;

   switch (*cc)
@@ -3575,8 +3577,7 @@
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
     if (common->utf && !is_char7_bitset((const sljit_ub *)(cc + 1), FALSE)) return consumed;
 #endif
-    any = TRUE;
-    cc += 1 + 32 / sizeof(PCRE2_UCHAR);
+    class = TRUE;
     break;


     case OP_NCLASS:
@@ -3583,8 +3584,7 @@
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
     if (common->utf) return consumed;
 #endif
-    any = TRUE;
-    cc += 1 + 32 / sizeof(PCRE2_UCHAR);
+    class = TRUE;
     break;


 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
@@ -3687,6 +3687,102 @@
     continue;
     }


+  if (class)
+    {
+    bytes = (sljit_ub*) (cc + 1);
+    cc += 1 + 32 / sizeof(PCRE2_UCHAR);
+
+    switch (*cc)
+      {
+      case OP_CRSTAR:
+      case OP_CRMINSTAR:
+      case OP_CRPOSSTAR:
+      case OP_CRQUERY:
+      case OP_CRMINQUERY:
+      case OP_CRPOSQUERY:
+      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
+      if (max_chars == 0)
+        return consumed;
+      break;
+
+      default:
+      case OP_CRPLUS:
+      case OP_CRMINPLUS:
+      case OP_CRPOSPLUS:
+      break;
+
+      case OP_CRRANGE:
+      case OP_CRMINRANGE:
+      case OP_CRPOSRANGE:
+      repeat = GET2(cc, 1);
+      if (repeat <= 0)
+        return consumed;
+      break;
+      }
+
+    do
+      {
+      if (bytes[31] & 0x80)
+        chars[0] = 255;
+      else if (chars[0] != 255)
+        {
+        bytes_end = bytes + 32;
+        chr = 0;
+        do
+          {
+          byte = *bytes++;
+          SLJIT_ASSERT((chr & 0x7) == 0);
+          if (byte == 0)
+            chr += 8;
+          else
+            {
+            do
+              {
+              if ((byte & 0x1) != 0)
+                add_prefix_char(chr, chars);
+              byte >>= 1;
+              chr++;
+              }
+            while (byte != 0);
+            chr = (chr + 7) & ~7;
+            }
+          }
+        while (chars[0] != 255 && bytes < bytes_end);
+        }
+
+      consumed++;
+      if (--max_chars == 0)
+        return consumed;
+      chars += MAX_DIFF_CHARS;
+      }
+    while (--repeat > 0);
+
+    switch (*cc)
+      {
+      case OP_CRSTAR:
+      case OP_CRMINSTAR:
+      case OP_CRPOSSTAR:
+      return consumed;
+
+      case OP_CRQUERY:
+      case OP_CRMINQUERY:
+      case OP_CRPOSQUERY:
+      cc++;
+      break;
+
+      case OP_CRRANGE:
+      case OP_CRMINRANGE:
+      case OP_CRPOSRANGE:
+      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
+        return consumed;
+      cc += 1 + 2 * IMM2_SIZE;
+      break;
+      }
+
+    repeat = 1;
+    continue;
+    }
+
   len = 1;
 #ifdef SUPPORT_UNICODE
   if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
@@ -3769,12 +3865,11 @@
 #endif
 }


-static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, jump_list **found)
+static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
struct sljit_label *start;
-struct sljit_jump *quit;
-struct sljit_jump *aligned;
+struct sljit_jump *quit[3];
struct sljit_jump *nomatch;
sljit_ub instruction[8];
sljit_si tmp1_ind = sljit_get_register_index(TMP1);
@@ -3790,9 +3885,10 @@
if ((char1 != char2) && bit == 0)
load_twice = TRUE;

-OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 32 /* bytes */);
-quit = CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0);
+quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

+/* First part (unaligned start) */
+
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
@@ -3827,9 +3923,7 @@
sljit_emit_op_custom(compiler, instruction, 5);
}

-OP2(SLJIT_AND | SLJIT_SET_E, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
-aligned = JUMP(SLJIT_ZERO);
-
+OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

/* MOVDQA xmm1, xmm2/m128 */
@@ -3913,30 +4007,26 @@

OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);

-nomatch = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
-
/* BSF r32, r/m32 */
-
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);

+nomatch = JUMP(SLJIT_ZERO);
+
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-add_jump(compiler, found, JUMP(SLJIT_JUMP));
+quit[1] = JUMP(SLJIT_JUMP);

JUMPHERE(nomatch);
+
+start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

-JUMPHERE(aligned);
+/* Second part (aligned) */

-/* SSE2 part */
-
-OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 16);
-
-start = LABEL();
-
instruction[0] = 0x66;
instruction[1] = 0x0f;

@@ -4017,26 +4107,20 @@
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
}

-nomatch = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
-
/* BSF r32, r/m32 */
-
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);

-OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 16);
+JUMPTO(SLJIT_ZERO, start);
+
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-add_jump(compiler, found, JUMP(SLJIT_JUMP));

-JUMPHERE(nomatch);
-
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
-CMPTO(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0, start);
-
-OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 16);
-JUMPHERE(quit);
+start = LABEL();
+SET_LABEL(quit[0], start);
+SET_LABEL(quit[1], start);
+SET_LABEL(quit[2], start);
}

#undef SSE2_COMPARE_TYPE_INDEX
@@ -4054,9 +4138,6 @@
struct sljit_label *utf_start = NULL;
struct sljit_jump *utf_quit = NULL;
#endif
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-jump_list *found_list = NULL;
-#endif

if (offset > 0)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
@@ -4066,6 +4147,11 @@
SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+
+ OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
+ quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+ JUMPHERE(quit);
}

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
@@ -4078,10 +4164,53 @@
/* SSE2 accelerated first character search. */

if (sljit_is_fpu_available())
- fast_forward_first_char2_sse2(common, char1, char2, &found_list);
+ {
+ fast_forward_first_char2_sse2(common, char1, char2);

+  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+  if (firstline)
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  else
+    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
+
+  if (offset > 0)
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+  if (common->utf && offset > 0)
+    {
+    utf_quit = JUMP(SLJIT_JUMP);
+
+    JUMPHERE(quit);
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
+#else
+#error "Unknown code width"
 #endif
+    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    JUMPHERE(utf_quit);
+    }
+  else
+#endif
+    JUMPHERE(quit);


+  if (offset > 0)
+    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
+
+  if (firstline)
+    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+
+  return;
+  }
+
+#endif
+
 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);


start = LABEL();
@@ -4117,10 +4246,6 @@

JUMPHERE(found);

-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-set_jumps(found_list, LABEL());
-#endif
-
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
{
@@ -4142,11 +4267,18 @@

JUMPHERE(quit);

+if (firstline)
+  {
+  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
+  if (offset > 0)
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
+  JUMPHERE(quit);
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+  }
+
 if (offset > 0)
   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
-
-if (firstline)
-  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
 }


static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
@@ -4173,7 +4305,7 @@
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

-if (max <= 1)
+if (max < 1)
return FALSE;

 in_range = FALSE;
@@ -4253,6 +4385,8 @@
   {
   if (offset < 0)
     return FALSE;
+  SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
+  /* Works regardless the value is 1 or 2. */
   mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
   fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset, firstline);
   return TRUE;


Modified: code/trunk/src/pcre2_jit_test.c
===================================================================
--- code/trunk/src/pcre2_jit_test.c    2015-08-23 01:54:04 UTC (rev 350)
+++ code/trunk/src/pcre2_jit_test.c    2015-08-23 09:47:08 UTC (rev 351)
@@ -332,6 +332,9 @@
     { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
     { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
     { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
+    { MU, A, 0, 0, ".[ab]*.", "xx" },
+    { MU, A, 0, 0, ".[ab]*a", "xxa" },
+    { MU, A, 0, 0, ".[ab]?.", "xx" },


     /* Bracket repeats with limit. */
     { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
@@ -688,6 +691,7 @@
     { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
     { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
     { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
+    { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },


     /* Recurse. */
     { MU, A, 0, 0, "(a)(?1)", "aa" },