[Pcre-svn] [1225] code/trunk/src: Implement simd support for…

Kezdőlap
Üzenet törlése
Szerző: Subversion repository
Dátum:  
Címzett: pcre-svn
Tárgy: [Pcre-svn] [1225] code/trunk/src: Implement simd support for requested character in JIT.
Revision: 1225
          http://www.exim.org/viewvc/pcre2?view=rev&revision=1225
Author:   zherczeg
Date:     2020-02-24 05:26:15 +0000 (Mon, 24 Feb 2020)
Log Message:
-----------
Implement simd support for requested character in JIT.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/pcre2_jit_simd_inc.h


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2020-02-23 16:40:05 UTC (rev 1224)
+++ code/trunk/src/pcre2_jit_compile.c    2020-02-24 05:26:15 UTC (rev 1225)
@@ -6107,22 +6107,22 @@
   OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
 }


-static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
+static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
-struct sljit_jump *alreadyfound;
+struct sljit_jump *already_found;
struct sljit_jump *found;
-struct sljit_jump *foundoc = NULL;
-struct sljit_jump *notfound;
+struct sljit_jump *found_oc = NULL;
+jump_list *not_found = NULL;
sljit_u32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
-OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
-toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
-alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
+toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
+already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

if (has_firstchar)
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -6129,10 +6129,6 @@
else
OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

-loop = LABEL();
-notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
 oc = req_char;
 if (caseless)
   {
@@ -6142,32 +6138,49 @@
     oc = UCD_OTHERCASE(req_char);
 #endif
   }
-if (req_char == oc)
-  found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+
+#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
+if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
+  {
+  not_found = fast_requested_char_simd(common, req_char, oc);
+  }
 else
+#endif
   {
-  bit = req_char ^ oc;
-  if (is_powerof2(bit))
-    {
-    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
-    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
-    }
+  loop = LABEL();
+  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
+
+  if (req_char == oc)
+    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
   else
     {
-    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
-    foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
+    bit = req_char ^ oc;
+    if (is_powerof2(bit))
+      {
+       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
+      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
+      }
+    else
+      {
+      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
+      }
     }
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+  JUMPTO(SLJIT_JUMP, loop);
+
+  JUMPHERE(found);
+  if (found_oc)
+    JUMPHERE(found_oc);
   }
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
-JUMPTO(SLJIT_JUMP, loop);


-JUMPHERE(found);
-if (foundoc)
- JUMPHERE(foundoc);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
-JUMPHERE(alreadyfound);
+
+JUMPHERE(already_found);
JUMPHERE(toolong);
-return notfound;
+return not_found;
}

static void do_revertframes(compiler_common *common)
@@ -13135,9 +13148,9 @@
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
-struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match = NULL;
struct sljit_jump *end_anchor_failed = NULL;
+jump_list *reqcu_not_found = NULL;

SLJIT_ASSERT(tables);

@@ -13403,7 +13416,7 @@
minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
}
if (common->req_char_ptr != 0)
- reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
+ reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);

/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
@@ -13538,8 +13551,8 @@
}

/* No more remaining characters. */
-if (reqbyte_notfound != NULL)
- JUMPHERE(reqbyte_notfound);
+if (reqcu_not_found != NULL)
+ set_jumps(reqcu_not_found, LABEL());

if (mode == PCRE2_JIT_PARTIAL_SOFT)
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);

Modified: code/trunk/src/pcre2_jit_simd_inc.h
===================================================================
--- code/trunk/src/pcre2_jit_simd_inc.h    2020-02-23 16:40:05 UTC (rev 1224)
+++ code/trunk/src/pcre2_jit_simd_inc.h    2020-02-24 05:26:15 UTC (rev 1225)
@@ -344,6 +344,136 @@
 #endif
 }


+#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
+
+static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+jump_list *not_found = NULL;
+sse2_compare_type compare_type = sse2_compare_match1;
+sljit_u8 instruction[8];
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_u32 bit = 0;
+int i;
+
+if (char1 != char2)
+  {
+  bit = char1 ^ char2;
+  compare_type = sse2_compare_match1i;
+
+  if (!is_powerof2(bit))
+    {
+    bit = 0;
+    compare_type = sse2_compare_match2;
+    }
+  }
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
+OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
+
+/* First part (unaligned start) */
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
+
+SLJIT_ASSERT(tmp1_reg_ind < 8);
+
+/* MOVD xmm, r/m32 */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+instruction[2] = 0x6e;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (char1 != char2)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
+
+  /* MOVD xmm, r/m32 */
+  instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  }
+
+OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
+
+/* PSHUFD xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x70;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
+instruction[4] = 0;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+if (char1 != char2)
+  {
+  /* PSHUFD xmm1, xmm2/m128, imm8 */
+  instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
+  sljit_emit_op_custom(compiler, instruction, 5);
+  }
+
+OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
+
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
+for (i = 0; i < 4; i++)
+  fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+/* Second part (aligned) */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
+for (i = 0; i < 4; i++)
+  fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
+
+JUMPHERE(quit);
+
+/* BSF r32, r/m32 */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+return not_found;
+}
+
 #ifndef _WIN64


static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)