[Pcre-svn] [757] code/trunk/src/pcre2_jit_compile.c: Refactor SSE2 accelerated first character search in JIT.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [757] code/trunk/src/pcre2_jit_compile.c: Refactor SSE2 accelerated first character search in JIT.
Revision: 757
          http://www.exim.org/viewvc/pcre2?view=rev&revision=757
Author:   zherczeg
Date:     2017-04-18 06:49:39 +0100 (Tue, 18 Apr 2017)
Log Message:
-----------
Refactor SSE2 accelerated first character search in JIT.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2017-04-17 15:39:09 UTC (rev 756)
+++ code/trunk/src/pcre2_jit_compile.c    2017-04-18 05:49:39 UTC (rev 757)
@@ -4233,26 +4233,82 @@
 #endif
 }


+static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
+  sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
+{
+sljit_u8 instruction[4];
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+
+if (char1 == char2 || bit != 0)
+  {
+  if (bit != 0)
+    {
+    /* POR xmm1, xmm2/m128 */
+    /* instruction[0] = 0x66; */
+    /* instruction[1] = 0x0f; */
+    instruction[2] = 0xeb;
+    instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
+    sljit_emit_op_custom(compiler, instruction, 4);
+    }
+
+  /* PCMPEQB/W/D xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  }
+else
+  {
+  /* MOVDQA xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0x6f;
+  instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+
+  /* PCMPEQB/W/D xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+
+  instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+
+  /* POR xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0xeb;
+  instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  }
+}
+
 static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
 {
 DEFINE_COMPILER;
 struct sljit_label *start;
 struct sljit_jump *quit[3];
-struct sljit_jump *nomatch;
 sljit_u8 instruction[8];
 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
-BOOL load_twice = FALSE;
-PCRE2_UCHAR bit;
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_u32 bit = 0;


-bit = char1 ^ char2;
-if (!is_powerof2(bit))
-  bit = 0;
+if (char1 != char2)
+  {
+  bit = char1 ^ char2;
+  if (!is_powerof2(bit))
+    bit = 0;
+  }


-if ((char1 != char2) && bit == 0)
- load_twice = TRUE;
-
quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* First part (unaligned start) */
@@ -4265,7 +4321,7 @@
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
-instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);

if (char1 != char2)
@@ -4273,15 +4329,17 @@
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

/* MOVD xmm, r/m32 */
- instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}

+OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+
/* PSHUFD xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
-instruction[3] = 0xc0 | (2 << 3) | 2;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

@@ -4288,53 +4346,26 @@
if (char1 != char2)
{
/* PSHUFD xmm1, xmm2/m128, imm8 */
- instruction[3] = 0xc0 | (3 << 3) | 3;
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | 3;
sljit_emit_op_custom(compiler, instruction, 5);
}

-OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

-load_from_mem_sse2(compiler, 0, str_ptr_ind);
-if (load_twice)
- load_from_mem_sse2(compiler, 1, str_ptr_ind);
+load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
+fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

-if (bit != 0)
- {
- /* POR xmm1, xmm2/m128 */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (0 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
-
-/* PCMPEQB/W/D xmm1, xmm2/m128 */
-instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
-instruction[3] = 0xc0 | (0 << 3) | 2;
-sljit_emit_op_custom(compiler, instruction, 4);
-
-if (load_twice)
- {
- instruction[3] = 0xc0 | (1 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
-
/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

-if (load_twice)
- {
- OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
- instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
- sljit_emit_op_custom(compiler, instruction, 4);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
- }
-
-OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
-
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
@@ -4342,14 +4373,10 @@
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

-nomatch = JUMP(SLJIT_ZERO);
+quit[1] = JUMP(SLJIT_NOT_ZERO);

-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-quit[1] = JUMP(SLJIT_JUMP);
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

-JUMPHERE(nomatch);
-
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
@@ -4356,46 +4383,16 @@

/* Second part (aligned) */

-instruction[0] = 0x66;
-instruction[1] = 0x0f;
-
-/* MOVDQA xmm1, xmm2/m128 */
load_from_mem_sse2(compiler, 0, str_ptr_ind);
-if (load_twice)
- load_from_mem_sse2(compiler, 1, str_ptr_ind);
+fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

-if (bit != 0)
- {
- /* POR xmm1, xmm2/m128 */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (0 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
-
-/* PCMPEQB/W/D xmm1, xmm2/m128 */
-instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
-instruction[3] = 0xc0 | (0 << 3) | 2;
-sljit_emit_op_custom(compiler, instruction, 4);
-
-if (load_twice)
- {
- instruction[3] = 0xc0 | (1 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
-
/* PMOVMSKB reg, xmm */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);

-if (load_twice)
- {
- instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
- sljit_emit_op_custom(compiler, instruction, 4);
-
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- }
-
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
@@ -4405,12 +4402,12 @@

JUMPTO(SLJIT_ZERO, start);

+JUMPHERE(quit[1]);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

start = LABEL();
-SET_LABEL(quit[0], start);
-SET_LABEL(quit[1], start);
-SET_LABEL(quit[2], start);
+JUMPHERE(quit[0]);
+JUMPHERE(quit[2]);
}

#ifndef _WIN64
@@ -4428,60 +4425,6 @@
#endif
}

-static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
-  sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
-{
-sljit_u8 instruction[4];
-instruction[0] = 0x66;
-instruction[1] = 0x0f;
-
-if (char1 == char2 || bit != 0)
-  {
-  if (bit != 0)
-    {
-    /* POR xmm1, xmm2/m128 */
-    /* instruction[0] = 0x66; */
-    /* instruction[1] = 0x0f; */
-    instruction[2] = 0xeb;
-    instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
-    sljit_emit_op_custom(compiler, instruction, 4);
-    }
-
-  /* PCMPEQB/W/D xmm1, xmm2/m128 */
-  /* instruction[0] = 0x66; */
-  /* instruction[1] = 0x0f; */
-  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
-  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
-  sljit_emit_op_custom(compiler, instruction, 4);
-  }
-else
-  {
-  /* MOVDQA xmm1, xmm2/m128 */
-  /* instruction[0] = 0x66; */
-  /* instruction[1] = 0x0f; */
-  instruction[2] = 0x6f;
-  instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
-  sljit_emit_op_custom(compiler, instruction, 4);
-
-  /* PCMPEQB/W/D xmm1, xmm2/m128 */
-  /* instruction[0] = 0x66; */
-  /* instruction[1] = 0x0f; */
-  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
-  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
-  sljit_emit_op_custom(compiler, instruction, 4);
-
-  instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
-  sljit_emit_op_custom(compiler, instruction, 4);
-
-  /* POR xmm1, xmm2/m128 */
-  /* instruction[0] = 0x66; */
-  /* instruction[1] = 0x0f; */
-  instruction[2] = 0xeb;
-  instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
-  sljit_emit_op_custom(compiler, instruction, 4);
-  }
-}
-
 static void fast_forward_char_pair_sse2(compiler_common *common, sljit_u32 offs1,
   PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_u32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
 {
@@ -4699,8 +4642,8 @@
 sljit_emit_op_custom(compiler, instruction, 4);


/* Ignore matches before the first STR_PTR. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
-OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, TMP2, 0);

/* BSF r32, r/m32 */
instruction[0] = 0x0f;
@@ -4711,6 +4654,8 @@

jump[2] = JUMP(SLJIT_NOT_ZERO);

+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
/* Main loop. */
instruction[0] = 0x66;
instruction[1] = 0x0f;