[Pcre-svn] [358] code/trunk/src: SSE2 refactor, JIT compiler…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [358] code/trunk/src: SSE2 refactor, JIT compiler update.
Revision: 358
          http://www.exim.org/viewvc/pcre2?view=rev&revision=358
Author:   zherczeg
Date:     2015-08-30 06:30:43 +0100 (Sun, 30 Aug 2015)
Log Message:
-----------
SSE2 refactor, JIT compiler update.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/sljit/sljitLir.h
    code/trunk/src/sljit/sljitNativeX86_common.c


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2015-08-29 17:13:09 UTC (rev 357)
+++ code/trunk/src/pcre2_jit_compile.c    2015-08-30 05:30:43 UTC (rev 358)
@@ -4146,12 +4146,20 @@
   {
   SLJIT_ASSERT(common->first_line_end != 0);
   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
-  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);


-  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
-  quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
-  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
-  JUMPHERE(quit);
+  OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+  if (sljit_x86_is_cmov_available())
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
+    sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
+    }
+#endif
+    {
+    quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
+    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+    JUMPHERE(quit);
+    }
   }


#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
@@ -4163,49 +4171,55 @@

/* SSE2 accelerated first character search. */

-if (sljit_is_fpu_available())
+if (sljit_x86_is_sse2_available())
{
fast_forward_first_char2_sse2(common, char1, char2);

-  quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
-  if (firstline)
-    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
-  else
-    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
+  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
+  if (common->mode == PCRE2_JIT_COMPLETE)
+    {
+    /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
+    SLJIT_ASSERT(common->forced_quit_label == NULL);
+    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
+    add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));


-  if (offset > 0)
-    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
-
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-  if (common->utf && offset > 0)
-    {
-    utf_quit = JUMP(SLJIT_JUMP);
+    if (common->utf && offset > 0)
+      {
+      SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);


-    JUMPHERE(quit);
-    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
-    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 #if PCRE2_CODE_UNIT_WIDTH == 8
-    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
-    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
+      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
 #elif PCRE2_CODE_UNIT_WIDTH == 16
-    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
-    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
+      OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
 #else
 #error "Unknown code width"
 #endif
-    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-    JUMPHERE(utf_quit);
+      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+      }
+#endif
+
+    if (offset > 0)
+      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
     }
+  else if (sljit_x86_is_cmov_available())
+    {
+    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+    sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
+    }
   else
-#endif
+    {
+    quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+    OP1(SLJIT_MOV, STR_PTR, 0, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
     JUMPHERE(quit);
+    }


-  if (offset > 0)
-    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
-
   if (firstline)
     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
-
   return;
   }



Modified: code/trunk/src/sljit/sljitLir.h
===================================================================
--- code/trunk/src/sljit/sljitLir.h    2015-08-29 17:13:09 UTC (rev 357)
+++ code/trunk/src/sljit/sljitLir.h    2015-08-30 05:30:43 UTC (rev 358)
@@ -869,34 +869,6 @@
     sljit_si src1, sljit_sw src1w,
     sljit_si src2, sljit_sw src2w);


-/* The following function is a helper function for sljit_emit_op_custom.
-   It returns with the real machine register index ( >=0 ) of any SLJIT_R,
-   SLJIT_S and SLJIT_SP registers.
-
-   Note: it returns with -1 for virtual registers (only on x86-32). */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
-
-/* The following function is a helper function for sljit_emit_op_custom.
-   It returns with the real machine register index of any SLJIT_FLOAT register.
-
-   Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
-
-/* Any instruction can be inserted into the instruction stream by
-   sljit_emit_op_custom. It has a similar purpose as inline assembly.
-   The size parameter must match to the instruction size of the target
-   architecture:
-
-         x86: 0 < size <= 15. The instruction argument can be byte aligned.
-      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
-              if size == 4, the instruction argument must be 4 byte aligned.
-   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
-    void *instruction, sljit_si size);
-
 /* Returns with non-zero if fpu is available. */


SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
@@ -1214,4 +1186,64 @@

#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */

+/* --------------------------------------------------------------------- */
+/*  CPU specific functions                                               */
+/* --------------------------------------------------------------------- */
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns with the real machine register index ( >=0 ) of any SLJIT_R,
+   SLJIT_S and SLJIT_SP registers.
+
+   Note: it returns with -1 for virtual registers (only on x86-32). */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+   It returns with the real machine register index of any SLJIT_FLOAT register.
+
+   Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
+
+/* Any instruction can be inserted into the instruction stream by
+   sljit_emit_op_custom. It has a similar purpose as inline assembly.
+   The size parameter must match to the instruction size of the target
+   architecture:
+
+         x86: 0 < size <= 15. The instruction argument can be byte aligned.
+      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
+              if size == 4, the instruction argument must be 4 byte aligned.
+   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+    void *instruction, sljit_si size);
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
+/* Returns with non-zero if sse2 is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void);
+
+/* Returns with non-zero if cmov instruction is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void);
+
+/* Emit a conditional mov instruction on x86 CPUs. This instruction
+   moves src to destination, if the condition is satisfied. Unlike
+   other arithmetic instructions, destination must be a register.
+   Before such instructions are emitted, cmov support should be
+   checked by sljit_x86_is_cmov_available function.
+    type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
+    dst_reg must be a valid register and it can be combined
+      with SLJIT_INT_OP to perform 32 bit arithmetic
+   Flags: I - (never set any flags)
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
+    sljit_si type,
+    sljit_si dst_reg,
+    sljit_si src, sljit_sw srcw);
+
+#endif
+
 #endif /* _SLJIT_LIR_H_ */


Modified: code/trunk/src/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_common.c    2015-08-29 17:13:09 UTC (rev 357)
+++ code/trunk/src/sljit/sljitNativeX86_common.c    2015-08-30 05:30:43 UTC (rev 358)
@@ -2936,3 +2936,69 @@
 {
     *(sljit_sw*)addr = new_constant;
 }
+/* Reports SSE2 support: non-zero if available (always 1 when SLJIT_DETECT_SSE2 is disabled). */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
+{
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+    if (cpu_has_sse2 == -1) /* -1 appears to mean "not probed yet"; confirm where cpu_has_sse2 is defined. */
+        get_cpu_features(); /* Defined outside this hunk; expected to replace the -1 in cpu_has_sse2. */
+    return cpu_has_sse2;
+#else
+    return 1; /* Runtime detection is compiled out, so SSE2 is reported as present. */
+#endif
+}
+/* Reports cmov support: returns cpu_has_cmov, calling get_cpu_features() first if it still holds -1. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
+{
+    if (cpu_has_cmov == -1) /* -1 appears to mean "not probed yet"; confirm where cpu_has_cmov is defined. */
+        get_cpu_features();
+    return cpu_has_cmov; /* Non-zero if cmov is available, per the declaration comment in sljitLir.h. */
+}
+/* Emits an x86 cmov: moves src into dst_reg when condition 'type' holds; contract is in sljitLir.h. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
+    sljit_si type,
+    sljit_si dst_reg,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_ub* inst;
+    /* Argument checks and verbose logging below are compiled in only when their SLJIT_* options are enabled. */
+    CHECK_ERROR();
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    CHECK_ARGUMENT(sljit_x86_is_cmov_available());
+    CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
+    CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); /* NOTE(review): sljitLir.h names SLJIT_S_ORDERED as the upper bound; confirm both agree. */
+    CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
+    FUNCTION_CHECK_SRC(src, srcw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+    if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+        fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
+            !(dst_reg & SLJIT_INT_OP) ? "" : ".i",
+            JUMP_PREFIX(type), jump_names[type & 0xff]);
+        sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
+        fprintf(compiler->verbose, ", ");
+        sljit_verbose_param(compiler, src, srcw);
+        fprintf(compiler->verbose, "\n");
+    }
+#endif
+    /* Source operand fix-ups before encoding; both macros are defined elsewhere (semantics not shown here). */
+    ADJUST_LOCAL_OFFSET(src, srcw);
+    CHECK_EXTRA_REGS(src, srcw, (void)0);
+    /* SLJIT_INT_OP on dst_reg selects 32 bit mode (x86-64 builds only); the flag is then stripped. */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    compiler->mode32 = dst_reg & SLJIT_INT_OP;
+#endif
+    dst_reg &= ~SLJIT_INT_OP;
+    /* cmov has no immediate form, so an immediate source is first loaded into TMP_REG1. */
+    if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+        src = TMP_REG1;
+        srcw = 0;
+    }
+    /* Two opcode bytes: GROUP_0F, then the condition's jump code minus 0x40 (presumably 0F 8x -> 0F 4x). */
+    inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
+    FAIL_IF(!inst);
+    *inst++ = GROUP_0F;
+    *inst = get_jump_code(type & 0xff) - 0x40;
+    return SLJIT_SUCCESS;
+}