Revision: 358
http://www.exim.org/viewvc/pcre2?view=rev&revision=358
Author: zherczeg
Date: 2015-08-30 06:30:43 +0100 (Sun, 30 Aug 2015)
Log Message:
-----------
SSE2 refactor, JIT compiler update.
Modified Paths:
--------------
code/trunk/src/pcre2_jit_compile.c
code/trunk/src/sljit/sljitLir.h
code/trunk/src/sljit/sljitNativeX86_common.c
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2015-08-29 17:13:09 UTC (rev 357)
+++ code/trunk/src/pcre2_jit_compile.c 2015-08-30 05:30:43 UTC (rev 358)
@@ -4146,12 +4146,20 @@
{
SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
- OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
- quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- JUMPHERE(quit);
+ OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+ if (sljit_x86_is_cmov_available())
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
+ sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
+ }
+#endif
+ {
+ quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+ JUMPHERE(quit);
+ }
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
@@ -4163,49 +4171,55 @@
/* SSE2 accelerated first character search. */
-if (sljit_is_fpu_available())
+if (sljit_x86_is_sse2_available())
{
fast_forward_first_char2_sse2(common, char1, char2);
- quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- if (firstline)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
- else
- OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
+ if (common->mode == PCRE2_JIT_COMPLETE)
+ {
+ /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
+ SLJIT_ASSERT(common->forced_quit_label == NULL);
+ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
+ add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
- if (offset > 0)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
-
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf && offset > 0)
- {
- utf_quit = JUMP(SLJIT_JUMP);
+ if (common->utf && offset > 0)
+ {
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
- JUMPHERE(quit);
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif PCRE2_CODE_UNIT_WIDTH == 16
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPHERE(utf_quit);
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ }
+#endif
+
+ if (offset > 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
+ else if (sljit_x86_is_cmov_available())
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+ sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
+ }
else
-#endif
+ {
+ quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
JUMPHERE(quit);
+ }
- if (offset > 0)
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
-
if (firstline)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
-
return;
}
Modified: code/trunk/src/sljit/sljitLir.h
===================================================================
--- code/trunk/src/sljit/sljitLir.h 2015-08-29 17:13:09 UTC (rev 357)
+++ code/trunk/src/sljit/sljitLir.h 2015-08-30 05:30:43 UTC (rev 358)
@@ -869,34 +869,6 @@
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w);
-/* The following function is a helper function for sljit_emit_op_custom.
- It returns with the real machine register index ( >=0 ) of any SLJIT_R,
- SLJIT_S and SLJIT_SP registers.
-
- Note: it returns with -1 for virtual registers (only on x86-32). */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
-
-/* The following function is a helper function for sljit_emit_op_custom.
- It returns with the real machine register index of any SLJIT_FLOAT register.
-
- Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
-
-/* Any instruction can be inserted into the instruction stream by
- sljit_emit_op_custom. It has a similar purpose as inline assembly.
- The size parameter must match to the instruction size of the target
- architecture:
-
- x86: 0 < size <= 15. The instruction argument can be byte aligned.
- Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
- if size == 4, the instruction argument must be 4 byte aligned.
- Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
- void *instruction, sljit_si size);
-
/* Returns with non-zero if fpu is available. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
@@ -1214,4 +1186,64 @@
#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+/* --------------------------------------------------------------------- */
+/* CPU specific functions */
+/* --------------------------------------------------------------------- */
+
+/* The following function is a helper function for sljit_emit_op_custom.
+ It returns with the real machine register index ( >=0 ) of any SLJIT_R,
+ SLJIT_S and SLJIT_SP registers.
+
+ Note: it returns with -1 for virtual registers (only on x86-32). */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
+
+/* The following function is a helper function for sljit_emit_op_custom.
+ It returns with the real machine register index of any SLJIT_FLOAT register.
+
+ Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
+
+/* Any instruction can be inserted into the instruction stream by
+ sljit_emit_op_custom. It has a similar purpose as inline assembly.
+ The size parameter must match to the instruction size of the target
+ architecture:
+
+ x86: 0 < size <= 15. The instruction argument can be byte aligned.
+ Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
+ if size == 4, the instruction argument must be 4 byte aligned.
+ Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+ void *instruction, sljit_si size);
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
+/* Returns with non-zero if sse2 is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void);
+
+/* Returns with non-zero if cmov instruction is available. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void);
+
+/* Emit a conditional mov instruction on x86 CPUs. This instruction
+ moves src to destination, if the condition is satisfied. Unlike
+ other arithmetic instructions, destination must be a register.
+ Before such instructions are emitted, cmov support should be
+ checked by sljit_x86_is_cmov_available function.
+ type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
+ dst_reg must be a valid register and it can be combined
+ with SLJIT_INT_OP to perform 32 bit arithmetic
+ Flags: I - (never set any flags)
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
+ sljit_si type,
+ sljit_si dst_reg,
+ sljit_si src, sljit_sw srcw);
+
+#endif
+
#endif /* _SLJIT_LIR_H_ */
Modified: code/trunk/src/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_common.c 2015-08-29 17:13:09 UTC (rev 357)
+++ code/trunk/src/sljit/sljitNativeX86_common.c 2015-08-30 05:30:43 UTC (rev 358)
@@ -2936,3 +2936,69 @@
{
*(sljit_sw*)addr = new_constant;
}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
+{
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+ if (cpu_has_sse2 == -1)
+ get_cpu_features();
+ return cpu_has_sse2;
+#else
+ return 1;
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
+{
+ if (cpu_has_cmov == -1)
+ get_cpu_features();
+ return cpu_has_cmov;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
+ sljit_si type,
+ sljit_si dst_reg,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_ub* inst;
+
+ CHECK_ERROR();
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_x86_is_cmov_available());
+ CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
+ CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
+ FUNCTION_CHECK_SRC(src, srcw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
+ !(dst_reg & SLJIT_INT_OP) ? "" : ".i",
+ JUMP_PREFIX(type), jump_names[type & 0xff]);
+ sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(compiler, src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = dst_reg & SLJIT_INT_OP;
+#endif
+ dst_reg &= ~SLJIT_INT_OP;
+
+ if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = get_jump_code(type & 0xff) - 0x40;
+ return SLJIT_SUCCESS;
+}