[Pcre-svn] [1203] code/trunk/sljit: JIT compiler update.

Kezdőlap
Üzenet törlése
Szerző: Subversion repository
Dátum:  
Címzett: pcre-svn
Tárgy: [Pcre-svn] [1203] code/trunk/sljit: JIT compiler update.
Revision: 1203
          http://vcs.pcre.org/viewvc?view=rev&revision=1203
Author:   zherczeg
Date:     2012-11-05 12:30:13 +0000 (Mon, 05 Nov 2012)


Log Message:
-----------
JIT compiler update.

Modified Paths:
--------------
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitLir.h
    code/trunk/sljit/sljitNativeARM_Thumb2.c
    code/trunk/sljit/sljitNativeARM_v5.c
    code/trunk/sljit/sljitNativeMIPS_common.c
    code/trunk/sljit/sljitNativePPC_common.c
    code/trunk/sljit/sljitNativeSPARC_common.c
    code/trunk/sljit/sljitNativeX86_common.c


Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitLir.c    2012-11-05 12:30:13 UTC (rev 1203)
@@ -1200,15 +1200,17 @@
     SLJIT_UNUSED_ARG(type);


     SLJIT_ASSERT(type >= SLJIT_C_EQUAL && type < SLJIT_JUMP);
-    SLJIT_ASSERT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_OR);
-    SLJIT_ASSERT(GET_ALL_FLAGS(op) == 0 || GET_ALL_FLAGS(op) == SLJIT_SET_E || GET_ALL_FLAGS(op) == SLJIT_KEEP_FLAGS);
+    SLJIT_ASSERT(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI
+        || GET_OPCODE(op) == SLJIT_OR || GET_OPCODE(op) == SLJIT_AND);
+    SLJIT_ASSERT((op & (SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C)) == 0);
+    SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS));
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     FUNCTION_CHECK_DST(dst, dstw);
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
-        fprintf(compiler->verbose, "  cond_set%s%s <%s> ", !(op & SLJIT_SET_E) ? "" : "E",
-            !(op & SLJIT_KEEP_FLAGS) ? "" : "K", op_names[GET_OPCODE(op)]);
+        fprintf(compiler->verbose, "  %scond_value%s%s <%s> ", !(op & SLJIT_INT_OP) ? "" : "i",
+            !(op & SLJIT_SET_E) ? "" : "E", !(op & SLJIT_KEEP_FLAGS) ? "" : "K", op_names[GET_OPCODE(op)]);
         sljit_verbose_param(dst, dstw);
         fprintf(compiler->verbose, ", <%s>\n", jump_names[type]);
     }


Modified: code/trunk/sljit/sljitLir.h
===================================================================
--- code/trunk/sljit/sljitLir.h    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitLir.h    2012-11-05 12:30:13 UTC (rev 1203)
@@ -834,14 +834,18 @@
    Flags: destroy all flags for calls. */
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw);


-/* If op == SLJIT_MOV:
+/* If op == SLJIT_MOV, SLJIT_MOV_SI, SLJIT_MOV_UI:
      Set dst to 1 if condition is fulfilled, 0 otherwise
        type must be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_ORDERED
      Flags: - (never set any flags)
    If op == SLJIT_OR
      Dst is used as src as well, and set its lowest bit to 1 if
-     the condition is fulfilled. Otherwise it does nothing.
-     Flags: E | K
+     the condition is fulfilled. Other bits are unaffected.
+     Flags: I | E | K
+   If op == SLJIT_AND
+     Dst is used as src as well, and set its lowest bit to 0 if
+     the condition is not fulfilled. Resets all other bits.
+     Flags: I | E | K
    Note: sljit_emit_cond_value does nothing, if dst is SLJIT_UNUSED (regardless of op). */
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si type);



Modified: code/trunk/sljit/sljitNativeARM_Thumb2.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-11-05 12:30:13 UTC (rev 1203)
@@ -1892,7 +1892,7 @@


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si type)
 {
-    sljit_si dst_r;
+    sljit_si dst_r, flags = GET_ALL_FLAGS(op);
     sljit_uw cc;


     CHECK_ERROR();
@@ -1902,37 +1902,42 @@
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


+    op = GET_OPCODE(op);
     cc = get_cc(type);
-    if (GET_OPCODE(op) == SLJIT_OR && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
+    if ((op == SLJIT_AND || op == SLJIT_OR) && dst <= SLJIT_NO_REGISTERS) {
         FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
-        FAIL_IF(push_inst32(compiler, ORRI | RN4(dst) | RD4(dst) | 0x1));
-        if (op & SLJIT_SET_E) {
+        FAIL_IF(push_inst32(compiler, ((op == SLJIT_AND) ? ANDI : ORRI) | RN4(dst) | RD4(dst) | 0x1));
+        if (flags & SLJIT_SET_E) {
+            /* The condition must always be set, even if the AND/ORR is not executed above. */
             if (reg_map[dst] <= 7)
-                return push_inst16(compiler, ORRS | RD3(dst) | RN3(dst));
-            return push_inst32(compiler, ORR_W | SET_FLAGS | RD4(TMP_REG1) | RN4(dst) | RM4(dst));
+                return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
+            return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst));
         }
         return SLJIT_SUCCESS;
     }


+    FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
     dst_r = TMP_REG2;
-    if (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && reg_map[dst] <= 7)
+    if (op < SLJIT_ADD && dst <= SLJIT_NO_REGISTERS) {
+        if (reg_map[dst] > 7) {
+            FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst) | 1));
+            return push_inst32(compiler, MOV_WI | RD4(dst) | 0);
+        }
         dst_r = dst;
+    }


-    FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
-    FAIL_IF(push_inst16(compiler, MOVSI | 0x1 | RDN3(dst_r)));
-    FAIL_IF(push_inst16(compiler, MOVSI | 0x0 | RDN3(dst_r)));
+    FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0x1));
+    FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0x0));


     if (dst_r == TMP_REG2) {
-        if (GET_OPCODE(op) == SLJIT_OR) {
+        if (op == SLJIT_AND || op == SLJIT_OR) {
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
             compiler->skip_checks = 1;
 #endif
-            return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG2, 0);
+            return sljit_emit_op2(compiler, op | flags, dst, dstw, dst, dstw, TMP_REG2, 0);
         }
-        if (dst & SLJIT_MEM)
-            return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
-        else
-            return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
+        SLJIT_ASSERT(dst & SLJIT_MEM);
+        return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
     }


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativeARM_v5.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_v5.c    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitNativeARM_v5.c    2012-11-05 12:30:13 UTC (rev 1203)
@@ -2418,30 +2418,28 @@
         return SLJIT_SUCCESS;


     cc = get_cc(type);
-    if (GET_OPCODE(op) == SLJIT_OR) {
-        if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
-            EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ORR_DP, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc);
-            if (op & SLJIT_SET_E)
-                return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst)));
-            return SLJIT_SUCCESS;
-        }
+    if (GET_OPCODE(op) < SLJIT_ADD) {
+        reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;


-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 0));
-        EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
-        compiler->skip_checks = 1;
-#endif
-        return emit_op(compiler, op, ALLOW_IMM, dst, dstw, TMP_REG1, 0, dst, dstw);
+        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 0));
+        EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
+
+        return (reg == TMP_REG2) ? emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, TMP_REG2, 0) : SLJIT_SUCCESS;
     }


-    reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;
+    if (dst <= SLJIT_NO_REGISTERS) {
+        EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(GET_OPCODE(op) == SLJIT_AND ? AND_DP : ORR_DP,
+            0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc);
+        /* The condition must always be set, even if the AND/ORR is not executed above. */
+        return (op & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
+    }


-    EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 0));
-    EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
-
-    if (reg == TMP_REG2)
-        return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
-    return SLJIT_SUCCESS;
+    EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 0));
+    EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->skip_checks = 1;
+#endif
+    return emit_op(compiler, op, ALLOW_IMM, dst, dstw, TMP_REG1, 0, dst, dstw);
 }


SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)

Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2012-11-05 12:30:13 UTC (rev 1203)
@@ -1748,6 +1748,7 @@
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si type)
 {
     sljit_si sugg_dst_ar, dst_ar;
+    sljit_si flags = GET_ALL_FLAGS(op);


     CHECK_ERROR();
     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
@@ -1756,7 +1757,8 @@
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


-    sugg_dst_ar = DR((op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2);
+    op = GET_OPCODE(op);
+    sugg_dst_ar = DR((op < SLJIT_ADD && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2);


     switch (type) {
     case SLJIT_C_EQUAL:
@@ -1818,10 +1820,10 @@
         dst_ar = sugg_dst_ar;
     }


-    if (GET_OPCODE(op) == SLJIT_OR) {
+    if (op == SLJIT_AND || op == SLJIT_OR) {
         if (DR(TMP_REG2) != dst_ar)
             FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
-        return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0);
+        return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0);
     }


     if (dst & SLJIT_MEM)


Modified: code/trunk/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_common.c    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitNativePPC_common.c    2012-11-05 12:30:13 UTC (rev 1203)
@@ -1850,7 +1850,7 @@


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si type)
 {
-    sljit_si reg;
+    sljit_si reg, flags = GET_ALL_FLAGS(op);


     CHECK_ERROR();
     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
@@ -1859,7 +1859,8 @@
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


-    reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;
+    op = GET_OPCODE(op);
+    reg = (op < SLJIT_ADD && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;


     switch (type) {
     case SLJIT_C_EQUAL:
@@ -1945,10 +1946,25 @@
         break;
     }


-    if (GET_OPCODE(op) == SLJIT_OR)
-        return emit_op(compiler, SLJIT_OR, GET_FLAGS(op) ? ALT_SET_FLAGS : 0, dst, dstw, dst, dstw, TMP_REG2, 0);
+    if (op < SLJIT_ADD) {
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+        if (op == SLJIT_MOV)
+            flags = WORD_DATA;
+        else {
+            op = SLJIT_MOV_UI;
+            flags = INT_DATA;
+        }
+#else
+        op = SLJIT_MOV;
+        flags = WORD_DATA;
+#endif
+        return (reg == TMP_REG2) ? emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0) : SLJIT_SUCCESS;
+    }


-    return (reg == TMP_REG2) ? emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0) : SLJIT_SUCCESS;
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->skip_checks = 1;
+#endif
+    return sljit_emit_op2(compiler, op | flags, dst, dstw, dst, dstw, TMP_REG2, 0);
 }


SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)

Modified: code/trunk/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_common.c    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitNativeSPARC_common.c    2012-11-05 12:30:13 UTC (rev 1203)
@@ -1272,7 +1272,7 @@


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si type)
 {
-    sljit_si reg;
+    sljit_si reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0);


     CHECK_ERROR();
     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
@@ -1282,7 +1282,8 @@
         return SLJIT_SUCCESS;


 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
-    reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;
+    op = GET_OPCODE(op);
+    reg = (op < SLJIT_ADD && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;


     if (type < SLJIT_C_FLOAT_EQUAL)
         FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS));
@@ -1292,8 +1293,8 @@
     FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
     FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));


-    if (GET_OPCODE(op) == SLJIT_OR)
-        return emit_op(compiler, SLJIT_OR, (GET_FLAGS(op) ? SET_FLAGS : 0) | CUMULATIVE_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0);
+    if (op == SLJIT_AND || op == SLJIT_OR)
+        return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0);


     return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
 #else


Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2012-11-04 16:13:29 UTC (rev 1202)
+++ code/trunk/sljit/sljitNativeX86_common.c    2012-11-05 12:30:13 UTC (rev 1203)
@@ -263,9 +263,9 @@
    built-in CPU features. Therefore they can be overwritten by different threads
    if they detect the CPU features in the same time. */
 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-static sljit_ui cpu_has_sse2 = -1;
+static sljit_si cpu_has_sse2 = -1;
 #endif
-static sljit_ui cpu_has_cmov = -1;
+static sljit_si cpu_has_cmov = -1;


 #if defined(_MSC_VER) && (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #include <intrin.h>
@@ -2604,10 +2604,12 @@
 {
     sljit_ub *inst;
     sljit_ub cond_set = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    sljit_si reg;
+#else
+    /* CHECK_EXTRA_REGS might overwrite these values. */
     sljit_si dst_save = dst;
     sljit_sw dstw_save = dstw;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    sljit_si reg;
 #endif


     CHECK_ERROR();
@@ -2703,21 +2705,21 @@
     *inst++ = MOVZX_r_rm8;
     *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];


-    if (reg == TMP_REGISTER) {
-        if (op == SLJIT_MOV) {
-            compiler->mode32 = 0;
-            EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
-        }
-        else {
+    if (reg != TMP_REGISTER)
+        return SLJIT_SUCCESS;
+
+    if (GET_OPCODE(op) < SLJIT_ADD) {
+        compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
+        return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
+    }
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
-            compiler->skip_checks = 1;
+    compiler->skip_checks = 1;
 #endif
-            return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
-        }
-    }
-#else
-    if (op == SLJIT_MOV) {
-        if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
+    return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
+#else /* SLJIT_CONFIG_X86_64 */
+    if (GET_OPCODE(op) < SLJIT_ADD && dst <= SLJIT_NO_REGISTERS) {
+        if (reg_map[dst] <= 4) {
+            /* Low byte is accessible. */
             inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
             FAIL_IF(!inst);
             INC_SIZE(3 + 3);
@@ -2729,66 +2731,68 @@
             *inst++ = GROUP_0F;
             *inst++ = MOVZX_r_rm8;
             *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
+            return SLJIT_SUCCESS;
         }
-        else {
-            EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);


-            inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
-            FAIL_IF(!inst);
-            INC_SIZE(3 + 3);
-            /* Set al to conditional flag. */
-            *inst++ = GROUP_0F;
-            *inst++ = cond_set;
-            *inst++ = MOD_REG;
+        /* Low byte is not accessible. */
+        if (cpu_has_cmov == -1)
+            get_cpu_features();


-            *inst++ = GROUP_0F;
-            *inst++ = MOVZX_r_rm8;
-            if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
-                *inst = MOD_REG | (reg_map[dst] << 3);
-            else {
-                *inst = MOD_REG;
-                EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
-            }
+        if (cpu_has_cmov) {
+            EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, 1);
+            /* a xor reg, reg operation would overwrite the flags. */
+            EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);


-            EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
-        }
-    }
-    else {
-        if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
-            EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
             inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
             FAIL_IF(!inst);
             INC_SIZE(3);


             *inst++ = GROUP_0F;
-            *inst++ = cond_set;
-            *inst++ = MOD_REG | reg_map[dst];
+            *inst++ = cond_set - 0x50;
+            *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REGISTER];
+            return SLJIT_SUCCESS;
         }
-        else {
-            EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);


-            inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
-            FAIL_IF(!inst);
-            INC_SIZE(3 + 3 + 1);
-            /* Set al to conditional flag. */
-            *inst++ = GROUP_0F;
-            *inst++ = cond_set;
-            *inst++ = MOD_REG;
+        inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
+        FAIL_IF(!inst);
+        INC_SIZE(1 + 3 + 3 + 1);
+        *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
+        /* Set al to conditional flag. */
+        *inst++ = GROUP_0F;
+        *inst++ = cond_set;
+        *inst++ = MOD_REG | 0 /* eax */;


-            *inst++ = GROUP_0F;
-            *inst++ = MOVZX_r_rm8;
-            *inst++ = MOD_REG;
+        *inst++ = GROUP_0F;
+        *inst++ = MOVZX_r_rm8;
+        *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
+        *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
+        return SLJIT_SUCCESS;
+    }


-            *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
-        }
+    /* Set TMP_REGISTER to the bit. */
+    inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
+    FAIL_IF(!inst);
+    INC_SIZE(1 + 3 + 3 + 1);
+    *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
+    /* Set al to conditional flag. */
+    *inst++ = GROUP_0F;
+    *inst++ = cond_set;
+    *inst++ = MOD_REG | 0 /* eax */;
+
+    *inst++ = GROUP_0F;
+    *inst++ = MOVZX_r_rm8;
+    *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
+
+    *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
+
+    if (GET_OPCODE(op) < SLJIT_ADD)
+        return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
+
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
-        compiler->skip_checks = 1;
+    compiler->skip_checks = 1;
 #endif
-        return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
-    }
-#endif
-
-    return SLJIT_SUCCESS;
+    return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
+#endif /* SLJIT_CONFIG_X86_64 */
 }


SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)