[Pcre-svn] [765] code/trunk/src: JIT compiler update.

Inizio della pagina
Delete this message
Autore: Subversion repository
Data:  
To: pcre-svn
Oggetto: [Pcre-svn] [765] code/trunk/src: JIT compiler update.
Revision: 765
          http://www.exim.org/viewvc/pcre2?view=rev&revision=765
Author:   zherczeg
Date:     2017-04-26 09:34:09 +0100 (Wed, 26 Apr 2017)
Log Message:
-----------
JIT compiler update.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/sljit/sljitConfigInternal.h
    code/trunk/src/sljit/sljitLir.c
    code/trunk/src/sljit/sljitLir.h
    code/trunk/src/sljit/sljitNativeARM_32.c
    code/trunk/src/sljit/sljitNativeARM_64.c
    code/trunk/src/sljit/sljitNativeARM_T2_32.c
    code/trunk/src/sljit/sljitNativeMIPS_32.c
    code/trunk/src/sljit/sljitNativeMIPS_64.c
    code/trunk/src/sljit/sljitNativeMIPS_common.c
    code/trunk/src/sljit/sljitNativePPC_32.c
    code/trunk/src/sljit/sljitNativePPC_64.c
    code/trunk/src/sljit/sljitNativePPC_common.c
    code/trunk/src/sljit/sljitNativeSPARC_32.c
    code/trunk/src/sljit/sljitNativeSPARC_common.c
    code/trunk/src/sljit/sljitNativeTILEGX_64.c
    code/trunk/src/sljit/sljitNativeX86_common.c


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/pcre2_jit_compile.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -4458,7 +4458,7 @@
 sljit_s32 cmp2a_ind = 5;
 sljit_s32 cmp2b_ind = 6;
 struct sljit_label *start;
-struct sljit_jump *jump[4];
+struct sljit_jump *jump[3];


sljit_u8 instruction[8];

@@ -4473,17 +4473,8 @@
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

-  if (sljit_x86_is_cmov_available())
-    {
-    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
-    sljit_x86_emit_cmov(compiler, SLJIT_LESS, STR_END, TMP1, 0);
-    }
-  else
-    {
-    jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
-    OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
-    JUMPHERE(jump[1]);
-    }
+  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+  sljit_emit_cmov(compiler, SLJIT_LESS, STR_END, TMP1, 0);
   }


/* MOVD xmm, r/m32 */
@@ -4574,7 +4565,7 @@

if (offs2 > 0)
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs2));
-jump[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
@@ -4582,7 +4573,7 @@

load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);

-jump[2] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);
+jump[1] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);

load_from_mem_sse2(compiler, data2_ind, tmp1_ind);

@@ -4616,9 +4607,9 @@
instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
sljit_emit_op_custom(compiler, instruction, 4);

-jump[3] = JUMP(SLJIT_JUMP);
+jump[2] = JUMP(SLJIT_JUMP);

-JUMPHERE(jump[2]);
+JUMPHERE(jump[1]);

/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
@@ -4635,7 +4626,7 @@
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);

-JUMPHERE(jump[3]);
+JUMPHERE(jump[2]);

OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

@@ -4667,7 +4658,7 @@
sljit_emit_op_custom(compiler, instruction, 3);
sljit_set_current_flags(compiler, SLJIT_SET_Z);

-jump[2] = JUMP(SLJIT_NOT_ZERO);
+jump[1] = JUMP(SLJIT_NOT_ZERO);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

@@ -4680,7 +4671,7 @@
load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
-jump[3] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+jump[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);

@@ -4740,28 +4731,19 @@

JUMPTO(SLJIT_ZERO, start);

-JUMPHERE(jump[2]);
+JUMPHERE(jump[1]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

-JUMPHERE(jump[1]);
-JUMPHERE(jump[3]);
+JUMPHERE(jump[0]);
+JUMPHERE(jump[2]);

if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

-if (sljit_x86_is_cmov_available())
- {
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
- sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_PTR, STR_END, 0);
- }
-else
- {
- jump[1] = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
- JUMPHERE(jump[1]);
- }
+OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+sljit_emit_cmov(compiler, SLJIT_GREATER, STR_PTR, STR_END, 0);

if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
@@ -4847,19 +4829,8 @@
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

   OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-  if (sljit_x86_is_cmov_available())
-    {
-    OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
-    sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
-    }
-  else
-#endif
-    {
-    quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
-    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
-    JUMPHERE(quit);
-    }
+  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
+  sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
   }


#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
@@ -4871,7 +4842,7 @@

/* SSE2 accelerated first character search. */

-if (sljit_x86_is_sse2_available())
+if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
{
fast_forward_first_char2_sse2(common, char1, char2);

@@ -4906,17 +4877,17 @@
     if (offset > 0)
       OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
     }
-  else if (sljit_x86_is_cmov_available())
+  else
     {
     OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
-    sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
+    if (has_match_end)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
+      }
+    else
+      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
     }
-  else
-    {
-    quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
-    OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
-    JUMPHERE(quit);
-    }


   if (has_match_end)
     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);


Modified: code/trunk/src/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/src/sljit/sljitConfigInternal.h    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitConfigInternal.h    2017-04-26 08:34:09 UTC (rev 765)
@@ -393,7 +393,9 @@
 #ifndef SLJIT_W


 /* Defining long constants. */
-#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+#define SLJIT_W(w)    (w##l)
+#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
 #define SLJIT_W(w)    (w##ll)
 #else
 #define SLJIT_W(w)    (w)


Modified: code/trunk/src/sljit/sljitLir.c
===================================================================
--- code/trunk/src/sljit/sljitLir.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitLir.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -342,7 +342,7 @@
 /*  Public functions                                                     */
 /* --------------------------------------------------------------------- */


-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#define SLJIT_NEEDS_COMPILER_INIT 1
static sljit_s32 compiler_initialized = 0;
/* A thread safe initialization. */
@@ -1042,7 +1042,7 @@

     switch (GET_OPCODE(op)) {
     case SLJIT_NOT:
-    case SLJIT_CLZ:
+        /* Only SLJIT_I32_OP and SLJIT_SET_Z are allowed. */
         CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
         break;
     case SLJIT_NEG:
@@ -1060,7 +1060,7 @@
         CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
         break;
     default:
-        /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */
+        /* Only SLJIT_I32_OP is allowed. */
         CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
         break;
     }
@@ -1241,7 +1241,7 @@
     }


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-    CHECK_ARGUMENT(sljit_is_fpu_available());
+    CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64);
     CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_FCHECK(src, srcw);
@@ -1279,7 +1279,7 @@
     }


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-    CHECK_ARGUMENT(sljit_is_fpu_available());
+    CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
     CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64);
     CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
     CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK)
@@ -1313,7 +1313,7 @@
     }


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-    CHECK_ARGUMENT(sljit_is_fpu_available());
+    CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64);
     CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_FCHECK(src, srcw);
@@ -1343,7 +1343,7 @@
     }


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-    CHECK_ARGUMENT(sljit_is_fpu_available());
+    CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32);
     CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_CHECK_SRC(src, srcw);
@@ -1369,7 +1369,7 @@
     sljit_s32 src2, sljit_sw src2w)
 {
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-    CHECK_ARGUMENT(sljit_is_fpu_available());
+    CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64);
     CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_FCHECK(src1, src1w);
@@ -1394,6 +1394,11 @@
 {
     SLJIT_UNUSED_ARG(compiler);


+    if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+        compiler->skip_checks = 0;
+        CHECK_RETURN_OK;
+    }
+
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     compiler->last_flags = 0;
 #endif
@@ -1464,7 +1469,7 @@
     sljit_s32 src2, sljit_sw src2w)
 {
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-    CHECK_ARGUMENT(sljit_is_fpu_available());
+    CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
     CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_F32_OP)));
     CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64);
     FUNCTION_FCHECK(src1, src1w);
@@ -1553,6 +1558,37 @@
     CHECK_RETURN_OK;
 }


+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
+    CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
+    CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
+    if (src != SLJIT_IMM) {
+        CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src));
+    }
+
+    if ((type & 0xff) <= SLJIT_NOT_ZERO)
+        CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+    else
+        CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+    if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+        fprintf(compiler->verbose, "  cmov%s %s%s, ",
+            !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
+            jump_names[type & 0xff], JUMP_POSTFIX(type));
+        sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
+        fprintf(compiler->verbose, ", ");
+        sljit_verbose_param(compiler, src, srcw);
+        fprintf(compiler->verbose, "\n");
+    }
+#endif
+    CHECK_RETURN_OK;
+}
+
 static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 {
     SLJIT_UNUSED_ARG(offset);
@@ -1636,6 +1672,44 @@
     return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw);
 }


+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+        || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
+        || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+        || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1))
+
+static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{
+    struct sljit_label *label;
+    struct sljit_jump *jump;
+    sljit_s32 op = (dst_reg & SLJIT_I32_OP) ? SLJIT_MOV32 : SLJIT_MOV;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+        || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    compiler->skip_checks = 1;
+#endif
+    jump = sljit_emit_jump(compiler, type ^ 0x1);
+    FAIL_IF(!jump);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+        || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    compiler->skip_checks = 1;
+#endif
+    FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_I32_OP, 0, src, srcw));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+        || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    compiler->skip_checks = 1;
+#endif
+    label = sljit_emit_label(compiler);
+    FAIL_IF(!label);
+    sljit_set_label(jump, label);
+    return SLJIT_SUCCESS;
+}
+
+#endif
+
 /* CPU description section */


 #if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
@@ -1858,6 +1932,13 @@
     return NULL;
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+    SLJIT_UNUSED_ARG(feature_type);
+    SLJIT_UNREACHABLE();
+    return 0;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
 {
     SLJIT_UNUSED_ARG(code);
@@ -1985,12 +2066,6 @@
     SLJIT_UNUSED_ARG(current_flags);
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-    SLJIT_UNREACHABLE();
-    return 0;
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src, sljit_sw srcw)
@@ -2105,6 +2180,19 @@
     return SLJIT_ERR_UNSUPPORTED;
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{
+    SLJIT_UNUSED_ARG(compiler);
+    SLJIT_UNUSED_ARG(type);
+    SLJIT_UNUSED_ARG(dst_reg);
+    SLJIT_UNUSED_ARG(src);
+    SLJIT_UNUSED_ARG(srcw);
+    SLJIT_UNREACHABLE();
+    return SLJIT_ERR_UNSUPPORTED;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 {
     SLJIT_UNUSED_ARG(compiler);


Modified: code/trunk/src/sljit/sljitLir.h
===================================================================
--- code/trunk/src/sljit/sljitLir.h    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitLir.h    2017-04-26 08:34:09 UTC (rev 765)
@@ -489,6 +489,29 @@
 */
 static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }


+/* Returns with non-zero if the passed SLJIT_HAS_* feature is available.
+
+   Some features (e.g. floating point operations) require CPU support
+   while other (e.g. move with update) is emulated if not available.
+   However it might be worth to generate a special code path even in
+   the latter case in certain cases. */
+
+/* [Not emulated] Floating-point support is available. */
+#define SLJIT_HAS_FPU            0
+/* [Emulated] Some forms of move with pre update is supported. */
+#define SLJIT_HAS_PRE_UPDATE        1
+/* [Emulated] Count leading zero is supported. */
+#define SLJIT_HAS_CLZ            2
+/* [Emulated] Conditional move is supported. */
+#define SLJIT_HAS_CMOV            3
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+/* [Not emulated] SSE2 support is available on x86. */
+#define SLJIT_HAS_SSE2            100
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
+
 /* Instruction generation. Returns with any error code. If there is no
    error, they return with SLJIT_SUCCESS. */


@@ -896,7 +919,7 @@
 #define SLJIT_NEG            (SLJIT_OP1_BASE + 17)
 #define SLJIT_NEG32            (SLJIT_NEG | SLJIT_I32_OP)
 /* Count leading zeroes
-   Flags: Z */
+   Flags: - (may destroy flags) */
 #define SLJIT_CLZ            (SLJIT_OP1_BASE + 18)
 #define SLJIT_CLZ32            (SLJIT_CLZ | SLJIT_I32_OP)


@@ -961,10 +984,6 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w);


-/* Returns with non-zero if fpu is available. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void);
-
 /* Starting index of opcodes for sljit_emit_fop1. */
 #define SLJIT_FOP1_BASE            128


@@ -1058,7 +1077,7 @@
 #define SLJIT_SET_SIG_LESS        SLJIT_SET(SLJIT_SIG_LESS)
 #define SLJIT_SIG_GREATER_EQUAL        7
 #define SLJIT_SIG_GREATER_EQUAL32    (SLJIT_SIG_GREATER_EQUAL | SLJIT_I32_OP)
-#define SLJIT_SET_SIG_GREATER_EQUAL    SLJIT_SET(SLJIT_SET_SIG_GREATER_EQUAL)
+#define SLJIT_SET_SIG_GREATER_EQUAL    SLJIT_SET(SLJIT_SIG_GREATER_EQUAL)
 #define SLJIT_SIG_GREATER        8
 #define SLJIT_SIG_GREATER32        (SLJIT_SIG_GREATER | SLJIT_I32_OP)
 #define SLJIT_SET_SIG_GREATER        SLJIT_SET(SLJIT_SIG_GREATER)
@@ -1171,7 +1190,7 @@
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw);


 /* Perform the operation using the conditional flags as the second argument.
-   Type must always be between SLJIT_EQUAL and SLJIT_S_ORDERED. The value
+   Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_F64. The value
    represented by the type is 1, if the condition represented by the type
    is fulfilled, and 0 otherwise.


@@ -1190,6 +1209,20 @@
     sljit_s32 src, sljit_sw srcw,
     sljit_s32 type);


+/* Emit a conditional mov instruction which moves source to destination,
+   if the condition is satisfied. Unlike other arithmetic operations this
+   instruction does not support memory accesses.
+
+   type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64
+   dst_reg must be a valid register and it can be combined
+      with SLJIT_I32_OP to perform a 32 bit arithmetic operation
+   src must be register or immediate (SLJIT_IMM)
+
+   Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw);
+
 /* Copies the base address of SLJIT_SP + offset to dst.
    Flags: - (may destroy flags) */
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
@@ -1215,7 +1248,7 @@
 /* --------------------------------------------------------------------- */


 #define SLJIT_MAJOR_VERSION    0
-#define SLJIT_MINOR_VERSION    93
+#define SLJIT_MINOR_VERSION    94


 /* Get the human readable name of the platform. Can be useful on platforms
    like ARM, where ARM and Thumb2 functions can be mixed, and
@@ -1354,32 +1387,4 @@
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler,
     sljit_s32 current_flags);


-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-
-/* Returns with non-zero if sse2 is available. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void);
-
-/* Returns with non-zero if cmov instruction is available. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void);
-
-/* Emit a conditional mov instruction on x86 CPUs. This instruction
-   moves src to destination, if the condition is satisfied. Unlike
-   other arithmetic instructions, destination must be a register.
-   Before such instructions are emitted, cmov support should be
-   checked by sljit_x86_is_cmov_available function.
-    type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
-    dst_reg must be a valid register and it can be combined
-      with SLJIT_I32_OP to perform 32 bit arithmetic
-   Flags: - (does not modify flags)
- */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
-    sljit_s32 type,
-    sljit_s32 dst_reg,
-    sljit_s32 src, sljit_sw srcw);
-
-#endif
-
 #endif /* _SLJIT_LIR_H_ */


Modified: code/trunk/src/sljit/sljitNativeARM_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_32.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeARM_32.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -82,6 +82,7 @@
 #define BLX        0xe12fff30
 #define BX        0xe12fff10
 #define CLZ        0xe16f0f10
+#define CMN_DP        0xb
 #define CMP_DP        0xa
 #define BKPT        0xe1200070
 #define EOR_DP        0x1
@@ -813,6 +814,27 @@
     return code;
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+    switch (feature_type) {
+    case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+        return SLJIT_IS_FPU_AVAILABLE;
+#else
+        /* Available by default. */
+        return 1;
+#endif
+
+    case SLJIT_HAS_PRE_UPDATE:
+    case SLJIT_HAS_CLZ:
+    case SLJIT_HAS_CMOV:
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
 /* --------------------------------------------------------------------- */
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */
@@ -969,6 +991,8 @@
 #define INV_IMM        0x02
   /* Source and destination is register. */
 #define MOVE_REG_CONV    0x04
+  /* Unused return value. */
+#define UNUSED_RETURN    0x08
 /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
 #define SET_FLAGS    (1 << 20)
 /* dst: reg
@@ -1055,12 +1079,13 @@
         SLJIT_ASSERT(!(flags & INV_IMM));
         SLJIT_ASSERT(!(src2 & SRC2_IMM));
         FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
-        if (flags & SET_FLAGS)
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, flags & SET_FLAGS, SLJIT_UNUSED, dst, SRC2_IMM));
         return SLJIT_SUCCESS;


     case SLJIT_ADD:
         SLJIT_ASSERT(!(flags & INV_IMM));
+        if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMN_DP, SET_FLAGS,
+                SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
         return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS,
             dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


@@ -1071,6 +1096,9 @@

     case SLJIT_SUB:
         SLJIT_ASSERT(!(flags & INV_IMM));
+        if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS,
+                SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
         return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS,
             dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


@@ -1296,7 +1324,10 @@
     /* Load integer. */
     return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
 #else
-    return emit_imm(compiler, reg, imm);
+    FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
+    if (imm <= 0xffff)
+        return SLJIT_SUCCESS;
+    return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
 #endif
 }


@@ -1413,10 +1444,8 @@
     sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;


     /* Destination check. */
-    if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
-        if (op <= SLJIT_MOVU_P && !(src2 & SLJIT_MEM))
-            return SLJIT_SUCCESS;
-    }
+    if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
+        flags |= UNUSED_RETURN;


     SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));


@@ -1752,44 +1781,7 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)

-/* 0 - no fpu
-   1 - vfp */
-static sljit_s32 arm_fpu_type = -1;
-
-static void init_compiler(void)
-{
-    if (arm_fpu_type != -1)
-        return;
-
-    /* TODO: Only the OS can help to determine the correct fpu type. */
-    arm_fpu_type = 1;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-    return SLJIT_IS_FPU_AVAILABLE;
-#else
-    if (arm_fpu_type == -1)
-        init_compiler();
-    return arm_fpu_type;
-#endif
-}
-
-#else
-
-#define arm_fpu_type 1
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-    /* Always available. */
-    return 1;
-}
-
-#endif
-
 #define FPU_LOAD (1 << 20)
 #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
     ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs))
@@ -2254,6 +2246,43 @@
     return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_reg))) : SLJIT_SUCCESS;
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{
+    sljit_uw cc, tmp;
+
+    CHECK_ERROR();
+    CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+    dst_reg &= ~SLJIT_I32_OP;
+
+    cc = get_cc(type & 0xff);
+
+    if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+        tmp = get_imm(srcw);
+        if (tmp)
+            return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc);
+
+        tmp = get_imm(~srcw);
+        if (tmp)
+            return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MVN_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc);
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+        tmp = (sljit_uw) srcw;
+        FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
+        if (tmp <= 0xffff)
+            return SLJIT_SUCCESS;
+        return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
+#else
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+        src = TMP_REG1;
+#endif
+    }
+
+    return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, src) & ~COND_MASK) | cc);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
     struct sljit_const *const_;


Modified: code/trunk/src/sljit/sljitNativeARM_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_64.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeARM_64.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -76,6 +76,7 @@
 #define BRK 0xd4200000
 #define CBZ 0xb4000000
 #define CLZ 0xdac01000
+#define CSEL 0x9a800000
 #define CSINC 0x9a800400
 #define EOR 0xca000000
 #define EORI 0xd2000000
@@ -323,6 +324,27 @@
     return code;
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+    switch (feature_type) {
+    case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+        return SLJIT_IS_FPU_AVAILABLE;
+#else
+        /* Available by default. */
+        return 1;
+#endif
+
+    case SLJIT_HAS_PRE_UPDATE:
+    case SLJIT_HAS_CLZ:
+    case SLJIT_HAS_CMOV:
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
 /* --------------------------------------------------------------------- */
 /*  Core code generator functions.                                       */
 /* --------------------------------------------------------------------- */
@@ -712,7 +734,7 @@
     case SLJIT_NOT:
         SLJIT_ASSERT(arg1 == TMP_REG1);
         FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
-        goto set_flags;
+        break; /* Set flags. */
     case SLJIT_NEG:
         SLJIT_ASSERT(arg1 == TMP_REG1);
         if (flags & SET_FLAGS)
@@ -720,8 +742,7 @@
         return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
     case SLJIT_CLZ:
         SLJIT_ASSERT(arg1 == TMP_REG1);
-        FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
-        goto set_flags;
+        return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
     case SLJIT_ADD:
         CHECK_FLAGS(1 << 29);
         return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
@@ -750,24 +771,24 @@
         return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
     case SLJIT_OR:
         FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
-        goto set_flags;
+        break; /* Set flags. */
     case SLJIT_XOR:
         FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
-        goto set_flags;
+        break; /* Set flags. */
     case SLJIT_SHL:
         FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
-        goto set_flags;
+        break; /* Set flags. */
     case SLJIT_LSHR:
         FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
-        goto set_flags;
+        break; /* Set flags. */
     case SLJIT_ASHR:
         FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
-        goto set_flags;
+        break; /* Set flags. */
+    default:
+        SLJIT_UNREACHABLE();
+        return SLJIT_SUCCESS;
     }


-    SLJIT_UNREACHABLE();
-    return SLJIT_SUCCESS;
-
 set_flags:
     if (flags & SET_FLAGS)
         return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
@@ -1539,16 +1560,6 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-    return SLJIT_IS_FPU_AVAILABLE;
-#else
-    /* Available by default. */
-    return 1;
-#endif
-}
-
 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 {
     sljit_u32 shift = MEM_SIZE_SHIFT(flags);
@@ -2016,6 +2027,30 @@
     return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
 }


+/* Emits a conditional move into dst_reg.
+   type:    the low 8 bits carry the condition, translated by get_cc().
+   dst_reg: destination register; may be or-ed with SLJIT_I32_OP, in which
+            case bit 31 of the opcode is toggled.
+   src:     source register, or SLJIT_IMM with the value in srcw.
+   The emitted CSEL names src as its first and dst_reg as its second source
+   operand, so dst_reg keeps its value when the condition does not hold.
+   Returns the result of push_inst(); CHECK_ERROR / CHECK / FAIL_IF may
+   presumably return earlier with an error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{
+    /* Shift an unsigned one: (1 << 31) on a signed int is undefined behavior. */
+    sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? ((sljit_ins)1 << 31) : 0;
+    sljit_ins cc;
+
+    CHECK_ERROR();
+    CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+    if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+        /* Immediates are first loaded into TMP_REG1; with SLJIT_I32_OP the
+           value is narrowed to sljit_s32 before the load. */
+        if (dst_reg & SLJIT_I32_OP)
+            srcw = (sljit_s32)srcw;
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+        src = TMP_REG1;
+        srcw = 0;
+    }
+
+    cc = get_cc(type & 0xff);
+    /* The SLJIT_I32_OP bit was captured in inv_bits; strip it so that
+       dst_reg is a plain register index for RD() / RM(). */
+    dst_reg &= ~SLJIT_I32_OP;
+
+    return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src) | RM(dst_reg));
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
     struct sljit_const *const_;


Modified: code/trunk/src/sljit/sljitNativeARM_T2_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_T2_32.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeARM_T2_32.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -107,7 +107,11 @@
 #define BLX        0x4780
 #define BX        0x4700
 #define CLZ        0xfab0f080
+#define CMNI_W        0xf1100f00
+#define CMP        0x4280
 #define CMPI        0x2800
+#define CMPI_W        0xf1b00f00
+#define CMP_X        0x4500
 #define CMP_W        0xebb00f00
 #define EORI        0xf0800000
 #define EORS        0x4040
@@ -429,6 +433,27 @@
     return (void*)((sljit_uw)code | 0x1);
 }


+/* Reports whether the feature identified by feature_type is available.
+   Returns the value of SLJIT_IS_FPU_AVAILABLE (or 1 when that macro is not
+   defined) for SLJIT_HAS_FPU, 1 for SLJIT_HAS_PRE_UPDATE, SLJIT_HAS_CLZ and
+   SLJIT_HAS_CMOV, and 0 for every other feature type. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+    sljit_s32 available = 0;
+
+    switch (feature_type) {
+    case SLJIT_HAS_PRE_UPDATE:
+    case SLJIT_HAS_CLZ:
+    case SLJIT_HAS_CMOV:
+        available = 1;
+        break;
+
+    case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+        available = SLJIT_IS_FPU_AVAILABLE;
+#else
+        /* Available by default. */
+        available = 1;
+#endif
+        break;
+
+    default:
+        /* Unknown feature types are reported as unavailable. */
+        break;
+    }
+
+    return available;
+}
+
 /* --------------------------------------------------------------------- */
 /*  Core code generator functions.                                       */
 /* --------------------------------------------------------------------- */
@@ -566,9 +591,12 @@
                 if (nimm <= 0xfff)
                     return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm));
             }
-            imm = get_imm(imm);
-            if (imm != INVALID_IMM)
-                return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+            nimm = get_imm(imm);
+            if (nimm != INVALID_IMM)
+                return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+            nimm = get_imm(-imm);
+            if (nimm != INVALID_IMM)
+                return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
             break;
         case SLJIT_ADDC:
             imm = get_imm(imm);
@@ -576,6 +604,7 @@
                 return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
             break;
         case SLJIT_SUB:
+            /* SUB operation can be replaced by ADD because of the negative carry flag. */
             if (flags & ARG1_IMM) {
                 if (imm == 0 && IS_2_LO_REGS(reg, dst))
                     return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
@@ -584,6 +613,16 @@
                     return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
                 break;
             }
+            if (flags & UNUSED_RETURN) {
+                if (imm <= 0xff && reg_map[reg] <= 7)
+                    return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
+                nimm = get_imm(imm);
+                if (nimm != INVALID_IMM)
+                    return push_inst32(compiler, CMPI_W | RN4(reg) | nimm);
+                nimm = get_imm(-imm);
+                if (nimm != INVALID_IMM)
+                    return push_inst32(compiler, CMNI_W | RN4(reg) | nimm);
+            }
             nimm = -imm;
             if (IS_2_LO_REGS(reg, dst)) {
                 if (imm <= 0x7)
@@ -596,8 +635,6 @@
                     if (nimm <= 0xff)
                         return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst));
                 }
-                if (imm <= 0xff && (flags & UNUSED_RETURN))
-                    return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
             }
             if (!(flags & SET_FLAGS)) {
                 if (imm <= 0xfff)
@@ -605,9 +642,12 @@
                 if (nimm <= 0xfff)
                     return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm));
             }
-            imm = get_imm(imm);
-            if (imm != INVALID_IMM)
-                return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+            nimm = get_imm(imm);
+            if (nimm != INVALID_IMM)
+                return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+            nimm = get_imm(-imm);
+            if (nimm != INVALID_IMM)
+                return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
             break;
         case SLJIT_SUBC:
             if (flags & ARG1_IMM)
@@ -729,11 +769,6 @@
     case SLJIT_CLZ:
         SLJIT_ASSERT(arg1 == TMP_REG2);
         FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
-        if (flags & SET_FLAGS) {
-            if (reg_map[dst] <= 7)
-                return push_inst16(compiler, CMPI | RDN3(dst));
-            return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst));
-        }
         return SLJIT_SUCCESS;
     case SLJIT_ADD:
         if (IS_3_LO_REGS(dst, arg1, arg2))
@@ -746,6 +781,11 @@
             return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_SUB:
+        if (flags & UNUSED_RETURN) {
+            if (IS_2_LO_REGS(arg1, arg2))
+                return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
+            return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
+        }
         if (IS_3_LO_REGS(dst, arg1, arg2))
             return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
         return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
@@ -1407,16 +1447,6 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-    return SLJIT_IS_FPU_AVAILABLE;
-#else
-    /* Available by default. */
-    return 1;
-#endif
-}
-
 #define FPU_LOAD (1 << 20)


 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
@@ -1830,6 +1860,7 @@
             FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
             FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
         } else {
+            /* The movsi (immediate) instruction does not set flags in IT block. */
             FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
             FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
         }
@@ -1885,6 +1916,54 @@
     return SLJIT_SUCCESS;
 }


+/* Emits a conditional move into dst_reg using a Thumb-2 IT block.
+   type:    the low 8 bits carry the condition, translated by get_cc().
+   dst_reg: destination register; an or-ed SLJIT_I32_OP bit is ignored.
+   src:     source register, or SLJIT_IMM with the value in srcw.
+   Register sources use a conditional MOV. Immediates are materialized
+   directly in dst_reg by the shortest form that fits: MOVW, MOV_WI,
+   MVN_WI, or a MOVW + MOVT pair.
+   Returns the result of the last push_inst16 / push_inst32 call;
+   CHECK_ERROR / CHECK / FAIL_IF may presumably return earlier. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{
+    sljit_uw cc, tmp;
+
+    CHECK_ERROR();
+    CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+    dst_reg &= ~SLJIT_I32_OP;
+
+    cc = get_cc(type & 0xff);
+
+    if (!(src & SLJIT_IMM)) {
+        /* IT mask 0x8: exactly one following instruction is conditional. */
+        FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+        return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src));
+    }
+
+    tmp = (sljit_uw) srcw;
+
+    if (tmp < 0x10000) {
+        /* set low 16 bits, set hi 16 bits to 0. */
+        FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+        return push_inst32(compiler, MOVW | RD4(dst_reg) |
+            COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
+    }
+
+    /* Try the value itself as an immediate operand of MOV_WI
+       (get_imm presumably returns the encoded operand or INVALID_IMM). */
+    tmp = get_imm(srcw);
+    if (tmp != INVALID_IMM) {
+        FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+        return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
+    }
+
+    /* Otherwise try the bitwise complement with MVN_WI. */
+    tmp = get_imm(~srcw);
+    if (tmp != INVALID_IMM) {
+        FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+        return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
+    }
+
+    /* General case: two conditional instructions. The mask repeats the low
+       bit of cc so that the second instruction uses the same condition. */
+    FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));
+
+    /* MOVW writes the low 16 bits, MOVT the high 16 bits of srcw. */
+    tmp = (sljit_uw) srcw;
+    FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) |
+        COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
+    return push_inst32(compiler, MOVT | RD4(dst_reg) |
+        COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
     struct sljit_const *const_;


Modified: code/trunk/src/sljit/sljitNativeMIPS_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeMIPS_32.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeMIPS_32.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -149,8 +149,6 @@
         FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
         FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
         FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
-        if (op & SLJIT_SET_Z)
-            return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
 #endif
         return SLJIT_SUCCESS;



Modified: code/trunk/src/sljit/sljitNativeMIPS_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeMIPS_64.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeMIPS_64.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -240,8 +240,6 @@
         FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
         FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
         FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
-        if (op & SLJIT_SET_Z)
-            return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
 #endif
         return SLJIT_SUCCESS;



Modified: code/trunk/src/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeMIPS_common.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeMIPS_common.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -173,6 +173,10 @@
 #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
 #define CLZ        (HI(28) | LO(32))
 #define DCLZ        (HI(28) | LO(36))
+#define MOVF        (HI(0) | (0 << 16) | LO(1))
+#define MOVN        (HI(0) | LO(11))
+#define MOVT        (HI(0) | (1 << 16) | LO(1))
+#define MOVZ        (HI(0) | LO(10))
 #define MUL        (HI(28) | LO(2))
 #define SEB        (HI(31) | (16 << 6) | LO(32))
 #define SEH        (HI(31) | (24 << 6) | LO(32))
@@ -490,6 +494,31 @@
     return code;
 }


+/* Reports whether the feature identified by feature_type is available.
+   SLJIT_HAS_FPU is answered by SLJIT_IS_FPU_AVAILABLE when that macro is
+   defined, otherwise by a run-time probe (GNU C compatible compilers only).
+   SLJIT_HAS_CLZ and SLJIT_HAS_CMOV are reported as available only when
+   SLJIT_MIPS_R1 is enabled. Every other feature type yields 0. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+    switch (feature_type) {
+    case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+        return SLJIT_IS_FPU_AVAILABLE;
+#elif defined(__GNUC__)
+        {
+            /* A declaration may not directly follow a case label in C
+               (before C23), so the probe lives in its own block. */
+            sljit_sw fir;
+
+            /* Read coprocessor 1 control register 0 (the FIR) and
+               report the state of its bit 22. */
+            asm ("cfc1 %0, $0" : "=r"(fir));
+            return (fir >> 22) & 0x1;
+        }
+#else
+#error "FIR check is not implemented for this architecture"
+#endif
+
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+    case SLJIT_HAS_CLZ:
+    case SLJIT_HAS_CMOV:
+        return 1;
+#endif
+
+    default:
+        return 0;
+    }
+}
+
 /* --------------------------------------------------------------------- */
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */
@@ -1250,19 +1279,6 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-    return SLJIT_IS_FPU_AVAILABLE;
-#elif defined(__GNUC__)
-    sljit_sw fir;
-    asm ("cfc1 %0, $0" : "=r"(fir));
-    return (fir >> 22) & 0x1;
-#else
-#error "FIR check is not implemented for this architecture"
-#endif
-}
-
 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7))
 #define FMT(op) (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) << (21 - 8))


@@ -1976,6 +1992,79 @@
#endif
}

+/* Emits a conditional move into dst_reg.
+   type:    the low 8 bits select the condition.
+   dst_reg: destination register; may be or-ed with SLJIT_I32_OP.
+   src:     source register, or SLJIT_IMM with the value in srcw.
+   When SLJIT_MIPS_R1 is enabled a single MOVZ / MOVN / MOVT / MOVF is
+   emitted; otherwise the work is delegated to sljit_emit_cmov_generic().
+   CHECK_ERROR / CHECK / FAIL_IF may presumably return earlier. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+    sljit_ins ins;
+#endif
+
+    CHECK_ERROR();
+    CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+
+    if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+        /* Immediates are first loaded into TMP_REG1; on MIPS-64 a 32 bit
+           operation narrows the value to sljit_s32 before the load. */
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+        if (dst_reg & SLJIT_I32_OP)
+            srcw = (sljit_s32)srcw;
+#endif
+        FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+        src = TMP_REG1;
+        srcw = 0;
+    }
+
+    dst_reg &= ~SLJIT_I32_OP;
+
+    /* Pick the move instruction and the flag register it tests. */
+    switch (type & 0xff) {
+    case SLJIT_EQUAL:
+        /* MOVZ: move when the tested register (EQUAL_FLAG) is zero. */
+        ins = MOVZ | TA(EQUAL_FLAG);
+        break;
+    case SLJIT_NOT_EQUAL:
+        /* MOVN: move when the tested register (EQUAL_FLAG) is non-zero. */
+        ins = MOVN | TA(EQUAL_FLAG);
+        break;
+    case SLJIT_LESS:
+    case SLJIT_GREATER:
+    case SLJIT_SIG_LESS:
+    case SLJIT_SIG_GREATER:
+    case SLJIT_OVERFLOW:
+    case SLJIT_MUL_OVERFLOW:
+        /* These conditions move when OTHER_FLAG is non-zero. */
+        ins = MOVN | TA(OTHER_FLAG);
+        break;
+    case SLJIT_GREATER_EQUAL:
+    case SLJIT_LESS_EQUAL:
+    case SLJIT_SIG_GREATER_EQUAL:
+    case SLJIT_SIG_LESS_EQUAL:
+    case SLJIT_NOT_OVERFLOW:
+    case SLJIT_MUL_NOT_OVERFLOW:
+        /* The inverse conditions move when OTHER_FLAG is zero. */
+        ins = MOVZ | TA(OTHER_FLAG);
+        break;
+    case SLJIT_EQUAL_F64:
+    case SLJIT_LESS_F64:
+    case SLJIT_LESS_EQUAL_F64:
+    case SLJIT_UNORDERED_F64:
+        /* Floating point conditions use MOVT without a flag register operand. */
+        ins = MOVT;
+        break;
+    case SLJIT_NOT_EQUAL_F64:
+    case SLJIT_GREATER_EQUAL_F64:
+    case SLJIT_GREATER_F64:
+    case SLJIT_ORDERED_F64:
+        /* The inverse floating point conditions use MOVF. */
+        ins = MOVF;
+        break;
+    default:
+        /* ins is still assigned here, so it is never used uninitialized
+           even if SLJIT_UNREACHABLE() does not stop execution. */
+        ins = MOVZ | TA(OTHER_FLAG);
+        SLJIT_UNREACHABLE();
+        break;
+    }
+
+    return push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg));
+
+#else
+    return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+#endif
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
     struct sljit_const *const_;


Modified: code/trunk/src/sljit/sljitNativePPC_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativePPC_32.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativePPC_32.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -88,35 +88,42 @@


     case SLJIT_NEG:
         SLJIT_ASSERT(src1 == TMP_REG1);
-        return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+        /* Setting XER SO is not enough, CR SO is also needed. */
+        return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));


     case SLJIT_CLZ:
         SLJIT_ASSERT(src1 == TMP_REG1);
-        return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+        return push_inst(compiler, CNTLZW | S(src2) | A(dst));


     case SLJIT_ADD:
         if (flags & ALT_FORM1) {
-            /* Flags does not set: BIN_IMM_EXTS unnecessary. */
-            SLJIT_ASSERT(src2 == TMP_REG2);
-            return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+            /* Setting XER SO is not enough, CR SO is also needed. */
+            return push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
         }
+
         if (flags & ALT_FORM2) {
             /* Flags does not set: BIN_IMM_EXTS unnecessary. */
             SLJIT_ASSERT(src2 == TMP_REG2);
-            return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+            if (flags & ALT_FORM3)
+                return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+            if (flags & ALT_FORM4) {
+                FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
+                src1 = dst;
+            }
+
+            return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
         }
         if (flags & ALT_FORM3) {
             SLJIT_ASSERT(src2 == TMP_REG2);
             return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
         }
-        if (flags & ALT_FORM4) {
-            /* Flags does not set: BIN_IMM_EXTS unnecessary. */
-            FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
-            return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
-        }
         if (!(flags & ALT_SET_FLAGS))
             return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
-        return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+        if (flags & ALT_FORM4)
+            return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+        return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2));


     case SLJIT_ADDC:
         return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
@@ -123,28 +130,42 @@


     case SLJIT_SUB:
         if (flags & ALT_FORM1) {
-            SLJIT_ASSERT(src2 == TMP_REG2);
-            return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
+            if (flags & ALT_FORM2) {
+                FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm));
+                if (!(flags & ALT_FORM3))
+                    return SLJIT_SUCCESS;
+                return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
+            }
+            FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2)));
+            if (!(flags & ALT_FORM3))
+                return SLJIT_SUCCESS;
+            return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
         }
-        if (flags & (ALT_FORM2 | ALT_FORM3)) {
-            SLJIT_ASSERT(src2 == TMP_REG2);
-            return push_inst(compiler, ((flags & ALT_FORM2) ? CMPI : CMPLI) | CRD(0) | A(src1) | compiler->imm);
+
+        if (flags & ALT_FORM2) {
+            /* Setting XER SO is not enough, CR SO is also needed. */
+            return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
         }
-        if (flags & (ALT_FORM4 | ALT_FORM5)) {
-            return push_inst(compiler, ((flags & ALT_FORM4) ? CMP : CMPL) | CRD(0) | A(src1) | B(src2));
-        }
-        if (flags & ALT_FORM6) {
+
+        if (flags & ALT_FORM3) {
+            /* Flags does not set: BIN_IMM_EXTS unnecessary. */
             SLJIT_ASSERT(src2 == TMP_REG2);
-            FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm));
-            return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
+            return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
         }
-        if (flags & ALT_FORM7) {
-            FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2)));
-            return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+
+        if (flags & ALT_FORM4) {
+            if (flags & ALT_FORM5) {
+                SLJIT_ASSERT(src2 == TMP_REG2);
+                return push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm);
+            }
+            return push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2));
         }
+
         if (!(flags & ALT_SET_FLAGS))
             return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
-        return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+        if (flags & ALT_FORM5)
+            return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+        return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));


     case SLJIT_SUBC:
         return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
@@ -154,7 +175,7 @@
             SLJIT_ASSERT(src2 == TMP_REG2);
             return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
         }
-        return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
+        return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1));


     case SLJIT_AND:
         if (flags & ALT_FORM1) {


Modified: code/trunk/src/sljit/sljitNativePPC_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativePPC_64.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativePPC_64.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -204,25 +204,51 @@


     case SLJIT_NEG:
         SLJIT_ASSERT(src1 == TMP_REG1);
+
+        if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) {
+            FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
+            FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2)));
+            return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
+        }
+
         UN_EXTS();
-        return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+        /* Setting XER SO is not enough, CR SO is also needed. */
+        return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));


     case SLJIT_CLZ:
         SLJIT_ASSERT(src1 == TMP_REG1);
         if (flags & ALT_FORM1)
-            return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
-        return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
+            return push_inst(compiler, CNTLZW | S(src2) | A(dst));
+        return push_inst(compiler, CNTLZD | S(src2) | A(dst));


     case SLJIT_ADD:
         if (flags & ALT_FORM1) {
-            /* Flags does not set: BIN_IMM_EXTS unnecessary. */
-            SLJIT_ASSERT(src2 == TMP_REG2);
-            return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+            if (flags & ALT_SIGN_EXT) {
+                FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
+                src1 = TMP_REG1;
+                FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
+                src2 = TMP_REG2;
+            }
+            /* Setting XER SO is not enough, CR SO is also needed. */
+            FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)));
+            if (flags & ALT_SIGN_EXT)
+                return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
+            return SLJIT_SUCCESS;
         }
+
         if (flags & ALT_FORM2) {
             /* Flags does not set: BIN_IMM_EXTS unnecessary. */
             SLJIT_ASSERT(src2 == TMP_REG2);
-            return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+            if (flags & ALT_FORM3)
+                return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+            if (flags & ALT_FORM4) {
+                FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
+                src1 = dst;
+            }
+
+            return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
         }
         if (flags & ALT_FORM3) {
             SLJIT_ASSERT(src2 == TMP_REG2);
@@ -229,15 +255,12 @@
             BIN_IMM_EXTS();
             return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
         }
-        if (flags & ALT_FORM4) {
-            /* Flags does not set: BIN_IMM_EXTS unnecessary. */
-            FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
-            return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
-        }
         if (!(flags & ALT_SET_FLAGS))
             return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
         BIN_EXTS();
-        return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+        if (flags & ALT_FORM4)
+            return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+        return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2));


     case SLJIT_ADDC:
         BIN_EXTS();
@@ -245,30 +268,52 @@


     case SLJIT_SUB:
         if (flags & ALT_FORM1) {
+            if (flags & ALT_FORM2) {
+                FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
+                if (!(flags & ALT_FORM3))
+                    return SLJIT_SUCCESS;
+                return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
+            }
+            FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
+            if (!(flags & ALT_FORM3))
+                return SLJIT_SUCCESS;
+            return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+        }
+
+        if (flags & ALT_FORM2) {
+            if (flags & ALT_SIGN_EXT) {
+                FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
+                src1 = TMP_REG1;
+                FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
+                src2 = TMP_REG2;
+            }
+            /* Setting XER SO is not enough, CR SO is also needed. */
+            FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)));
+            if (flags & ALT_SIGN_EXT)
+                return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
+            return SLJIT_SUCCESS;
+        }
+
+        if (flags & ALT_FORM3) {
             /* Flags does not set: BIN_IMM_EXTS unnecessary. */
             SLJIT_ASSERT(src2 == TMP_REG2);
             return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
         }
-        if (flags & (ALT_FORM2 | ALT_FORM3)) {
-            SLJIT_ASSERT(src2 == TMP_REG2);
-            return push_inst(compiler, ((flags & ALT_FORM2) ? CMPI : CMPLI) | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
+
+        if (flags & ALT_FORM4) {
+            if (flags & ALT_FORM5) {
+                SLJIT_ASSERT(src2 == TMP_REG2);
+                return push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
+            }
+            return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
         }
-        if (flags & (ALT_FORM4 | ALT_FORM5)) {
-            return push_inst(compiler, ((flags & ALT_FORM4) ? CMP : CMPL) | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
-        }
-        if (flags & ALT_FORM6) {
-            SLJIT_ASSERT(src2 == TMP_REG2);
-            FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
-            return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
-        }
-        if (flags & ALT_FORM7) {
-            FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
-            return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
-        }
+
         if (!(flags & ALT_SET_FLAGS))
             return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
         BIN_EXTS();
-        return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+        if (flags & ALT_FORM5)
+            return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+        return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));


     case SLJIT_SUBC:
         BIN_EXTS();
@@ -281,8 +326,8 @@
         }
         BIN_EXTS();
         if (flags & ALT_FORM2)
-            return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
-        return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1));
+            return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1));
+        return push_inst(compiler, MULLD | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1));


     case SLJIT_AND:
         if (flags & ALT_FORM1) {


Modified: code/trunk/src/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativePPC_common.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativePPC_common.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -127,9 +127,9 @@


 /* Instruction bit sections.
    OE and Rc flag (see ALT_SET_FLAGS). */
-#define OERC(flags)    (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
+#define OE(flags)    ((flags) & ALT_SET_FLAGS)
 /* Rc flag (see ALT_SET_FLAGS). */
-#define RC(flags)    ((flags & ALT_SET_FLAGS) >> 10)
+#define RC(flags)    (((flags) & ALT_SET_FLAGS) >> 10)
 #define HI(opcode)    ((opcode) << 26)
 #define LO(opcode)    ((opcode) << 1)


@@ -524,6 +524,26 @@
#endif
}

+/* Reports whether the feature identified by feature_type is available.
+   Returns the value of SLJIT_IS_FPU_AVAILABLE (or 1 when that macro is not
+   defined) for SLJIT_HAS_FPU, 1 for SLJIT_HAS_PRE_UPDATE and SLJIT_HAS_CLZ,
+   and 0 for every other feature type. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+    sljit_s32 available = 0;
+
+    switch (feature_type) {
+    case SLJIT_HAS_PRE_UPDATE:
+    case SLJIT_HAS_CLZ:
+        available = 1;
+        break;
+
+    case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+        available = SLJIT_IS_FPU_AVAILABLE;
+#else
+        /* Available by default. */
+        available = 1;
+#endif
+        break;
+
+    default:
+        /* Unknown feature types are reported as unavailable. */
+        break;
+    }
+
+    return available;
+}
+
 /* --------------------------------------------------------------------- */
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */
@@ -558,8 +578,6 @@
 #define ALT_FORM3    0x040000
 #define ALT_FORM4    0x080000
 #define ALT_FORM5    0x100000
-#define ALT_FORM6    0x200000
-#define ALT_FORM7    0x400000


 /* Source and destination is register. */
 #define REG_DEST    0x000001
@@ -574,7 +592,7 @@
 ALT_SET_FLAGS        0x000400
 ALT_FORM1        0x010000
 ...
-ALT_FORM7        0x400000 */
+ALT_FORM5        0x100000 */


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
 #include "sljitNativePPC_32.c"
@@ -1142,7 +1160,7 @@
     sljit_s32 src1_r;
     sljit_s32 src2_r;
     sljit_s32 sugg_src2_r = TMP_REG2;
-    sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_FORM7 | ALT_SIGN_EXT | ALT_SET_FLAGS);
+    sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS);


     if (!(input_flags & ALT_KEEP_CACHE)) {
         compiler->cache_arg = 0;
@@ -1350,6 +1368,8 @@
             flags |= INT_DATA | SIGNED_DATA;
             if (src & SLJIT_IMM)
                 srcw = (sljit_s32)srcw;
+            if (HAS_FLAGS(op_flags))
+                flags |= ALT_SIGN_EXT;
         }
 #endif
     }
@@ -1415,7 +1435,7 @@
         return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_NEG:
-        return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_CLZ:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1500,31 +1520,34 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_ADD:
+        if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW)
+            return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
+
         if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SL_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
             if (TEST_SH_IMM(src2, src2w)) {
                 compiler->imm = (src2w >> 16) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SH_IMM(src1, src1w)) {
                 compiler->imm = (src1w >> 16) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
             /* Range between -1 and -32768 is covered above. */
             if (TEST_ADD_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffffffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_ADD_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffffffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
         if (HAS_FLAGS(op)) {
@@ -1537,56 +1560,55 @@
                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
-        return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_ADDC:
         return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SUB:
-        if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL)
-        {
-            if (dst == SLJIT_UNUSED)
-            {
+        if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) {
+            if (dst == SLJIT_UNUSED) {
                 if (TEST_UL_IMM(src2, src2w)) {
                     compiler->imm = src2w & 0xffff;
-                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
                 }
-                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM5, dst, dstw, src1, src1w, src2, src2w);
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
             }


-            if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1))
-            {
+            if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
                 compiler->imm = src2w;
-                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM6, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
-            return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM7, dst, dstw, src1, src1w, src2, src2w);
+            return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);
         }


+        if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW)
+            return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w);
+
         if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
             if (TEST_SL_IMM(src2, -src2w)) {
                 compiler->imm = (-src2w) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SL_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffff;
-                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
             if (TEST_SH_IMM(src2, -src2w)) {
                 compiler->imm = ((-src2w) >> 16) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags |  ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             /* Range between -1 and -32768 is covered above. */
             if (TEST_ADD_IMM(src2, -src2w)) {
                 compiler->imm = -src2w & 0xffffffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
         }


-        if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)
-                && GET_FLAG_TYPE(op) == SLJIT_OVERFLOW && GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW) {
+        if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
-                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
         }
@@ -1596,7 +1618,7 @@
             return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
         }
         /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
-        return emit_op(compiler, SLJIT_SUB, flags, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SUBC:
         return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);
@@ -1697,16 +1719,6 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-    return SLJIT_IS_FPU_AVAILABLE;
-#else
-    /* Available by default. */
-    return 1;
-#endif
-}
-
 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
 #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)


@@ -2369,6 +2381,16 @@
     return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{ /* PPC conditional move: runs the argument checks, then returns the result of the generic emitter. */
+    CHECK_ERROR();
+    CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+    return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); /* No PPC-specific sequence is emitted here. */
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
     struct sljit_const *const_;


Modified: code/trunk/src/sljit/sljitNativeSPARC_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeSPARC_32.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeSPARC_32.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -80,11 +80,10 @@


     case SLJIT_CLZ:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-        /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */
         FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS));
         FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1)));
         FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS));
-        FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS)));
+        FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS));
         FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst)));


         /* Loop. */
@@ -91,7 +90,7 @@
         FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
         FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
         FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS));
-        return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS));
+        return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS);


     case SLJIT_ADD:
         return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));


Modified: code/trunk/src/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeSPARC_common.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeSPARC_common.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -394,6 +394,27 @@
     return code;
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{ /* SPARC backend: maps a SLJIT_HAS_* feature_type to 1 (or the configured FPU value) when reported, else 0. */
+    switch (feature_type) {
+    case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+        return SLJIT_IS_FPU_AVAILABLE; /* When the macro is defined, its value is returned as-is. */
+#else
+        /* Macro not defined: the FPU is reported as available by default. */
+        return 1;
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+    case SLJIT_HAS_CMOV:
+        return 1; /* This case exists only in SPARC-64 builds; otherwise SLJIT_HAS_CMOV reaches default. */
+#endif
+
+    default:
+        return 0; /* Every other feature_type is reported as not available. */
+    }
+}
+
 /* --------------------------------------------------------------------- */
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */
@@ -953,16 +974,6 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-    return SLJIT_IS_FPU_AVAILABLE;
-#else
-    /* Available by default. */
-    return 1;
-#endif
-}
-
 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7))
 #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
 #define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
@@ -1426,6 +1437,20 @@
 #endif
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{ /* SPARC conditional move: runs the argument checks, then (32-bit only) returns the generic emitter's result. */
+    CHECK_ERROR();
+    CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+    return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); /* No SPARC-specific sequence is emitted here. */
+#else
+#error "Implementation required" /* Any non-SPARC-32 configuration stops the build at this point. */
+#endif
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
     sljit_s32 reg;


Modified: code/trunk/src/sljit/sljitNativeTILEGX_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeTILEGX_64.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeTILEGX_64.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -2487,11 +2487,6 @@
     return jump;
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-    return 0;
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
 {
     SLJIT_UNREACHABLE();


Modified: code/trunk/src/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_common.c    2017-04-22 14:35:14 UTC (rev 764)
+++ code/trunk/src/sljit/sljitNativeX86_common.c    2017-04-26 08:34:09 UTC (rev 765)
@@ -588,6 +588,42 @@
     return (void*)(code + executable_offset);
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{ /* x86 backend: answers a SLJIT_HAS_* query from build-time macros or from the cpu_has_* variables. */
+    switch (feature_type) {
+    case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+        return SLJIT_IS_FPU_AVAILABLE; /* When the macro is defined, its value is returned as-is. */
+#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+        if (cpu_has_sse2 == -1) /* presumably -1 means "not detected yet" - confirm at the variable's definition */
+            get_cpu_features();
+        return cpu_has_sse2; /* With SSE2 detection enabled, the FPU answer is the SSE2 answer. */
+#else /* SLJIT_DETECT_SSE2 */
+        return 1;
+#endif /* SLJIT_DETECT_SSE2 */
+
+    case SLJIT_HAS_CLZ:
+        return 1; /* Reported unconditionally. */
+
+    case SLJIT_HAS_CMOV:
+        if (cpu_has_cmov == -1) /* same -1 check as above; get_cpu_features() is called before the value is returned */
+            get_cpu_features();
+        return cpu_has_cmov;
+
+    case SLJIT_HAS_SSE2:
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+        if (cpu_has_sse2 == -1)
+            get_cpu_features();
+        return cpu_has_sse2;
+#else
+        return 1; /* Without SLJIT_DETECT_SSE2, SSE2 is reported as present with no runtime check. */
+#endif
+
+    default:
+        return 0; /* Every other feature_type is reported as not available. */
+    }
+}
+
 /* --------------------------------------------------------------------- */
 /*  Operators                                                            */
 /* --------------------------------------------------------------------- */
@@ -1131,22 +1167,8 @@
     sljit_s32 dst_r;


     SLJIT_UNUSED_ARG(op_flags);
-    if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
-        /* Just set the zero flag. */
-        EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
-        inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
-        FAIL_IF(!inst);
-        *inst++ = GROUP_F7;
-        *inst |= NOT_rm;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-        inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
-#else
-        inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
-#endif
-        FAIL_IF(!inst);
-        *inst |= SHR;
+    if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
         return SLJIT_SUCCESS;
-    }


     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
         EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
@@ -2221,19 +2243,6 @@
     sse2_buffer[13] = 0x7fffffff;
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
-    return SLJIT_IS_FPU_AVAILABLE;
-#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-    if (cpu_has_sse2 == -1)
-        get_cpu_features();
-    return cpu_has_sse2;
-#else /* SLJIT_DETECT_SSE2 */
-    return 1;
-#endif /* SLJIT_DETECT_SSE2 */
-}
-
 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
     sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
 {
@@ -2776,6 +2785,46 @@
 #endif /* SLJIT_CONFIG_X86_64 */
 }


+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+    sljit_s32 dst_reg,
+    sljit_s32 src, sljit_sw srcw)
+{ /* x86 conditional move of src/srcw into dst_reg under condition `type`; uses the generic emitter on the fallback paths. */
+    sljit_u8* inst;
+
+    CHECK_ERROR();
+    CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+    dst_reg &= ~SLJIT_I32_OP; /* On x86-32 the SLJIT_I32_OP bit is simply cleared from dst_reg. */
+
+    if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3)) /* NOTE(review): presumably R3..S3 are not real hardware registers on x86-32 - confirm */
+        return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+#else
+    if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) /* dst_reg still carries any SLJIT_I32_OP bit when passed on here. */
+        return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+#endif
+
+    /* ADJUST_LOCAL_OFFSET is not needed. */
+    CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    compiler->mode32 = dst_reg & SLJIT_I32_OP; /* Record the 32-bit request in mode32 before the flag bit is removed. */
+    dst_reg &= ~SLJIT_I32_OP;
+#endif
+
+    if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { /* An immediate source is first moved into TMP_REG1, which then acts as the source. */
+        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+        src = TMP_REG1;
+        srcw = 0;
+    }
+
+    inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
+    FAIL_IF(!inst);
+    *inst++ = GROUP_0F; /* First of the two opcode bytes written at inst. */
+    *inst = get_jump_code(type & 0xff) - 0x40; /* NOTE(review): presumably maps a 0F 8x Jcc byte to the 0F 4x CMOVcc byte - confirm */
+    return SLJIT_SUCCESS;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 {
     CHECK_ERROR();
@@ -2869,74 +2918,3 @@
     SLJIT_UNUSED_ARG(executable_offset);
     sljit_unaligned_store_sw((void*)addr, new_constant);
 }
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
-{
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-    if (cpu_has_sse2 == -1)
-        get_cpu_features();
-    return cpu_has_sse2;
-#else
-    return 1;
-#endif
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
-{
-    if (cpu_has_cmov == -1)
-        get_cpu_features();
-    return cpu_has_cmov;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
-    sljit_s32 type,
-    sljit_s32 dst_reg,
-    sljit_s32 src, sljit_sw srcw)
-{
-    sljit_u8* inst;
-
-    CHECK_ERROR();
-#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-    CHECK_ARGUMENT(sljit_x86_is_cmov_available());
-    CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
-    CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
-    CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
-    FUNCTION_CHECK_SRC(src, srcw);
-
-    if ((type & 0xff) <= SLJIT_NOT_ZERO)
-        CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
-    else
-        CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
-#endif
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
-    if (SLJIT_UNLIKELY(!!compiler->verbose)) {
-        fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
-            !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
-            jump_names[type & 0xff], JUMP_POSTFIX(type));
-        sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
-        fprintf(compiler->verbose, ", ");
-        sljit_verbose_param(compiler, src, srcw);
-        fprintf(compiler->verbose, "\n");
-    }
-#endif
-
-    ADJUST_LOCAL_OFFSET(src, srcw);
-    CHECK_EXTRA_REGS(src, srcw, (void)0);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    compiler->mode32 = dst_reg & SLJIT_I32_OP;
-#endif
-    dst_reg &= ~SLJIT_I32_OP;
-
-    if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
-        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
-        src = TMP_REG1;
-        srcw = 0;
-    }
-
-    inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
-    FAIL_IF(!inst);
-    *inst++ = GROUP_0F;
-    *inst = get_jump_code(type & 0xff) - 0x40;
-    return SLJIT_SUCCESS;
-}