[Pcre-svn] [1483] code/trunk: Major JIT compiler update.

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [1483] code/trunk: Major JIT compiler update.
Revision: 1483
          http://vcs.pcre.org/viewvc?view=rev&revision=1483
Author:   zherczeg
Date:     2014-06-17 16:48:37 +0100 (Tue, 17 Jun 2014)


Log Message:
-----------
Major JIT compiler update.

Modified Paths:
--------------
    code/trunk/pcre_jit_compile.c
    code/trunk/sljit/sljitConfigInternal.h
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitLir.h
    code/trunk/sljit/sljitNativeARM_32.c
    code/trunk/sljit/sljitNativeARM_64.c
    code/trunk/sljit/sljitNativeARM_T2_32.c
    code/trunk/sljit/sljitNativeMIPS_common.c
    code/trunk/sljit/sljitNativePPC_common.c
    code/trunk/sljit/sljitNativeSPARC_common.c
    code/trunk/sljit/sljitNativeX86_32.c
    code/trunk/sljit/sljitNativeX86_64.c
    code/trunk/sljit/sljitNativeX86_common.c


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/pcre_jit_compile.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -477,14 +477,14 @@


 #define TMP1          SLJIT_SCRATCH_REG1
 #define TMP2          SLJIT_SCRATCH_REG3
-#define TMP3          SLJIT_TEMPORARY_EREG2
+#define TMP3          SLJIT_SCRATCH_EREG2
 #define STR_PTR       SLJIT_SAVED_REG1
 #define STR_END       SLJIT_SAVED_REG2
 #define STACK_TOP     SLJIT_SCRATCH_REG2
 #define STACK_LIMIT   SLJIT_SAVED_REG3
 #define ARGUMENTS     SLJIT_SAVED_EREG1
 #define COUNT_MATCH   SLJIT_SAVED_EREG2
-#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
+#define RETURN_ADDR   SLJIT_SCRATCH_EREG1


/* Local space layout. */
/* These two locals can be used by the current opcode. */

Modified: code/trunk/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/sljit/sljitConfigInternal.h    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitConfigInternal.h    2014-06-17 15:48:37 UTC (rev 1483)
@@ -34,7 +34,8 @@
    SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
    SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
    SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
-   SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double array by index
+   SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double precision floating point array by index
+   SLJIT_SINGLE_SHIFT : the shift required to apply when accessing a single precision floating point array by index
    SLJIT_LITTLE_ENDIAN : little endian architecture
    SLJIT_BIG_ENDIAN : big endian architecture
    SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
@@ -142,6 +143,19 @@
 #undef SLJIT_EXECUTABLE_ALLOCATOR
 #endif


+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#define SLJIT_CONFIG_X86 1
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+    || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#define SLJIT_CONFIG_ARM 1
+#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_CONFIG_PPC 1
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#define SLJIT_CONFIG_MIPS 1
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+#define SLJIT_CONFIG_SPARC 1
+#endif
+
 #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)


/* These libraries are needed for the macros below. */
@@ -219,7 +233,7 @@

#ifndef SLJIT_CACHE_FLUSH

-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)

 /* Not required to implement on archs with unified caches. */
 #define SLJIT_CACHE_FLUSH(from, to)
@@ -240,7 +254,7 @@
 #define SLJIT_CACHE_FLUSH(from, to) \
     cacheflush((long)(from), (long)(to), 0)


-#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)

/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
#define SLJIT_CACHE_FLUSH(from, to) \
@@ -314,6 +328,7 @@

/* Shift for double precision sized data. */
#define SLJIT_DOUBLE_SHIFT 3
+#define SLJIT_SINGLE_SHIFT 2

#ifndef SLJIT_W

@@ -418,22 +433,12 @@
#endif
#endif /* SLJIT_RETURN_ADDRESS_OFFSET */

-#ifndef SLJIT_SSE2
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-/* Turn on SSE2 support on x86. */
-#define SLJIT_SSE2 1
-
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 /* Auto detect SSE2 support using CPUID.
    On 64 bit x86 cpus, sse2 must be present. */
 #define SLJIT_DETECT_SSE2 1
 #endif


-#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */
-
-#endif /* !SLJIT_SSE2 */
-
#ifndef SLJIT_UNALIGNED

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \

Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitLir.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -117,7 +117,7 @@
 #define JUMP_ADDR    0x2
 /* SLJIT_REWRITABLE_JUMP is 0x1000. */


-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
 #    define PATCH_MB    0x4
 #    define PATCH_MW    0x8
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -162,7 +162,7 @@
 #    define PATCH_ABS64    0x100
 #endif


-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
 #    define IS_COND        0x004
 #    define IS_CALL        0x008
 #    define PATCH_B        0x010
@@ -174,7 +174,7 @@
 #    define REMOVE_COND    0x100
 #endif


-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
 #    define IS_MOVABLE    0x004
 #    define IS_JAL        0x008
 #    define IS_CALL        0x010
@@ -254,7 +254,7 @@
 #ifdef _AIX
 #define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw))
 #else
-#define FIXED_LOCALS_OFFSET (2 * sizeof(sljit_sw))
+#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw))
 #endif
 #endif


@@ -307,15 +307,11 @@
#include "sljitExecAllocator.c"
#endif

-#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) && !(defined SLJIT_SSE2 && SLJIT_SSE2)
-#error SLJIT_SSE2_AUTO cannot be enabled without SLJIT_SSE2
-#endif
-
 /* --------------------------------------------------------------------- */
 /*  Public functions                                                     */
 /* --------------------------------------------------------------------- */


-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || ((defined SLJIT_SSE2 && SLJIT_SSE2) && ((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)))
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
 #define SLJIT_NEEDS_COMPILER_INIT 1
 static sljit_si compiler_initialized = 0;
 /* A thread safe initialization. */
@@ -382,7 +378,7 @@
     compiler->cpool_diff = 0xffffffff;
 #endif


-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
     compiler->delay_slot = UNMOVABLE_INS;
 #endif


@@ -593,10 +589,6 @@
     case SLJIT_MUL: \
         SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
         break; \
-    case SLJIT_CMPD: \
-        SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
-        SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
-        break; \
     case SLJIT_ADD: \
         SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \
         break; \
@@ -625,6 +617,19 @@
         break; \
     }


+#define FUNCTION_CHECK_FOP() \
+    SLJIT_ASSERT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
+    switch (GET_OPCODE(op)) { \
+    case SLJIT_CMPD: \
+        SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+        SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
+        break; \
+    default: \
+        /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \
+        SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+        break; \
+    }
+
 #define FUNCTION_CHECK_IS_REG(r) \
     ((r) == SLJIT_UNUSED || \
     ((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \
@@ -669,6 +674,8 @@
 #define FUNCTION_FCHECK(p, i) \
     if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \
         SLJIT_ASSERT(i == 0); \
+    else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+        SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
     else if ((p) & SLJIT_MEM) { \
         SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
         if ((p) & OFFS_REG_MASK) { \
@@ -777,7 +784,9 @@
     (char*)"mul", (char*)"and", (char*)"or", (char*)"xor",
     (char*)"shl", (char*)"lshr", (char*)"ashr",
     /* fop1 */
-    (char*)"cmp", (char*)"mov", (char*)"neg", (char*)"abs",
+    (char*)"mov", (char*)"neg", (char*)"abs", (char*)"conv",
+    (char*)"conv", (char*)"conv", (char*)"conv", (char*)"conv",
+    (char*)"cmp",
     /* fop2 */
     (char*)"add", (char*)"sub", (char*)"mul", (char*)"div"
 };
@@ -1055,6 +1064,32 @@
     SLJIT_ASSERT(instruction);
 }


+#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \
+    SLJIT_ASSERT(sljit_is_fpu_available()); \
+    SLJIT_COMPILE_ASSERT(!(SLJIT_CONVW_FROMD & 0x1) && !(SLJIT_CONVD_FROMW & 0x1) && (SLJIT_MOVD < SLJIT_CONVW_FROMD), \
+        invalid_float_opcodes); \
+    if (GET_OPCODE(op) >= SLJIT_CONVW_FROMD) { \
+        if (GET_OPCODE(op) == SLJIT_CMPD) { \
+            check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \
+            ADJUST_LOCAL_OFFSET(dst, dstw); \
+            ADJUST_LOCAL_OFFSET(src, srcw); \
+            return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \
+        } \
+        if ((GET_OPCODE(op) | 0x1) == SLJIT_CONVI_FROMD) { \
+            check_sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \
+            ADJUST_LOCAL_OFFSET(dst, dstw); \
+            ADJUST_LOCAL_OFFSET(src, srcw); \
+            return sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \
+        } \
+        check_sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \
+        ADJUST_LOCAL_OFFSET(dst, dstw); \
+        ADJUST_LOCAL_OFFSET(src, srcw); \
+        return sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \
+    } \
+    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); \
+    ADJUST_LOCAL_OFFSET(dst, dstw); \
+    ADJUST_LOCAL_OFFSET(src, srcw);
+
 static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
@@ -1074,25 +1109,139 @@
     }
 #endif


-    SLJIT_ASSERT(sljit_is_fpu_available());
-    SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CMPD && GET_OPCODE(op) <= SLJIT_ABSD);
+    SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOVD && GET_OPCODE(op) <= SLJIT_CONVD_FROMS);
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-    FUNCTION_CHECK_OP();
+    FUNCTION_CHECK_FOP();
     FUNCTION_FCHECK(src, srcw);
     FUNCTION_FCHECK(dst, dstw);
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+        fprintf(compiler->verbose, "  %s%s ", op_names[GET_OPCODE(op)],
+            (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+            ? ((op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms")
+            : ((op & SLJIT_SINGLE_OP) ? "s" : "d"));
+        sljit_verbose_fparam(dst, dstw);
+        fprintf(compiler->verbose, ", ");
+        sljit_verbose_fparam(src, srcw);
+        fprintf(compiler->verbose, "\n");
+    }
+#endif
+}
+
+static SLJIT_INLINE void check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    /* If debug and verbose are disabled, all arguments are unused. */
+    SLJIT_UNUSED_ARG(compiler);
+    SLJIT_UNUSED_ARG(op);
+    SLJIT_UNUSED_ARG(src1);
+    SLJIT_UNUSED_ARG(src1w);
+    SLJIT_UNUSED_ARG(src2);
+    SLJIT_UNUSED_ARG(src2w);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+        compiler->skip_checks = 0;
+        return;
+    }
+#endif
+
+    SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_CMPD);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    FUNCTION_CHECK_FOP();
+    FUNCTION_FCHECK(src1, src1w);
+    FUNCTION_FCHECK(src2, src2w);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+    if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d",
             !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s");
-        sljit_verbose_fparam(dst, dstw);
+        sljit_verbose_fparam(src1, src1w);
         fprintf(compiler->verbose, ", ");
+        sljit_verbose_fparam(src2, src2w);
+        fprintf(compiler->verbose, "\n");
+    }
+#endif
+}
+
+static SLJIT_INLINE void check_sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    /* If debug and verbose are disabled, all arguments are unused. */
+    SLJIT_UNUSED_ARG(compiler);
+    SLJIT_UNUSED_ARG(op);
+    SLJIT_UNUSED_ARG(dst);
+    SLJIT_UNUSED_ARG(dstw);
+    SLJIT_UNUSED_ARG(src);
+    SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+        compiler->skip_checks = 0;
+        return;
+    }
+#endif
+
+    SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CONVI_FROMD);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    FUNCTION_CHECK_FOP();
+    FUNCTION_FCHECK(src, srcw);
+    FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+    if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+        fprintf(compiler->verbose, "  %s%s.from%s ", op_names[GET_OPCODE(op)],
+            (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w",
+            (op & SLJIT_SINGLE_OP) ? "s" : "d");
+        sljit_verbose_param(dst, dstw);
+        fprintf(compiler->verbose, ", ");
         sljit_verbose_fparam(src, srcw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
 }


+static SLJIT_INLINE void check_sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    /* If debug and verbose are disabled, all arguments are unused. */
+    SLJIT_UNUSED_ARG(compiler);
+    SLJIT_UNUSED_ARG(op);
+    SLJIT_UNUSED_ARG(dst);
+    SLJIT_UNUSED_ARG(dstw);
+    SLJIT_UNUSED_ARG(src);
+    SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+        compiler->skip_checks = 0;
+        return;
+    }
+#endif
+
+    SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CONVD_FROMW && GET_OPCODE(op) <= SLJIT_CONVD_FROMI);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    FUNCTION_CHECK_FOP();
+    FUNCTION_CHECK_SRC(src, srcw);
+    FUNCTION_FCHECK(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+    if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+        fprintf(compiler->verbose, "  %s%s.from%s ", op_names[GET_OPCODE(op)],
+            (op & SLJIT_SINGLE_OP) ? "s" : "d",
+            (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w");
+        sljit_verbose_fparam(dst, dstw);
+        fprintf(compiler->verbose, ", ");
+        sljit_verbose_param(src, srcw);
+        fprintf(compiler->verbose, "\n");
+    }
+#endif
+}
+
 static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src1, sljit_sw src1w,
@@ -1111,7 +1260,7 @@
     SLJIT_ASSERT(sljit_is_fpu_available());
     SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADDD && GET_OPCODE(op) <= SLJIT_DIVD);
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-    FUNCTION_CHECK_OP();
+    FUNCTION_CHECK_FOP();
     FUNCTION_FCHECK(src1, src1w);
     FUNCTION_FCHECK(src2, src2w);
     FUNCTION_FCHECK(dst, dstw);
@@ -1374,10 +1523,8 @@


#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3

-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
 #    include "sljitNativeX86_common.c"
-#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-#    include "sljitNativeX86_common.c"
 #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 #    include "sljitNativeARM_32.c"
 #elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
@@ -1386,21 +1533,17 @@
 #    include "sljitNativeARM_T2_32.c"
 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
 #    include "sljitNativeARM_64.c"
-#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
 #    include "sljitNativePPC_common.c"
-#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-#    include "sljitNativePPC_common.c"
-#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
 #    include "sljitNativeMIPS_common.c"
-#elif (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
-#    include "sljitNativeMIPS_common.c"
-#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
 #    include "sljitNativeSPARC_common.c"
 #elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
 #    include "sljitNativeTILEGX_64.c"
 #endif


-#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)

 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
     sljit_si src1, sljit_sw src1w,
@@ -1508,7 +1651,7 @@


#endif

-#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{

Modified: code/trunk/sljit/sljitLir.h
===================================================================
--- code/trunk/sljit/sljitLir.h    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitLir.h    2014-06-17 15:48:37 UTC (rev 1483)
@@ -114,8 +114,8 @@
 /* Note: extra registers cannot be used for memory addressing. */
 /* Note: on x86-32, these registers are emulated (using stack
    loads & stores). */
-#define SLJIT_TEMPORARY_EREG1    4
-#define SLJIT_TEMPORARY_EREG2    5
+#define SLJIT_SCRATCH_EREG1    4
+#define SLJIT_SCRATCH_EREG2    5


 /* Saved registers whose preserve their values across function calls. */
 #define SLJIT_SAVED_REG1    6
@@ -238,7 +238,7 @@
     sljit_si mode32;
 #endif


-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
     sljit_si flags_saved;
 #endif


@@ -271,13 +271,13 @@
     sljit_sw cache_argw;
 #endif


-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
     sljit_sw imm;
     sljit_si cache_arg;
     sljit_sw cache_argw;
 #endif


-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
     sljit_si delay_slot;
     sljit_si cache_arg;
     sljit_sw cache_argw;
@@ -738,37 +738,55 @@


SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);

-/* Note: dst is the left and src is the right operand for SLJIT_FCMP.
-   Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED is set,
-         the comparison result is unpredictable.
-   Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
-#define SLJIT_CMPD            36
-#define SLJIT_CMPS            (SLJIT_CMPD | SLJIT_SINGLE_OP)
 /* Flags: SP - (never set any flags) */
-#define SLJIT_MOVD            37
+#define SLJIT_MOVD            36
 #define SLJIT_MOVS            (SLJIT_MOVD | SLJIT_SINGLE_OP)
 /* Flags: SP - (never set any flags) */
-#define SLJIT_NEGD            38
+#define SLJIT_NEGD            37
 #define SLJIT_NEGS            (SLJIT_NEGD | SLJIT_SINGLE_OP)
 /* Flags: SP - (never set any flags) */
-#define SLJIT_ABSD            39
+#define SLJIT_ABSD            38
 #define SLJIT_ABSS            (SLJIT_ABSD | SLJIT_SINGLE_OP)
+/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
+   SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int
+   Rounding mode when the destination is W or I: round towards zero. */
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMS        39
+#define SLJIT_CONVS_FROMD        (SLJIT_CONVD_FROMS | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVW_FROMD        40
+#define SLJIT_CONVW_FROMS        (SLJIT_CONVW_FROMD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVI_FROMD        41
+#define SLJIT_CONVI_FROMS        (SLJIT_CONVI_FROMD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMW        42
+#define SLJIT_CONVS_FROMW        (SLJIT_CONVD_FROMW | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMI        43
+#define SLJIT_CONVS_FROMI        (SLJIT_CONVD_FROMI | SLJIT_SINGLE_OP)
+/* Note: dst is the left and src is the right operand for SLJIT_CMPD.
+   Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag
+         is set, the comparison result is unpredictable.
+   Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
+#define SLJIT_CMPD            44
+#define SLJIT_CMPS            (SLJIT_CMPD | SLJIT_SINGLE_OP)


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw);


 /* Flags: SP - (never set any flags) */
-#define SLJIT_ADDD            40
+#define SLJIT_ADDD            45
 #define SLJIT_ADDS            (SLJIT_ADDD | SLJIT_SINGLE_OP)
 /* Flags: SP - (never set any flags) */
-#define SLJIT_SUBD            41
+#define SLJIT_SUBD            46
 #define SLJIT_SUBS            (SLJIT_SUBD | SLJIT_SINGLE_OP)
 /* Flags: SP - (never set any flags) */
-#define SLJIT_MULD            42
+#define SLJIT_MULD            47
 #define SLJIT_MULS            (SLJIT_MULD | SLJIT_SINGLE_OP)
 /* Flags: SP - (never set any flags) */
-#define SLJIT_DIVD            43
+#define SLJIT_DIVD            48
 #define SLJIT_DIVS            (SLJIT_DIVD | SLJIT_SINGLE_OP)


SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
@@ -912,7 +930,7 @@
/* --------------------------------------------------------------------- */

 #define SLJIT_MAJOR_VERSION    0
-#define SLJIT_MINOR_VERSION    91
+#define SLJIT_MINOR_VERSION    92


 /* Get the human readable name of the platform. Can be useful on platforms
    like ARM, where ARM and Thumb2 functions can be mixed, and


Modified: code/trunk/sljit/sljitNativeARM_32.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_32.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeARM_32.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -102,8 +102,12 @@
 #define VABS_F32    0xeeb00ac0
 #define VADD_F32    0xee300a00
 #define VCMP_F32    0xeeb40a40
+#define VCVT_F32_S32    0xeeb80ac0
+#define VCVT_F64_F32    0xeeb70ac0
+#define VCVT_S32_F32    0xeebd0ac0
 #define VDIV_F32    0xee800a00
 #define VMOV_F32    0xeeb00a40
+#define VMOV        0xee000a10
 #define VMRS        0xeef1fa10
 #define VMUL_F32    0xee200a00
 #define VNEG_F32    0xeeb10a40
@@ -810,9 +814,6 @@
 #define SIGNED_DATA    0x40
 #define LOAD_DATA    0x80


-#define EMIT_INSTRUCTION(inst) \
-    FAIL_IF(push_inst(compiler, (inst)))
-
 /* Condition: AL. */
 #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
     (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
@@ -853,7 +854,7 @@
         push |= 1 << 5;
     if (saveds >= 1)
         push |= 1 << 4;
-    EMIT_INSTRUCTION(push);
+    FAIL_IF(push_inst(compiler, push));


     /* Stack must be aligned to 8 bytes: */
     size = (1 + saveds) * sizeof(sljit_uw);
@@ -867,11 +868,11 @@
         FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size));


     if (args >= 1)
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1)));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1))));
     if (args >= 2)
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))));
     if (args >= 3)
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3)));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3))));


     return SLJIT_SUCCESS;
 }
@@ -1031,7 +1032,7 @@
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
             if (op == SLJIT_MOV_UB)
                 return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
             return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
 #else
             return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
@@ -1050,7 +1051,7 @@
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
         if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
             return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
 #else
             return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
@@ -1303,8 +1304,8 @@
             return 0;
     }


-    EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1));
-    EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2));
+    FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
+    FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
     return 1;
 }
 #endif
@@ -1320,16 +1321,12 @@


     /* Create imm by 1 inst. */
     tmp = get_imm(imm);
-    if (tmp) {
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
-        return SLJIT_SUCCESS;
-    }
+    if (tmp)
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));


     tmp = get_imm(~imm);
-    if (tmp) {
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
-        return SLJIT_SUCCESS;
-    }
+    if (tmp)
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));


 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
     /* Create imm by 2 inst. */
@@ -1369,14 +1366,14 @@
         if (imm) {
             if (inp_flags & ARG_TEST)
                 return 1;
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
             return -1;
         }
         imm = get_imm(~argw);
         if (imm) {
             if (inp_flags & ARG_TEST)
                 return 1;
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
             return -1;
         }
         return 0;
@@ -1394,8 +1391,8 @@


         if (inp_flags & ARG_TEST)
             return 1;
-        EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
-            RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
+            RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
         return -1;
     }


@@ -1403,13 +1400,13 @@
         if (argw >= 0 && argw <= 0xfff) {
             if (inp_flags & ARG_TEST)
                 return 1;
-            EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
             return -1;
         }
         if (argw < 0 && argw >= -0xfff) {
             if (inp_flags & ARG_TEST)
                 return 1;
-            EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
             return -1;
         }
     }
@@ -1417,14 +1414,14 @@
         if (argw >= 0 && argw <= 0xff) {
             if (inp_flags & ARG_TEST)
                 return 1;
-            EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
             return -1;
         }
         if (argw < 0 && argw >= -0xff) {
             if (inp_flags & ARG_TEST)
                 return 1;
             argw = -argw;
-            EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
             return -1;
         }
     }
@@ -1477,7 +1474,7 @@
             /* This can only happen for stores */ \
             /* since ldr reg, [reg, ...]! has no meaning */ \
             SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
             reg = TMP_REG3; \
         } \
     }
@@ -1537,9 +1534,8 @@
         SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
         if (inp_flags & WRITE_BACK)
             tmp_r = arg & REG_MASK;
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
-        EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
-        return SLJIT_SUCCESS;
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+        return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
     }


     imm = (sljit_uw)(argw - compiler->cache_argw);
@@ -1558,7 +1554,7 @@
     imm = get_imm(argw & ~max_delta);
     if (imm) {
         TEST_WRITE_BACK();
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
         GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
         return SLJIT_SUCCESS;
     }
@@ -1567,15 +1563,14 @@
     if (imm) {
         argw = -argw;
         TEST_WRITE_BACK();
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
         GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
         return SLJIT_SUCCESS;
     }


     if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
         TEST_WRITE_BACK();
-        EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-        return SLJIT_SUCCESS;
+        return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
     }


     if (argw == next_argw && (next_arg & SLJIT_MEM)) {
@@ -1586,15 +1581,14 @@
         compiler->cache_argw = argw;


         TEST_WRITE_BACK();
-        EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-        return SLJIT_SUCCESS;
+        return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
     }


     imm = (sljit_uw)(argw - next_argw);
     if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
         SLJIT_ASSERT(inp_flags & LOAD_DATA);
         FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));


         compiler->cache_arg = arg;
         compiler->cache_argw = argw;
@@ -1610,8 +1604,7 @@
     }


     FAIL_IF(load_immediate(compiler, tmp_r, argw));
-    EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-    return SLJIT_SUCCESS;
+    return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
 }


 static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
@@ -1843,10 +1836,10 @@
     op = GET_OPCODE(op);
     switch (op) {
     case SLJIT_BREAKPOINT:
-        EMIT_INSTRUCTION(BKPT);
+        FAIL_IF(push_inst(compiler, BKPT));
         break;
     case SLJIT_NOP:
-        EMIT_INSTRUCTION(NOP);
+        FAIL_IF(push_inst(compiler, NOP));
         break;
     case SLJIT_UMUL:
     case SLJIT_SMUL:
@@ -1857,7 +1850,7 @@
             | (reg_map[SLJIT_SCRATCH_REG1] << 8)
             | reg_map[SLJIT_SCRATCH_REG2]);
 #else
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))));
         return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
             | (reg_map[SLJIT_SCRATCH_REG2] << 16)
             | (reg_map[SLJIT_SCRATCH_REG1] << 12)
@@ -1867,7 +1860,7 @@
     case SLJIT_UDIV:
     case SLJIT_SDIV:
         if (compiler->scratches >= 3)
-            EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */);
+            FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
 #if defined(__GNUC__)
         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
             (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
@@ -2064,7 +2057,7 @@
     SLJIT_ASSERT(arg & SLJIT_MEM);


     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
         arg = SLJIT_MEM | TMP_REG1;
         argw = 0;
     }
@@ -2097,13 +2090,13 @@
         }
         imm = get_imm(argw & ~0x3fc);
         if (imm) {
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
         }
         imm = get_imm(-argw & ~0x3fc);
         if (imm) {
             argw = -argw;
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
         }
     }
@@ -2112,7 +2105,7 @@
     compiler->cache_argw = argw;
     if (arg & REG_MASK) {
         FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
     }
     else
         FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
@@ -2120,60 +2113,114 @@
     return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
 }


+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    if (src & SLJIT_MEM) {
+        FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
+        src = TMP_FREG1;
+    }
+
+    FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0)));
+
+    if (dst == SLJIT_UNUSED)
+        return SLJIT_SUCCESS;
+
+    if (FAST_IS_REG(dst))
+        return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
+
+    /* Store the integer value from a VFP register. */
+    return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+    if (FAST_IS_REG(src))
+        FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
+    else if (src & SLJIT_MEM) {
+        /* Load the integer value into a VFP register. */
+        FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
+    }
+    else {
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+        FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
+    }
+
+    FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0)));
+
+    if (dst & SLJIT_MEM)
+        return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+    return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    if (src1 & SLJIT_MEM) {
+        FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
+        src1 = TMP_FREG1;
+    }
+
+    if (src2 & SLJIT_MEM) {
+        FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
+        src2 = TMP_FREG2;
+    }
+
+    FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0)));
+    return push_inst(compiler, VMRS);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
 {
-    sljit_si dst_fr;
+    sljit_si dst_r;


     CHECK_ERROR();
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
-
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
-    op ^= SLJIT_SINGLE_OP;
+    if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
+        op ^= SLJIT_SINGLE_OP;


-    if (GET_OPCODE(op) == SLJIT_CMPD) {
-        if (dst & SLJIT_MEM) {
-            FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw));
-            dst = TMP_FREG1;
-        }
-        if (src & SLJIT_MEM) {
-            FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw));
-            src = TMP_FREG2;
-        }
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, dst, src, 0));
-        EMIT_INSTRUCTION(VMRS);
-        return SLJIT_SUCCESS;
-    }
+    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+    SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);


-    dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;


     if (src & SLJIT_MEM) {
-        FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_fr, src, srcw));
-        src = dst_fr;
+        FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw));
+        src = dst_r;
     }


     switch (GET_OPCODE(op)) {
-        case SLJIT_MOVD:
-            if (src != dst_fr && dst_fr != TMP_FREG1)
-                EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
-            break;
-        case SLJIT_NEGD:
-            EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
-            break;
-        case SLJIT_ABSD:
-            EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
-            break;
+    case SLJIT_MOVD:
+        if (src != dst_r) {
+            if (dst_r != TMP_FREG1)
+                FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+            else
+                dst_r = src;
+        }
+        break;
+    case SLJIT_NEGD:
+        FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+        break;
+    case SLJIT_ABSD:
+        FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+        break;
+    case SLJIT_CONVD_FROMS:
+        FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+        op ^= SLJIT_SINGLE_OP;
+        break;
     }


-    if (dst_fr == TMP_FREG1) {
-        if (GET_OPCODE(op) == SLJIT_MOVD)
-            dst_fr = src;
-        FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_fr, dst, dstw));
-    }
-
+    if (dst & SLJIT_MEM)
+        return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
     return SLJIT_SUCCESS;
 }


@@ -2182,16 +2229,19 @@
     sljit_si src1, sljit_sw src1w,
     sljit_si src2, sljit_sw src2w)
 {
-    sljit_si dst_fr;
+    sljit_si dst_r;


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
     op ^= SLJIT_SINGLE_OP;


-    dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;


     if (src2 & SLJIT_MEM) {
         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
@@ -2205,23 +2255,23 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_ADDD:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+        FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
         break;


     case SLJIT_SUBD:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+        FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
         break;


     case SLJIT_MULD:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+        FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
         break;


     case SLJIT_DIVD:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+        FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
         break;
     }


-    if (dst_fr == TMP_FREG1)
+    if (dst_r == TMP_FREG1)
         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw));


     return SLJIT_SUCCESS;
@@ -2252,7 +2302,7 @@
     if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
         return compiler->error;
     /* TMP_REG3 is used for caching. */
-    EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)));
+    FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
     return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
@@ -2265,7 +2315,7 @@
     ADJUST_LOCAL_OFFSET(src, srcw);


     if (FAST_IS_REG(src))
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
     else if (src & SLJIT_MEM) {
         if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
             FAIL_IF(compiler->error);
@@ -2273,7 +2323,7 @@
             compiler->cache_arg = 0;
             compiler->cache_argw = 0;
             FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
         }
     }
     else if (src & SLJIT_IMM)
@@ -2454,14 +2504,14 @@
     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;


     if (op < SLJIT_ADD) {
-        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0));
-        EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
+        FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
         return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
     }


     ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
     if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
-        EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc);
+        FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
         /* The condition must always be set, even if the ORR/EOR is not executed above. */
         return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
     }
@@ -2478,8 +2528,8 @@
         srcw = 0;
     }


-    EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc);
-    EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000));
+    FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
+    FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
     if (dst_r == TMP_REG2)
         FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));



Modified: code/trunk/sljit/sljitNativeARM_64.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_64.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeARM_64.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -83,6 +83,8 @@
 #define FABS 0x1e60c000
 #define FADD 0x1e602800
 #define FCMP 0x1e602000
+#define FCVT 0x1e224000
+#define FCVTZS 0x9e780000
 #define FDIV 0x1e601800
 #define FMOV 0x1e604000
 #define FMUL 0x1e600800
@@ -104,6 +106,7 @@
 #define RET 0xd65f0000
 #define SBC 0xda000000
 #define SBFM 0x93000000
+#define SCVTF 0x9e620000
 #define SDIV 0x9ac00c00
 #define SMADDL 0x9b200000
 #define SMULH 0x9b403c00
@@ -1524,41 +1527,107 @@
     return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3));
 }


+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+    sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+    if (GET_OPCODE(op) == SLJIT_CONVI_FROMD)
+        inv_bits |= (1 << 31);
+
+    if (src & SLJIT_MEM) {
+        emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
+        src = TMP_FREG1;
+    }
+
+    FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
+
+    if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+        return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
+    return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+    sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+    if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+        inv_bits |= (1 << 31);
+
+    if (src & SLJIT_MEM) {
+        emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw);
+        src = TMP_REG1;
+    } else if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+        if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+            srcw = (sljit_si)srcw;
+#endif
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+        src = TMP_REG1;
+    }
+
+    FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
+
+    if (dst & SLJIT_MEM)
+        return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
+    return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
+    sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+    if (src1 & SLJIT_MEM) {
+        emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+        src1 = TMP_FREG1;
+    }
+
+    if (src2 & SLJIT_MEM) {
+        emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+        src2 = TMP_FREG2;
+    }
+
+    return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
 {
     sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
-    sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+    sljit_ins inv_bits;


     CHECK_ERROR();
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    if (GET_OPCODE(op) == SLJIT_CMPD) {
-        if (dst & SLJIT_MEM) {
-            emit_fop_mem(compiler, mem_flags, TMP_FREG1, dst, dstw);
-            dst = TMP_FREG1;
-        }
-        if (src & SLJIT_MEM) {
-            emit_fop_mem(compiler, mem_flags, TMP_FREG2, src, srcw);
-            src = TMP_FREG2;
-        }
-        return push_inst(compiler, (FCMP ^ inv_bits) | VN(dst) | VM(src));
-    }
+    SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference);
+    SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);


-    dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+    inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
     if (src & SLJIT_MEM) {
-        emit_fop_mem(compiler, mem_flags, dst_r, src, srcw);
+        emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw);
         src = dst_r;
     }


     switch (GET_OPCODE(op)) {
     case SLJIT_MOVD:
-        if (src != dst_r)
-            FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
+        if (src != dst_r) {
+            if (dst_r != TMP_FREG1)
+                FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
+            else
+                dst_r = src;
+        }
         break;
     case SLJIT_NEGD:
         FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
@@ -1566,11 +1635,14 @@
     case SLJIT_ABSD:
         FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
         break;
+    case SLJIT_CONVD_FROMS:
+        FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
+        break;
     }


-    if (!(dst & SLJIT_MEM))
-        return SLJIT_SUCCESS;
-    return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
+    if (dst & SLJIT_MEM)
+        return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
+    return SLJIT_SUCCESS;
 }


SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
@@ -1583,11 +1655,14 @@

     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
     if (src1 & SLJIT_MEM) {
         emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
         src1 = TMP_FREG1;
@@ -1631,7 +1706,7 @@
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


-    if (dst <= REG_MASK)
+    if (FAST_IS_REG(dst))
         return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));


     /* Memory. */
@@ -1644,7 +1719,7 @@
     check_sljit_emit_fast_return(compiler, src, srcw);
     ADJUST_LOCAL_OFFSET(src, srcw);


-    if (src <= REG_MASK)
+    if (FAST_IS_REG(src))
         FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
     else if (src & SLJIT_MEM)
         FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
@@ -1833,7 +1908,7 @@
         return SLJIT_SUCCESS;


     cc = get_cc(type);
-    dst_r = (dst <= REG_MASK) ? dst : TMP_REG1;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;


     if (GET_OPCODE(op) < SLJIT_ADD) {
         FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));


Modified: code/trunk/sljit/sljitNativeARM_T2_32.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_T2_32.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeARM_T2_32.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -169,8 +169,12 @@
 #define VABS_F32    0xeeb00ac0
 #define VADD_F32    0xee300a00
 #define VCMP_F32    0xeeb40a40
+#define VCVT_F32_S32    0xeeb80ac0
+#define VCVT_F64_F32    0xeeb70ac0
+#define VCVT_S32_F32    0xeebd0ac0
 #define VDIV_F32    0xee800a00
 #define VMOV_F32    0xeeb00a40
+#define VMOV        0xee000a10
 #define VMRS        0xeef1fa10
 #define VMUL_F32    0xee200a00
 #define VNEG_F32    0xeeb10a40
@@ -1607,6 +1611,69 @@
     return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
 }


+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    if (src & SLJIT_MEM) {
+        FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
+        src = TMP_FREG1;
+    }
+
+    FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_SINGLE_OP) | DD4(TMP_FREG1) | DM4(src)));
+
+    if (dst == SLJIT_UNUSED)
+        return SLJIT_SUCCESS;
+
+    if (FAST_IS_REG(dst))
+        return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));
+
+    /* Store the integer value from a VFP register. */
+    return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+    if (FAST_IS_REG(src))
+        FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
+    else if (src & SLJIT_MEM) {
+        /* Load the integer value into a VFP register. */
+        FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
+    }
+    else {
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+        FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
+    }
+
+    FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(TMP_FREG1)));
+
+    if (dst & SLJIT_MEM)
+        return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+    return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    if (src1 & SLJIT_MEM) {
+        emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
+        src1 = TMP_FREG1;
+    }
+
+    if (src2 & SLJIT_MEM) {
+        emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
+        src2 = TMP_FREG2;
+    }
+
+    FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(src1) | DM4(src2)));
+    return push_inst32(compiler, VMRS);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
@@ -1614,27 +1681,16 @@
     sljit_si dst_r;


     CHECK_ERROR();
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
-
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
-    op ^= SLJIT_SINGLE_OP;
+    if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
+        op ^= SLJIT_SINGLE_OP;


-    if (GET_OPCODE(op) == SLJIT_CMPD) {
-        if (dst & SLJIT_MEM) {
-            emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw);
-            dst = TMP_FREG1;
-        }
-        if (src & SLJIT_MEM) {
-            emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw);
-            src = TMP_FREG2;
-        }
-        FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst) | DM4(src)));
-        return push_inst32(compiler, VMRS);
-    }
+    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+    SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);


-    dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
     if (src & SLJIT_MEM) {
         emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw);
         src = dst_r;
@@ -1642,8 +1698,12 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_MOVD:
-        if (src != dst_r)
-            FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+        if (src != dst_r) {
+            if (dst_r != TMP_FREG1)
+                FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+            else
+                dst_r = src;
+        }
         break;
     case SLJIT_NEGD:
         FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
@@ -1651,11 +1711,15 @@
     case SLJIT_ABSD:
         FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
         break;
+    case SLJIT_CONVD_FROMS:
+        FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+        op ^= SLJIT_SINGLE_OP;
+        break;
     }


-    if (!(dst & SLJIT_MEM))
-        return SLJIT_SUCCESS;
-    return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+    if (dst & SLJIT_MEM)
+        return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
+    return SLJIT_SUCCESS;
 }


SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
@@ -1667,12 +1731,15 @@

     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
     op ^= SLJIT_SINGLE_OP;


-    dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
     if (src1 & SLJIT_MEM) {
         emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
         src1 = TMP_FREG1;
@@ -1718,7 +1785,7 @@
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


-    if (dst <= REG_MASK)
+    if (FAST_IS_REG(dst))
         return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));


     /* Memory. */
@@ -1737,7 +1804,7 @@
     check_sljit_emit_fast_return(compiler, src, srcw);
     ADJUST_LOCAL_OFFSET(src, srcw);


-    if (src <= REG_MASK)
+    if (FAST_IS_REG(src))
         FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
     else if (src & SLJIT_MEM) {
         if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))


Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -92,10 +92,10 @@
 #define HI(opcode)    ((opcode) << 26)
 #define LO(opcode)    (opcode)
 /* S = (16 << 21) D = (17 << 21) */
-#define FMT_SD        (16 << 21)
+#define FMT_S        (16 << 21)


-#define ABS_fmt        (HI(17) | FMT_SD | LO(5))
-#define ADD_fmt        (HI(17) | FMT_SD | LO(0))
+#define ABS_S        (HI(17) | FMT_S | LO(5))
+#define ADD_S        (HI(17) | FMT_S | LO(0))
 #define ADDIU        (HI(9))
 #define ADDU        (HI(0) | LO(33))
 #define AND        (HI(0) | LO(36))
@@ -112,17 +112,18 @@
 #define BNE        (HI(5))
 #define BREAK        (HI(0) | LO(13))
 #define CFC1        (HI(17) | (2 << 21))
-#define C_UN_fmt    (HI(17) | FMT_SD | LO(49))
-#define C_UEQ_fmt    (HI(17) | FMT_SD | LO(51))
-#define C_ULE_fmt    (HI(17) | FMT_SD | LO(55))
-#define C_ULT_fmt    (HI(17) | FMT_SD | LO(53))
+#define C_UN_S        (HI(17) | FMT_S | LO(49))
+#define C_UEQ_S        (HI(17) | FMT_S | LO(51))
+#define C_ULE_S        (HI(17) | FMT_S | LO(55))
+#define C_ULT_S        (HI(17) | FMT_S | LO(53))
+#define CVT_S_S        (HI(17) | FMT_S | LO(32))
 #define DADDIU        (HI(25))
 #define DADDU        (HI(0) | LO(45))
 #define DDIV        (HI(0) | LO(30))
 #define DDIVU        (HI(0) | LO(31))
 #define DIV        (HI(0) | LO(26))
 #define DIVU        (HI(0) | LO(27))
-#define DIV_fmt        (HI(17) | FMT_SD | LO(3))
+#define DIV_S        (HI(17) | FMT_S | LO(3))
 #define DMULT        (HI(0) | LO(28))
 #define DMULTU        (HI(0) | LO(29))
 #define DSLL        (HI(0) | LO(56))
@@ -142,13 +143,15 @@
 #define LD        (HI(55))
 #define LUI        (HI(15))
 #define LW        (HI(35))
+#define MFC1        (HI(17))
 #define MFHI        (HI(0) | LO(16))
 #define MFLO        (HI(0) | LO(18))
-#define MOV_fmt        (HI(17) | FMT_SD | LO(6))
-#define MUL_fmt        (HI(17) | FMT_SD | LO(2))
+#define MOV_S        (HI(17) | FMT_S | LO(6))
+#define MTC1        (HI(17) | (4 << 21))
+#define MUL_S        (HI(17) | FMT_S | LO(2))
 #define MULT        (HI(0) | LO(24))
 #define MULTU        (HI(0) | LO(25))
-#define NEG_fmt        (HI(17) | FMT_SD | LO(7))
+#define NEG_S        (HI(17) | FMT_S | LO(7))
 #define NOP        (HI(0) | LO(0))
 #define NOR        (HI(0) | LO(39))
 #define OR        (HI(0) | LO(37))
@@ -164,9 +167,10 @@
 #define SRLV        (HI(0) | LO(6))
 #define SRA        (HI(0) | LO(3))
 #define SRAV        (HI(0) | LO(7))
-#define SUB_fmt        (HI(17) | FMT_SD | LO(1))
+#define SUB_S        (HI(17) | FMT_S | LO(1))
 #define SUBU        (HI(0) | LO(35))
 #define SW        (HI(43))
+#define TRUNC_W_S    (HI(17) | FMT_S | LO(13))
 #define XOR        (HI(0) | LO(38))
 #define XORI        (HI(14))


@@ -495,6 +499,7 @@
 /* Separates integer and floating point registers */
 #define GPR_REG        0x0f
 #define DOUBLE_DATA    0x10
+#define SINGLE_DATA    0x12


 #define MEM_MASK    0x1f


@@ -545,7 +550,7 @@
     compiler->logical_local_size = local_size;
 #endif


-    local_size += (saveds + 1 + 4) * sizeof(sljit_sw);
+    local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
     local_size = (local_size + 15) & ~0xf;
 #else
@@ -599,7 +604,7 @@
     compiler->logical_local_size = local_size;
 #endif


-    local_size += (saveds + 1 + 4) * sizeof(sljit_sw);
+    local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
     compiler->local_size = (local_size + 15) & ~0xf;
 #else
@@ -1278,83 +1283,164 @@
 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7))
 #define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8))


+/* Converts a floating point source to an integer destination. The source
+   (loaded into TMP_FREG1 first when it is a memory operand) is truncated
+   into TMP_FREG1; the result is then moved into the destination register
+   with MFC1 or stored to the destination memory operand. When dst is
+   SLJIT_UNUSED only the truncation is emitted.
+   On MIPS-32 `flags` is the constant 0; otherwise it has bit 21 set when
+   the opcode is SLJIT_CONVW_FROMD, and it adjusts the TRUNC / MFC1
+   encodings and the width of the memory store. */
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#    define flags 0
+#else
+    sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVW_FROMD) << 21;
+#endif
+
+    if (src & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+        src = TMP_FREG1;
+    }
+    else
+        src <<= 1;
+
+    /* (flags >> 19) is either 0 or 4, which toggles one bit of the function field of TRUNC_W_S. */
+    FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS));
+
+    if (dst == SLJIT_UNUSED)
+        return SLJIT_SUCCESS;
+
+    if (FAST_IS_REG(dst))
+        return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS);
+
+    /* Store the integer value from a floating point register. */
+    return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+
+    /* Preprocessor only: remove the function-local `flags` macro defined above. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#    undef flags
+#endif
+}
+
+/* Converts an integer source to a floating point destination. The integer
+   is first placed into TMP_FREG1 (MTC1 from a register, a memory load, or
+   an immediate loaded through TMP_REG1), then a CVT instruction writes the
+   converted value into dst_r. When dst is a memory operand the result is
+   produced in TMP_FREG1 and stored afterwards.
+   On MIPS-32 `flags` is the constant 0; otherwise it has bit 21 set when
+   the opcode is SLJIT_CONVD_FROMW, and it adjusts the MTC1 / CVT encodings
+   and the width of the memory load. */
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#    define flags 0
+#else
+    sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVD_FROMW) << 21;
+#endif
+
+    sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+    if (FAST_IS_REG(src))
+        FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
+    else if (src & SLJIT_MEM) {
+        /* Load the integer value into a floating point register. */
+        FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+    }
+    else {
+        /* Remaining case: the value comes from srcw. For SLJIT_CONVD_FROMI it is
+           narrowed through a cast to sljit_si on 64 bit MIPS. The guard used to
+           test SLJIT_CONFIG_X86_64, which is never set for this back end. */
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+        if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+            srcw = (sljit_si)srcw;
+#endif
+        FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+        FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
+    }
+
+    /* The low bit of the encoding is set when SLJIT_SINGLE_OP is absent from op. */
+    FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+
+    /* dst_r is TMP_FREG1 whenever dst is not a register, so this stores the converted value. */
+    if (dst & SLJIT_MEM)
+        return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+    return SLJIT_SUCCESS;
+
+    /* Preprocessor only: remove the function-local `flags` macro defined above. */
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#    undef flags
+#endif
+}
+
+/* Emits a floating point comparison of src1 and src2. Memory operands are
+   loaded into TMP_FREG1 / TMP_FREG2, register operands have their index
+   shifted left by one. Depending on SLJIT_SET_E and SLJIT_SET_S in op, the
+   EQUAL_FLAG, ULESS_FLAG and UGREATER_FLAG registers are filled from the
+   value read from FCSR_REG after the matching compare instruction. The
+   final instruction emitted is always the C_UN_S compare. */
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    if (src1 & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+        src1 = TMP_FREG1;
+    }
+    else
+        src1 <<= 1;
+
+    if (src2 & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+        src2 = TMP_FREG2;
+    }
+    else
+        src2 <<= 1;
+
+    /* src2 and src1 are swapped. */
+    if (op & SLJIT_SET_E) {
+        /* Compare, read FCSR_REG into EQUAL_FLAG, then keep only bit 23 of it. */
+        FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
+        FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
+        FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
+    }
+    if (op & SLJIT_SET_S) {
+        /* Mixing the instructions for the two checks. */
+        /* The first C_ULT_S result goes to ULESS_FLAG; the second one, with the
+           operands exchanged, goes to UGREATER_FLAG. */
+        FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
+        FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
+        FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
+        FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
+        FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
+        FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
+    }
+    /* The unordered compare is emitted last, regardless of the flag bits in op. */
+    return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
 {
-    sljit_si dst_fr;
+    sljit_si dst_r;


     CHECK_ERROR();
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
-
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    if (GET_OPCODE(op) == SLJIT_CMPD) {
-        if (dst & SLJIT_MEM) {
-            FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
-            dst = TMP_FREG1;
-        }
-        else
-            dst <<= 1;
+    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+    SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);


-        if (src & SLJIT_MEM) {
-            FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
-            src = TMP_FREG2;
-        }
-        else
-            src <<= 1;
+    if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+        op ^= SLJIT_SINGLE_OP;


-        /* src and dst are swapped. */
-        if (op & SLJIT_SET_E) {
-            FAIL_IF(push_inst(compiler, C_UEQ_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS));
-            FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
-            FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
-            FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
-        }
-        if (op & SLJIT_SET_S) {
-            /* Mixing the instructions for the two checks. */
-            FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS));
-            FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
-            FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(dst) | FS(src), UNMOVABLE_INS));
-            FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
-            FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
-            FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
-            FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
-            FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
-        }
-        return push_inst(compiler, C_UN_fmt | FMT(op) | FT(src) | FS(dst), FCSR_FCC);
-    }
+    dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;


-    dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
-
     if (src & SLJIT_MEM) {
-        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
-        src = dst_fr;
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+        src = dst_r;
     }
     else
         src <<= 1;


     switch (GET_OPCODE(op)) {
-        case SLJIT_MOVD:
-            if (src != dst_fr && dst_fr != TMP_FREG1)
-                FAIL_IF(push_inst(compiler, MOV_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
-            break;
-        case SLJIT_NEGD:
-            FAIL_IF(push_inst(compiler, NEG_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
-            break;
-        case SLJIT_ABSD:
-            FAIL_IF(push_inst(compiler, ABS_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
-            break;
+    case SLJIT_MOVD:
+        if (src != dst_r) {
+            if (dst_r != TMP_FREG1)
+                FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+            else
+                dst_r = src;
+        }
+        break;
+    case SLJIT_NEGD:
+        FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+        break;
+    case SLJIT_ABSD:
+        FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+        break;
+    case SLJIT_CONVD_FROMS:
+        FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_SINGLE_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS));
+        op ^= SLJIT_SINGLE_OP;
+        break;
     }


-    if (dst_fr == TMP_FREG1) {
-        if (GET_OPCODE(op) == SLJIT_MOVD)
-            dst_fr = src;
-        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
-    }
-
+    if (dst & SLJIT_MEM)
+        return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
     return SLJIT_SUCCESS;
 }


@@ -1363,15 +1449,18 @@
     sljit_si src1, sljit_sw src1w,
     sljit_si src2, sljit_sw src2w)
 {
-    sljit_si dst_fr, flags = 0;
+    sljit_si dst_r, flags = 0;


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+    dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;


     if (src1 & SLJIT_MEM) {
         if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
@@ -1415,23 +1504,23 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_ADDD:
-        FAIL_IF(push_inst(compiler, ADD_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
         break;


     case SLJIT_SUBD:
-        FAIL_IF(push_inst(compiler, SUB_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
         break;


     case SLJIT_MULD:
-        FAIL_IF(push_inst(compiler, MUL_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
         break;


     case SLJIT_DIVD:
-        FAIL_IF(push_inst(compiler, DIV_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
         break;
     }


-    if (dst_fr == TMP_FREG2)
+    if (dst_r == TMP_FREG2)
         FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));


     return SLJIT_SUCCESS;
@@ -1794,36 +1883,36 @@


     switch (type & 0xff) {
     case SLJIT_C_FLOAT_EQUAL:
-        inst = C_UEQ_fmt;
+        inst = C_UEQ_S;
         if_true = 1;
         break;
     case SLJIT_C_FLOAT_NOT_EQUAL:
-        inst = C_UEQ_fmt;
+        inst = C_UEQ_S;
         if_true = 0;
         break;
     case SLJIT_C_FLOAT_LESS:
-        inst = C_ULT_fmt;
+        inst = C_ULT_S;
         if_true = 1;
         break;
     case SLJIT_C_FLOAT_GREATER_EQUAL:
-        inst = C_ULT_fmt;
+        inst = C_ULT_S;
         if_true = 0;
         break;
     case SLJIT_C_FLOAT_GREATER:
-        inst = C_ULE_fmt;
+        inst = C_ULE_S;
         if_true = 0;
         break;
     case SLJIT_C_FLOAT_LESS_EQUAL:
-        inst = C_ULE_fmt;
+        inst = C_ULE_S;
         if_true = 1;
         break;
     case SLJIT_C_FLOAT_UNORDERED:
-        inst = C_UN_fmt;
+        inst = C_UN_S;
         if_true = 1;
         break;
     case SLJIT_C_FLOAT_ORDERED:
     default: /* Make compilers happy. */
-        inst = C_UN_fmt;
+        inst = C_UN_S;
         if_true = 0;
         break;
     }


Modified: code/trunk/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_common.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativePPC_common.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -114,6 +114,7 @@
 #define B(b)        (reg_map[b] << 11)
 #define C(c)        (reg_map[c] << 6)
 #define FD(fd)        ((fd) << 21)
+#define FS(fs)        ((fs) << 21)
 #define FA(fa)        ((fa) << 16)
 #define FB(fb)        ((fb) << 11)
 #define FC(fc)        ((fc) << 6)
@@ -159,13 +160,17 @@
 #define FABS        (HI(63) | LO(264))
 #define FADD        (HI(63) | LO(21))
 #define FADDS        (HI(59) | LO(21))
+#define FCFID        (HI(63) | LO(846))
 #define FCMPU        (HI(63) | LO(0))
+#define FCTIDZ        (HI(63) | LO(815))
+#define FCTIWZ        (HI(63) | LO(15))
 #define FDIV        (HI(63) | LO(18))
 #define FDIVS        (HI(59) | LO(18))
 #define FMR        (HI(63) | LO(72))
 #define FMUL        (HI(63) | LO(25))
 #define FMULS        (HI(59) | LO(25))
 #define FNEG        (HI(63) | LO(40))
+#define FRSP        (HI(63) | LO(12))
 #define FSUB        (HI(63) | LO(20))
 #define FSUBS        (HI(59) | LO(20))
 #define LD        (HI(58) | 0)
@@ -202,6 +207,7 @@
 #define STD        (HI(62) | 0)
 #define STDU        (HI(62) | 1)
 #define STDUX        (HI(31) | LO(181))
+#define STFIWX        (HI(31) | LO(983))
 #define STW        (HI(36))
 #define STWU        (HI(37))
 #define STWUX        (HI(31) | LO(183))
@@ -602,25 +608,22 @@
     if (args >= 3)
         FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3)));


-#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
-    compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
-#else
-    compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
-#endif
-    compiler->local_size = (compiler->local_size + 15) & ~0xf;
+    local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+    local_size = (local_size + 15) & ~0xf;
+    compiler->local_size = local_size;


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-    if (compiler->local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
+    if (local_size <= SIMM_MAX)
+        FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size)));
     else {
-        FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
+        FAIL_IF(load_immediate(compiler, 0, -local_size));
         FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
     }
 #else
-    if (compiler->local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
+    if (local_size <= SIMM_MAX)
+        FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size)));
     else {
-        FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
+        FAIL_IF(load_immediate(compiler, 0, -local_size));
         FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
     }
 #endif
@@ -639,12 +642,8 @@
     compiler->logical_local_size = local_size;
 #endif


-#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
-    compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
-#else
-    compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
-#endif
-    compiler->local_size = (compiler->local_size + 15) & ~0xf;
+    local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+    compiler->local_size = (local_size + 15) & ~0xf;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
@@ -999,12 +998,12 @@
             tmp_r = arg;
             FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
         }
-        else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) {
+        else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
             if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
                 next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
                 if (high_short == next_high_short) {
-                    compiler->cache_arg = SLJIT_IMM | arg;
-                    compiler->cache_argw = next_high_short;
+                    compiler->cache_arg = SLJIT_MEM | arg;
+                    compiler->cache_argw = high_short;
                     tmp_r = TMP_REG3;
                 }
             }
@@ -1685,59 +1684,233 @@
 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6))
 #define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
+#else
+#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
+
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
+#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
+#else
+#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
+#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
+#endif
+
+#endif /* SLJIT_CONFIG_PPC_64 */
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
 {
-    sljit_si dst_fr;
+    if (src & SLJIT_MEM) {
+        /* We can ignore the temporary data store on the stack from caching point of view. */
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+        src = TMP_FREG1;
+    }


-    CHECK_ERROR();
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+    op = GET_OPCODE(op);
+    FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
+    if (dst == SLJIT_UNUSED)
+        return SLJIT_SUCCESS;


-    if (GET_OPCODE(op) == SLJIT_CMPD) {
-        if (dst & SLJIT_MEM) {
-            FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
-            dst = TMP_FREG1;
+    if (op == SLJIT_CONVW_FROMD) {
+        if (FAST_IS_REG(dst)) {
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0));
+            return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0);
         }
+        return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+    }


-        if (src & SLJIT_MEM) {
-            FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
-            src = TMP_FREG2;
+#else
+    FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
+
+    if (dst == SLJIT_UNUSED)
+        return SLJIT_SUCCESS;
+#endif
+
+    if (FAST_IS_REG(dst)) {
+        FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
+        FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_LOCALS_REG) | B(TMP_REG1)));
+        return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0);
+    }
+
+    SLJIT_ASSERT(dst & SLJIT_MEM);
+
+    if (dst & OFFS_REG_MASK) {
+        dstw &= 0x3;
+        if (dstw) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+            FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
+#else
+            FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
+#endif
+            dstw = TMP_REG1;
         }
+        else
+            dstw = OFFS_REG(dst);
+    }
+    else {
+        if ((dst & REG_MASK) && !dstw) {
+            dstw = dst & REG_MASK;
+            dst = 0;
+        }
+        else {
+            /* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
+            FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
+            dstw = TMP_REG1;
+        }
+    }


-        return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src));
+    return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+
+    sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+    if (src & SLJIT_IMM) {
+        if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+            srcw = (sljit_si)srcw;
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+        src = TMP_REG1;
     }
+    else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) {
+        if (FAST_IS_REG(src))
+            FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
+        else
+            FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+        src = TMP_REG1;
+    }


-    dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+    if (FAST_IS_REG(src)) {
+        FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, dst, dstw));
+    }
+    else
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));


+    FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+
+    if (dst & SLJIT_MEM)
+        return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+    if (op & SLJIT_SINGLE_OP)
+        return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
+    return SLJIT_SUCCESS;
+
+#else
+
+    sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+    sljit_si invert_sign = 1;
+
+    if (src & SLJIT_IMM) {
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
+        src = TMP_REG1;
+        invert_sign = 0;
+    }
+    else if (!FAST_IS_REG(src)) {
+        FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+        src = TMP_REG1;
+    }
+
+    /* First, a special double floating point value is constructed: (2^52 + (input xor (2^31)))
+       The double precision format has exactly 53 bit precision, so the lower 32 bit represents
+       the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
+       to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
+       point value, we need to subtract 2^52 + 2^31 from the constructed value. */
+    FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
+    if (invert_sign)
+        FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
+    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI));
+    FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
+    FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+    FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+
+    FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
+
+    if (dst & SLJIT_MEM)
+        return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+    if (op & SLJIT_SINGLE_OP)
+        return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
+    return SLJIT_SUCCESS;
+
+#endif
+}
+
+/* Emits a floating point comparison of src1 and src2 with FCMPU, using
+   CRD(4) as the destination field. Memory operands are first loaded into
+   TMP_FREG1 (src1) and TMP_FREG2 (src2); register operands are used as is. */
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    if (src1 & SLJIT_MEM) {
+        /* src2 is passed along as the next memory operand of this load. */
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+        src1 = TMP_FREG1;
+    }
+
+    if (src2 & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+        src2 = TMP_FREG2;
+    }
+
+    return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r;
+
+    CHECK_ERROR();
+    compiler->cache_arg = 0;
+    compiler->cache_argw = 0;
+
+    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
+    SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+    if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+        op ^= SLJIT_SINGLE_OP;
+
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
     if (src & SLJIT_MEM) {
-        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
-        src = dst_fr;
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+        src = dst_r;
     }


     switch (GET_OPCODE(op)) {
-        case SLJIT_MOVD:
-            if (src != dst_fr && dst_fr != TMP_FREG1)
-                FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src)));
+    case SLJIT_CONVD_FROMS:
+        op ^= SLJIT_SINGLE_OP;
+        if (op & SLJIT_SINGLE_OP) {
+            FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
             break;
-        case SLJIT_NEGD:
-            FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src)));
-            break;
-        case SLJIT_ABSD:
-            FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src)));
-            break;
+        }
+        /* Fall through. */
+    case SLJIT_MOVD:
+        if (src != dst_r) {
+            if (dst_r != TMP_FREG1)
+                FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
+            else
+                dst_r = src;
+        }
+        break;
+    case SLJIT_NEGD:
+        FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
+        break;
+    case SLJIT_ABSD:
+        FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
+        break;
     }


-    if (dst_fr == TMP_FREG1) {
-        if (GET_OPCODE(op) == SLJIT_MOVD)
-            dst_fr = src;
-        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
-    }
-
+    if (dst & SLJIT_MEM)
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
     return SLJIT_SUCCESS;
 }


@@ -1746,15 +1919,18 @@
     sljit_si src1, sljit_sw src1w,
     sljit_si src2, sljit_sw src2w)
 {
-    sljit_si dst_fr, flags = 0;
+    sljit_si dst_r, flags = 0;


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;


     if (src1 & SLJIT_MEM) {
         if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
@@ -1794,23 +1970,23 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_ADDD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2)));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
         break;


     case SLJIT_SUBD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2)));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
         break;


     case SLJIT_MULD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
         break;


     case SLJIT_DIVD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2)));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
         break;
     }


-    if (dst_fr == TMP_FREG2)
+    if (dst_r == TMP_FREG2)
         FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_common.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeSPARC_common.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -128,10 +128,16 @@
 #define FCMPS        (OPC1(0x2) | OPC3(0x35) | DOP(0x51))
 #define FDIVD        (OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
 #define FDIVS        (OPC1(0x2) | OPC3(0x34) | DOP(0x4d))
+#define FDTOI        (OPC1(0x2) | OPC3(0x34) | DOP(0xd2))
+#define FDTOS        (OPC1(0x2) | OPC3(0x34) | DOP(0xc6))
+#define FITOD        (OPC1(0x2) | OPC3(0x34) | DOP(0xc8))
+#define FITOS        (OPC1(0x2) | OPC3(0x34) | DOP(0xc4))
 #define FMOVS        (OPC1(0x2) | OPC3(0x34) | DOP(0x01))
 #define FMULD        (OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
 #define FMULS        (OPC1(0x2) | OPC3(0x34) | DOP(0x49))
 #define FNEGS        (OPC1(0x2) | OPC3(0x34) | DOP(0x05))
+#define FSTOD        (OPC1(0x2) | OPC3(0x34) | DOP(0xc9))
+#define FSTOI        (OPC1(0x2) | OPC3(0x34) | DOP(0xd1))
 #define FSUBD        (OPC1(0x2) | OPC3(0x34) | DOP(0x46))
 #define FSUBS        (OPC1(0x2) | OPC3(0x34) | DOP(0x45))
 #define JMPL        (OPC1(0x2) | OPC3(0x38))
@@ -388,6 +394,7 @@
 /* Separates integer and floating point registers */
 #define GPR_REG        0x0f
 #define DOUBLE_DATA    0x10
+#define SINGLE_DATA    0x12


 #define MEM_MASK    0x1f


@@ -423,8 +430,7 @@
     compiler->logical_local_size = local_size;
 #endif


-    local_size += 23 * sizeof(sljit_sw);
-    local_size = (local_size + 7) & ~0x7;
+    local_size = (local_size + FIXED_LOCALS_OFFSET + 7) & ~0x7;
     compiler->local_size = local_size;


     if (local_size <= SIMM_MAX) {
@@ -456,8 +462,7 @@
     compiler->logical_local_size = local_size;
 #endif


-    local_size += 23 * sizeof(sljit_sw);
-    compiler->local_size = (local_size + 7) & ~0x7;
+    compiler->local_size = (local_size + FIXED_LOCALS_OFFSET + 7) & ~0x7;
 }


SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
@@ -953,73 +958,139 @@

#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7))
#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
+#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))

+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    if (src & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+        src = TMP_FREG1;
+    }
+    else
+        src <<= 1;
+
+    FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
+
+    if (dst == SLJIT_UNUSED)
+        return SLJIT_SUCCESS;
+
+    if (FAST_IS_REG(dst)) {
+        FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+        return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET);
+    }
+
+    /* Store the integer value from a floating point register. */
+    return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+    if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+        if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+            srcw = (sljit_si)srcw;
+#endif
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+        src = TMP_REG1;
+        srcw = 0;
+    }
+
+    if (FAST_IS_REG(src)) {
+        FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+        src = SLJIT_MEM1(SLJIT_LOCALS_REG);
+        srcw = FLOAT_TMP_MEM_OFFSET;
+    }
+
+    FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+    FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS));
+
+    if (dst & SLJIT_MEM)
+        return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+    return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    if (src1 & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+        src1 = TMP_FREG1;
+    }
+    else
+        src1 <<= 1;
+
+    if (src2 & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+        src2 = TMP_FREG2;
+    }
+    else
+        src2 <<= 1;
+
+    return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
 {
-    sljit_si dst_fr;
+    sljit_si dst_r;


     CHECK_ERROR();
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
-
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    if (GET_OPCODE(op) == SLJIT_CMPD) {
-        if (dst & SLJIT_MEM) {
-            FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
-            dst = TMP_FREG1;
-        }
-        else
-            dst <<= 1;
+    SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+    SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);


-        if (src & SLJIT_MEM) {
-            FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
-            src = TMP_FREG2;
-        }
-        else
-            src <<= 1;
+    if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+        op ^= SLJIT_SINGLE_OP;


-        return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS);
-    }
+    dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;


-    dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
-
     if (src & SLJIT_MEM) {
-        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
-        src = dst_fr;
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+        src = dst_r;
     }
     else
         src <<= 1;


     switch (GET_OPCODE(op)) {
-        case SLJIT_MOVD:
-            if (src != dst_fr && dst_fr != TMP_FREG1) {
-                FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+    case SLJIT_MOVD:
+        if (src != dst_r) {
+            if (dst_r != TMP_FREG1) {
+                FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS));
                 if (!(op & SLJIT_SINGLE_OP))
-                    FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
+                    FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
             }
-            break;
-        case SLJIT_NEGD:
-            FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS));
-            if (dst_fr != src && !(op & SLJIT_SINGLE_OP))
-                FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
-            break;
-        case SLJIT_ABSD:
-            FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS));
-            if (dst_fr != src && !(op & SLJIT_SINGLE_OP))
-                FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
-            break;
+            else
+                dst_r = src;
+        }
+        break;
+    case SLJIT_NEGD:
+        FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS));
+        if (dst_r != src && !(op & SLJIT_SINGLE_OP))
+            FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+        break;
+    case SLJIT_ABSD:
+        FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS));
+        if (dst_r != src && !(op & SLJIT_SINGLE_OP))
+            FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+        break;
+    case SLJIT_CONVD_FROMS:
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS));
+        op ^= SLJIT_SINGLE_OP;
+        break;
     }


-    if (dst_fr == TMP_FREG1) {
-        if (GET_OPCODE(op) == SLJIT_MOVD)
-            dst_fr = src;
-        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
-    }
-
+    if (dst & SLJIT_MEM)
+        FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
     return SLJIT_SUCCESS;
 }


@@ -1028,15 +1099,18 @@
     sljit_si src1, sljit_sw src1w,
     sljit_si src2, sljit_sw src2w)
 {
-    sljit_si dst_fr, flags = 0;
+    sljit_si dst_r, flags = 0;


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+    dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;


     if (src1 & SLJIT_MEM) {
         if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
@@ -1080,23 +1154,23 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_ADDD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
         break;


     case SLJIT_SUBD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
         break;


     case SLJIT_MULD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
         break;


     case SLJIT_DIVD:
-        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
         break;
     }


-    if (dst_fr == TMP_FREG2)
+    if (dst_r == TMP_FREG2)
         FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativeX86_32.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_32.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeX86_32.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -280,21 +280,17 @@
     SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
     /* Both size flags cannot be switched on. */
     SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
     /* SSE2 and immediate is not possible. */
     SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
     SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
         && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
         && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
-#endif


     size &= 0xf;
     inst_size = size;


-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
     if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
         inst_size++;
-#endif
     if (flags & EX86_PREF_66)
         inst_size++;


@@ -348,12 +344,10 @@

     /* Encoding the byte. */
     INC_SIZE(inst_size);
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
     if (flags & EX86_PREF_F2)
         *inst++ = 0xf2;
     if (flags & EX86_PREF_F3)
         *inst++ = 0xf3;
-#endif
     if (flags & EX86_PREF_66)
         *inst++ = 0x66;


@@ -366,15 +360,10 @@

         if ((a & SLJIT_IMM) || (a == 0))
             *buf_ptr = 0;
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-        else if (!(flags & EX86_SSE2))
+        else if (!(flags & EX86_SSE2_OP1))
             *buf_ptr = reg_map[a] << 3;
         else
             *buf_ptr = a << 3;
-#else
-        else
-            *buf_ptr = reg_map[a] << 3;
-#endif
     }
     else {
         if (a & SLJIT_IMM) {
@@ -388,11 +377,7 @@
     }


     if (!(b & SLJIT_MEM))
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-        *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_map[b] : b);
-#else
-        *buf_ptr++ |= MOD_REG + reg_map[b];
-#endif
+        *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
     else if ((b & REG_MASK) != SLJIT_UNUSED) {
         if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
             if (immb != 0) {


Modified: code/trunk/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_64.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeX86_64.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -156,9 +156,9 @@
         }
 #ifdef _WIN64
         if (scratches >= 5) {
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
+            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_EREG2] >= 8, temporary_ereg2_is_hireg);
             *inst++ = REX_B;
-            PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
+            PUSH_REG(reg_lmap[SLJIT_SCRATCH_EREG2]);
         }
 #endif


@@ -340,7 +340,7 @@
 #ifdef _WIN64
     if (compiler->scratches >= 5) {
         *inst++ = REX_B;
-        POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
+        POP_REG(reg_lmap[SLJIT_SCRATCH_EREG2]);
     }
 #endif
     if (compiler->saveds >= 1)
@@ -409,72 +409,64 @@
     SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
     /* Both size flags cannot be switched on. */
     SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
     /* SSE2 and immediate is not possible. */
     SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
     SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
         && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
         && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
-#endif


     size &= 0xf;
     inst_size = size;


-    if ((b & SLJIT_MEM) && !(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
-        if (emit_load_imm64(compiler, TMP_REG3, immb))
-            return NULL;
-        immb = 0;
-        if (b & REG_MASK)
-            b |= TO_OFFS_REG(TMP_REG3);
-        else
-            b |= TMP_REG3;
-    }
-
     if (!compiler->mode32 && !(flags & EX86_NO_REXW))
         rex |= REX_W;
     else if (flags & EX86_REX)
         rex |= REX;


-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
     if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
         inst_size++;
-#endif
     if (flags & EX86_PREF_66)
         inst_size++;


     /* Calculate size of b. */
     inst_size += 1; /* mod r/m byte. */
     if (b & SLJIT_MEM) {
+        if (!(b & OFFS_REG_MASK)) {
+            if (NOT_HALFWORD(immb)) {
+                if (emit_load_imm64(compiler, TMP_REG3, immb))
+                    return NULL;
+                immb = 0;
+                if (b & REG_MASK)
+                    b |= TO_OFFS_REG(TMP_REG3);
+                else
+                    b |= TMP_REG3;
+            }
+            else if (reg_lmap[b & REG_MASK] == 4)
+                b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
+        }
+
         if ((b & REG_MASK) == SLJIT_UNUSED)
             inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */
         else {
             if (reg_map[b & REG_MASK] >= 8)
                 rex |= REX_B;
-            if (immb != 0 && !(b & OFFS_REG_MASK)) {
+            if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG))) {
                 /* Immediate operand. */
                 if (immb <= 127 && immb >= -128)
                     inst_size += sizeof(sljit_sb);
                 else
                     inst_size += sizeof(sljit_si);
             }
-        }


-        if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK))
-            b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
-
-        if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
-            inst_size += 1; /* SIB byte. */
-            if (reg_map[OFFS_REG(b)] >= 8)
-                rex |= REX_X;
+            if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
+                inst_size += 1; /* SIB byte. */
+                if (reg_map[OFFS_REG(b)] >= 8)
+                    rex |= REX_X;
+            }
         }
     }
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-    else if (!(flags & EX86_SSE2) && reg_map[b] >= 8)
+    else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
         rex |= REX_B;
-#else
-    else if (reg_map[b] >= 8)
-        rex |= REX_B;
-#endif


     if (a & SLJIT_IMM) {
         if (flags & EX86_BIN_INS) {
@@ -500,13 +492,8 @@
     else {
         SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
         /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-        if (!(flags & EX86_SSE2) && reg_map[a] >= 8)
+        if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
             rex |= REX_R;
-#else
-        if (reg_map[a] >= 8)
-            rex |= REX_R;
-#endif
     }


     if (rex)
@@ -517,12 +504,10 @@


     /* Encoding the byte. */
     INC_SIZE(inst_size);
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
     if (flags & EX86_PREF_F2)
         *inst++ = 0xf2;
     if (flags & EX86_PREF_F3)
         *inst++ = 0xf3;
-#endif
     if (flags & EX86_PREF_66)
         *inst++ = 0x66;
     if (rex)
@@ -536,15 +521,10 @@


         if ((a & SLJIT_IMM) || (a == 0))
             *buf_ptr = 0;
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-        else if (!(flags & EX86_SSE2))
+        else if (!(flags & EX86_SSE2_OP1))
             *buf_ptr = reg_lmap[a] << 3;
         else
             *buf_ptr = a << 3;
-#else
-        else
-            *buf_ptr = reg_lmap[a] << 3;
-#endif
     }
     else {
         if (a & SLJIT_IMM) {
@@ -558,11 +538,7 @@
     }


     if (!(b & SLJIT_MEM))
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-        *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b);
-#else
-        *buf_ptr++ |= MOD_REG + reg_lmap[b];
-#endif
+        *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
     else if ((b & REG_MASK) != SLJIT_UNUSED) {
         if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
             if (immb != 0) {


Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2014-05-28 11:10:58 UTC (rev 1482)
+++ code/trunk/sljit/sljitNativeX86_common.c    2014-06-17 15:48:37 UTC (rev 1483)
@@ -71,8 +71,8 @@
 };


 #define CHECK_EXTRA_REGS(p, w, do) \
-    if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
-        w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
+    if (p >= SLJIT_SCRATCH_EREG1 && p <= SLJIT_SCRATCH_EREG2) { \
+        w = compiler->scratches_start + (p - SLJIT_SCRATCH_EREG1) * sizeof(sljit_sw); \
         p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
         do; \
     } \
@@ -133,9 +133,7 @@


#endif /* SLJIT_CONFIG_X86_32 */

-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
 #define TMP_FREG    (0)
-#endif


 /* Size flags for emit_x86_instruction: */
 #define EX86_BIN_INS        0x0010
@@ -145,13 +143,12 @@
 #define EX86_BYTE_ARG        0x0100
 #define EX86_HALF_ARG        0x0200
 #define EX86_PREF_66        0x0400
+#define EX86_PREF_F2        0x0800
+#define EX86_PREF_F3        0x1000
+#define EX86_SSE2_OP1        0x2000
+#define EX86_SSE2_OP2        0x4000
+#define EX86_SSE2        (EX86_SSE2_OP1 | EX86_SSE2_OP2)


-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-#define EX86_SSE2        0x0800
-#define EX86_PREF_F2        0x1000
-#define EX86_PREF_F3        0x2000
-#endif
-
 /* --------------------------------------------------------------------- */
 /*  Instrucion forms                                                     */
 /* --------------------------------------------------------------------- */
@@ -179,6 +176,9 @@
 #define CMP_EAX_i32    0x3d
 #define CMP_r_rm    0x3b
 #define CMP_rm_r    0x39
+#define CVTPD2PS_x_xm    0x5a
+#define CVTSI2SD_x_rm    0x2a
+#define CVTTSD2SI_r_xm    0x2c
 #define DIV        (/* GROUP_F7 */ 6 << 3)
 #define DIVSD_x_xm    0x5e
 #define INT3        0xcc
@@ -239,6 +239,7 @@
 #define TEST_EAX_i32    0xa9
 #define TEST_rm_r    0x85
 #define UCOMISD_x_xm    0x2e
+#define UNPCKLPD_x_xm    0x14
 #define XCHG_EAX_r    0x90
 #define XCHG_r_rm    0x87
 #define XOR        (/* BINARY */ 6 << 3)
@@ -271,7 +272,7 @@
 /* Multithreading does not affect these static variables, since they store
    built-in CPU features. Therefore they can be overwritten by different threads
    if they detect the CPU features in the same time. */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
 static sljit_si cpu_has_sse2 = -1;
 #endif
 static sljit_si cpu_has_cmov = -1;
@@ -325,7 +326,7 @@


#endif /* _MSC_VER && _MSC_VER >= 1400 */

-#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
     cpu_has_sse2 = (features >> 26) & 0x1;
 #endif
     cpu_has_cmov = (features >> 15) & 0x1;
@@ -2215,7 +2216,7 @@
 {
     check_sljit_get_register_index(reg);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-    if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
+    if (reg == SLJIT_SCRATCH_EREG1 || reg == SLJIT_SCRATCH_EREG2
             || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
         return -1;
 #endif
@@ -2248,8 +2249,6 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-
 /* Alignment + 2 * 16 bytes. */
 static sljit_si sse2_data[3 + (4 + 4) * 2];
 static sljit_si *sse2_buffer;
@@ -2267,27 +2266,19 @@
     sse2_buffer[13] = 0x7fffffff;
 }


-#endif
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
 {
 #ifdef SLJIT_IS_FPU_AVAILABLE
     return SLJIT_IS_FPU_AVAILABLE;
-#elif (defined SLJIT_SSE2 && SLJIT_SSE2)
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
     if (cpu_has_sse2 == -1)
         get_cpu_features();
     return cpu_has_sse2;
 #else /* SLJIT_DETECT_SSE2 */
     return 1;
 #endif /* SLJIT_DETECT_SSE2 */
-#else /* SLJIT_SSE2 */
-    return 0;
-#endif
 }


-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-
 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
     sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
 {
@@ -2324,31 +2315,89 @@
     return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
     sljit_si dst, sljit_sw dstw,
     sljit_si src, sljit_sw srcw)
 {
-    sljit_si dst_r;
+    sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+    sljit_ub *inst;


-    CHECK_ERROR();
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
+        compiler->mode32 = 0;
+#endif


+    inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
+    FAIL_IF(!inst);
+    *inst++ = GROUP_0F;
+    *inst = CVTTSD2SI_r_xm;
+
+    if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+        return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+    return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+    sljit_ub *inst;
+
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
+        compiler->mode32 = 0;
+#endif
+
+    if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+        if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+            srcw = (sljit_si)srcw;
+#endif
+        EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+        src = TMP_REG1;
+        srcw = 0;
+    }
+
+    inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
+    FAIL_IF(!inst);
+    *inst++ = GROUP_0F;
+    *inst = CVTSI2SD_x_rm;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     compiler->mode32 = 1;
 #endif
+    if (dst_r == TMP_FREG)
+        return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+    return SLJIT_SUCCESS;
+}


-    if (GET_OPCODE(op) == SLJIT_CMPD) {
-        compiler->flags_saved = 0;
-        if (FAST_IS_REG(dst))
-            dst_r = dst;
-        else {
-            dst_r = TMP_FREG;
-            FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
-        }
-        return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si src1, sljit_sw src1w,
+    sljit_si src2, sljit_sw src2w)
+{
+    compiler->flags_saved = 0;
+    if (!FAST_IS_REG(src1)) {
+        FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+        src1 = TMP_FREG;
     }
+    return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
+}


-    if (op == SLJIT_MOVD) {
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+    sljit_si dst, sljit_sw dstw,
+    sljit_si src, sljit_sw srcw)
+{
+    sljit_si dst_r;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    compiler->mode32 = 1;
+#endif
+
+    CHECK_ERROR();
+    SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+    if (GET_OPCODE(op) == SLJIT_MOVD) {
         if (FAST_IS_REG(dst))
             return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
         if (FAST_IS_REG(src))
@@ -2357,6 +2406,25 @@
         return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
     }


+    if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
+        dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+        if (FAST_IS_REG(src)) {
+            /* We overwrite the high bits of source. From SLJIT point of view,
+               this is not an issue.
+               Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
+            FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
+        }
+        else {
+            FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
+            src = TMP_FREG;
+        }
+
+        FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
+        if (dst_r == TMP_FREG)
+            return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+        return SLJIT_SUCCESS;
+    }
+
     if (SLOW_IS_REG(dst)) {
         dst_r = dst;
         if (dst != src)
@@ -2391,6 +2459,9 @@


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     compiler->mode32 = 1;
@@ -2440,33 +2511,6 @@
     return SLJIT_SUCCESS;
 }


-#else
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
-    sljit_si dst, sljit_sw dstw,
-    sljit_si src, sljit_sw srcw)
-{
-    CHECK_ERROR();
-    /* Should cause an assertion fail. */
-    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-    compiler->error = SLJIT_ERR_UNSUPPORTED;
-    return SLJIT_ERR_UNSUPPORTED;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
-    sljit_si dst, sljit_sw dstw,
-    sljit_si src1, sljit_sw src1w,
-    sljit_si src2, sljit_sw src2w)
-{
-    CHECK_ERROR();
-    /* Should cause an assertion fail. */
-    check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
-    compiler->error = SLJIT_ERR_UNSUPPORTED;
-    return SLJIT_ERR_UNSUPPORTED;
-}
-
-#endif
-
 /* --------------------------------------------------------------------- */
 /*  Conditional instructions                                             */
 /* --------------------------------------------------------------------- */