[Pcre-svn] [1149] code/trunk: Major JIT compiler update with…

Home
Delete message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1149] code/trunk: Major JIT compiler update with experimental Sparc 32 support.
Revision: 1149
          http://vcs.pcre.org/viewvc?view=rev&revision=1149
Author:   zherczeg
Date:     2012-10-20 22:33:38 +0100 (Sat, 20 Oct 2012)


Log Message:
-----------
Major JIT compiler update with experimental Sparc 32 support.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/sljit/sljitConfig.h
    code/trunk/sljit/sljitConfigInternal.h
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitLir.h
    code/trunk/sljit/sljitNativeARM_Thumb2.c
    code/trunk/sljit/sljitNativeARM_v5.c
    code/trunk/sljit/sljitNativeMIPS_32.c
    code/trunk/sljit/sljitNativeMIPS_common.c
    code/trunk/sljit/sljitNativePPC_32.c
    code/trunk/sljit/sljitNativePPC_64.c
    code/trunk/sljit/sljitNativePPC_common.c
    code/trunk/sljit/sljitNativeX86_32.c
    code/trunk/sljit/sljitNativeX86_64.c
    code/trunk/sljit/sljitNativeX86_common.c


Added Paths:
-----------
    code/trunk/sljit/sljitNativeSPARC_32.c
    code/trunk/sljit/sljitNativeSPARC_common.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/ChangeLog    2012-10-20 21:33:38 UTC (rev 1149)
@@ -122,7 +122,10 @@


 24. Add support for 32-bit character strings, and UTF-32

+25. Major JIT compiler update (code refactoring and bugfixing).
+    Experimental Sparc 32 support is added.


+
 Version 8.31 06-July-2012
 -------------------------


Modified: code/trunk/sljit/sljitConfig.h
===================================================================
--- code/trunk/sljit/sljitConfig.h    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitConfig.h    2012-10-20 21:33:38 UTC (rev 1149)
@@ -47,6 +47,7 @@
 /* #define SLJIT_CONFIG_PPC_32 1 */
 /* #define SLJIT_CONFIG_PPC_64 1 */
 /* #define SLJIT_CONFIG_MIPS_32 1 */
+/* #define SLJIT_CONFIG_SPARC_32 1 */


 /* #define SLJIT_CONFIG_AUTO 1 */
 /* #define SLJIT_CONFIG_UNSUPPORTED 1 */

Modified: code/trunk/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/sljit/sljitConfigInternal.h    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitConfigInternal.h    2012-10-20 21:33:38 UTC (rev 1149)
@@ -39,6 +39,7 @@
    SLJIT_BIG_ENDIAN : big endian architecture
    SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
    SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
+   SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address


    Types and useful macros:
    sljit_b, sljit_ub : signed and unsigned 8 bit byte
@@ -57,6 +58,7 @@
     || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
     || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
     || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+    || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
     || (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
     || (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED))
 #error "An architecture must be selected"
@@ -71,6 +73,7 @@
     + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
     + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
     + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+    + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
     + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
     + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
 #error "Multiple architectures are selected"
@@ -99,6 +102,8 @@
 #define SLJIT_CONFIG_PPC_32 1
 #elif defined(__mips__)
 #define SLJIT_CONFIG_MIPS_32 1
+#elif defined(__sparc__) || defined(__sparc)
+#define SLJIT_CONFIG_SPARC_32 1
 #else
 /* Unsupported architecture */
 #define SLJIT_CONFIG_UNSUPPORTED 1
@@ -309,7 +314,9 @@
 #if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN)


 /* These macros are useful for the application. */
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+    || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+    || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 #define SLJIT_BIG_ENDIAN 1


 #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -335,12 +342,22 @@
 #error "Exactly one endianness must be selected"
 #endif

+#ifndef SLJIT_INDIRECT_CALL
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32 && defined _AIX)
 /* It seems certain ppc compilers use an indirect addressing for functions
    which makes things complicated. */
 #define SLJIT_INDIRECT_CALL 1
 #endif
+#endif /* SLJIT_INDIRECT_CALL */


+#ifndef SLJIT_RETURN_ADDRESS_OFFSET
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define SLJIT_RETURN_ADDRESS_OFFSET 8
+#else
+#define SLJIT_RETURN_ADDRESS_OFFSET 0
+#endif
+#endif /* SLJIT_RETURN_ADDRESS_OFFSET */
+
 #ifndef SLJIT_SSE2

 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitLir.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -122,7 +122,7 @@
 #endif


 #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
-    #define IS_CONDITIONAL    0x04
+    #define IS_COND        0x04
     #define IS_BL        0x08
     /* cannot be encoded as branch */
     #define B_TYPE0        0x00
@@ -159,13 +159,35 @@
     #define PATCH_J        0x80


     /* instruction types */
-    #define UNMOVABLE_INS    0
+    #define MOVABLE_INS    0
     /* 1 - 31 last destination register */
-    #define FCSR_FCC    32
     /* no destination (i.e: store) */
-    #define MOVABLE_INS    33
+    #define UNMOVABLE_INS    32
+    /* FPU status register */
+    #define FCSR_FCC    33
 #endif


+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+    #define IS_MOVABLE    0x04
+    #define IS_COND        0x08
+    #define IS_CALL        0x10
+
+    #define PATCH_B        0x20
+    #define PATCH_CALL    0x40
+
+    /* instruction types */
+    #define MOVABLE_INS    0
+    /* 1 - 31 last destination register */
+    /* no destination (i.e: store) */
+    #define UNMOVABLE_INS    32
+
+    #define DST_INS_MASK    0xff
+
+    /* ICC_SET is the same as SET_FLAGS. */
+    #define ICC_IS_SET    (1 << 23)
+    #define FCC_IS_SET    (1 << 24)
+#endif
+
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1
 #endif
@@ -179,11 +201,6 @@
 #endif
 #endif


-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
-#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_w))
-#endif
-
 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
 #define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
@@ -198,6 +215,16 @@
 #define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_w))
 #endif

+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_w))
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET (23 * sizeof(sljit_w))
+#endif
+
 #if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET)

 #define ADJUST_LOCAL_OFFSET(p, i) \
@@ -299,6 +326,10 @@
     compiler->delay_slot = UNMOVABLE_INS;
 #endif


+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+    compiler->delay_slot = UNMOVABLE_INS;
+#endif
+
 #if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT)
     if (!compiler_initialized) {
         init_compiler();
@@ -1092,7 +1123,7 @@


     SLJIT_ASSERT(sljit_is_fpu_available());
     SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP)));
-    SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_NOT_NAN);
+    SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_ORDERED);
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     FUNCTION_FCHECK(src1, src1w);
     FUNCTION_FCHECK(src2, src2w);
@@ -1255,6 +1286,8 @@
     #include "sljitNativePPC_common.c"
 #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
     #include "sljitNativeMIPS_common.c"
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+    #include "sljitNativeSPARC_common.c"
 #endif


 #if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -1375,7 +1408,7 @@

 /* Empty function bodies for those machines, which are not (yet) supported. */

-SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
 {
     return "unsupported";
 }


Modified: code/trunk/sljit/sljitLir.h
===================================================================
--- code/trunk/sljit/sljitLir.h    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitLir.h    2012-10-20 21:33:38 UTC (rev 1149)
@@ -260,6 +260,12 @@
     sljit_w cache_argw;
 #endif


+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+    int delay_slot;
+    int cache_arg;
+    sljit_w cache_argw;
+#endif
+
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     FILE* verbose;
 #endif
@@ -542,7 +548,9 @@
 /* Flags: I | E | O | K */
 #define SLJIT_NEG            21
 /* Count leading zeroes
-   Flags: I | E | K */
+   Flags: I | E | K
+   Important note! Sparc 32 does not support K flag, since
+   the required popc instruction is introduced only in sparc 64. */
 #define SLJIT_CLZ            22


 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
@@ -617,7 +625,7 @@
 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void);

 /* Note: dst is the left and src is the right operand for SLJIT_FCMP.
-   Note: NaN check is always performed. If SLJIT_C_FLOAT_NAN is set,
+   Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED is set,
          the comparison result is unpredictable.
    Flags: E | S (see SLJIT_C_FLOAT_*) */
 #define SLJIT_FCMP            34
@@ -677,8 +685,8 @@
 #define SLJIT_C_FLOAT_GREATER_EQUAL    17
 #define SLJIT_C_FLOAT_GREATER        18
 #define SLJIT_C_FLOAT_LESS_EQUAL    19
-#define SLJIT_C_FLOAT_NAN        20
-#define SLJIT_C_FLOAT_NOT_NAN        21
+#define SLJIT_C_FLOAT_UNORDERED        20
+#define SLJIT_C_FLOAT_ORDERED        21


 #define SLJIT_JUMP            22
 #define SLJIT_FAST_CALL            23
@@ -716,7 +724,7 @@
    sljit_emit_jump. However some architectures (i.e: MIPS) may employ
    special optimizations here. It is suggested to use this comparison form
    when appropriate.
-    type must be between SLJIT_C_FLOAT_EQUAL and SLJIT_C_FLOAT_NOT_NAN
+    type must be between SLJIT_C_FLOAT_EQUAL and SLJIT_C_FLOAT_ORDERED
     type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
    Flags: destroy flags.
    Note: if either operand is NaN, the behaviour is undefined for
@@ -741,7 +749,7 @@


 /* If op == SLJIT_MOV:
      Set dst to 1 if condition is fulfilled, 0 otherwise
-       type must be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_NOT_NAN
+       type must be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_ORDERED
      Flags: - (never set any flags)
    If op == SLJIT_OR
      Dst is used as src as well, and set its lowest bit to 1 if


Modified: code/trunk/sljit/sljitNativeARM_Thumb2.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -24,11 +24,14 @@
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */


-SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
 {
     return "ARM-Thumb2" SLJIT_CPUINFO;
 }


+/* Length of an instruction word. */
+typedef sljit_ui sljit_ins;
+
 /* Last register + 1. */
 #define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
 #define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
@@ -40,7 +43,7 @@


 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
-  0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
+    0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
 };


 #define COPY_BITS(src, from, to, bits) \
@@ -75,8 +78,6 @@
 #define IMM12(imm) \
     (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))


-typedef sljit_ui sljit_ins;
-
 /* --------------------------------------------------------------------- */
 /*  Instrucion forms                                                     */
 /* --------------------------------------------------------------------- */
@@ -234,7 +235,7 @@
         diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)(code_ptr + 2)) >> 1;
     }


-    if (jump->flags & IS_CONDITIONAL) {
+    if (jump->flags & IS_COND) {
         SLJIT_ASSERT(!(jump->flags & IS_BL));
         if (diff <= 127 && diff >= -128) {
             jump->flags |= B_TYPE1;
@@ -303,24 +304,24 @@
     switch (type) {
     case 1:
         /* Encoding T1 of 'B' instruction */
-        SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_CONDITIONAL));
+        SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
         jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff);
         return;
     case 2:
         /* Encoding T3 of 'B' instruction */
-        SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_CONDITIONAL));
+        SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
         jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1);
         jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff);
         return;
     case 3:
-        SLJIT_ASSERT(jump->flags & IS_CONDITIONAL);
+        SLJIT_ASSERT(jump->flags & IS_COND);
         *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8;
         diff--;
         type = 5;
         break;
     case 4:
         /* Encoding T2 of 'B' instruction */
-        SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_CONDITIONAL));
+        SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
         jump_inst[0] = 0xe000 | (diff & 0x7ff);
         return;
     }
@@ -385,7 +386,7 @@
                 label = label->next;
             }
             if (jump && jump->addr == half_count) {
-                    jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_CONDITIONAL) ? 10 : 8);
+                    jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
                     code_ptr -= detect_jump_type(jump, code_ptr, code);
                     jump = jump->next;
             }
@@ -1121,6 +1122,10 @@
     return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
 }


+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
 {
     int size;
@@ -1201,7 +1206,6 @@


     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
-    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


@@ -1766,11 +1770,11 @@
         return 0xd;


     case SLJIT_C_OVERFLOW:
-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         return 0x6;


     case SLJIT_C_NOT_OVERFLOW:
-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
         return 0x7;


     default: /* SLJIT_JUMP */
@@ -1810,7 +1814,7 @@
     /* In ARM, we don't need to touch the arguments. */
     PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
     if (type < SLJIT_JUMP) {
-        jump->flags |= IS_CONDITIONAL;
+        jump->flags |= IS_COND;
         cc = get_cc(type);
         jump->flags |= cc << 8;
         PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));


Modified: code/trunk/sljit/sljitNativeARM_v5.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_v5.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativeARM_v5.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -24,7 +24,7 @@
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */


-SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
 {
 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
     return "ARMv7" SLJIT_CPUINFO;
@@ -56,7 +56,7 @@


 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
-  0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
+    0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
 };


 #define RM(rm) (reg_map[rm])
@@ -793,6 +793,10 @@
     return code;
 }


+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
 /* emit_op inp_flags.
    WRITE_BACK must be the first, since it is a flag. */
 #define WRITE_BACK    0x01
@@ -902,7 +906,6 @@


     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
-    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


@@ -1011,6 +1014,74 @@
     sljit_w mul_inst;


     switch (GET_OPCODE(op)) {
+    case SLJIT_MOV:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+        if (dst != src2) {
+            if (src2 & SRC2_IMM) {
+                if (flags & INV_IMM)
+                    EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+            }
+            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UB:
+    case SLJIT_MOV_SB:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+        if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+            if (op == SLJIT_MOV_UB)
+                return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
+            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]));
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
+#else
+            return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
+#endif
+        }
+        else if (dst != src2) {
+            SLJIT_ASSERT(src2 & SRC2_IMM);
+            if (flags & INV_IMM)
+                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UH:
+    case SLJIT_MOV_SH:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+        if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]));
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
+#else
+            return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
+#endif
+        }
+        else if (dst != src2) {
+            SLJIT_ASSERT(src2 & SRC2_IMM);
+            if (flags & INV_IMM)
+                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_NOT:
+        if (src2 & SRC2_IMM) {
+            if (flags & INV_IMM)
+                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+        }
+        EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
+
+    case SLJIT_CLZ:
+        SLJIT_ASSERT(!(flags & INV_IMM));
+        SLJIT_ASSERT(!(src2 & SRC2_IMM));
+        FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
+        if (flags & SET_FLAGS)
+            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
+        return SLJIT_SUCCESS;
+
     case SLJIT_ADD:
         SLJIT_ASSERT(!(flags & INV_IMM));
         EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
@@ -1080,74 +1151,6 @@


     case SLJIT_ASHR:
         EMIT_SHIFT_INS_AND_RETURN(2);
-
-    case SLJIT_MOV:
-        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
-        if (dst != src2) {
-            if (src2 & SRC2_IMM) {
-                if (flags & INV_IMM)
-                    EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
-            }
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
-        }
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UB:
-    case SLJIT_MOV_SB:
-        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
-        if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-            if (op == SLJIT_MOV_UB)
-                return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]));
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
-#else
-            return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
-#endif
-        }
-        else if (dst != src2) {
-            SLJIT_ASSERT(src2 & SRC2_IMM);
-            if (flags & INV_IMM)
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
-        }
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UH:
-    case SLJIT_MOV_SH:
-        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
-        if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-            EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]));
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
-#else
-            return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
-#endif
-        }
-        else if (dst != src2) {
-            SLJIT_ASSERT(src2 & SRC2_IMM);
-            if (flags & INV_IMM)
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
-        }
-        return SLJIT_SUCCESS;
-
-    case SLJIT_NOT:
-        if (src2 & SRC2_IMM) {
-            if (flags & INV_IMM)
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
-        }
-        EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
-
-    case SLJIT_CLZ:
-        SLJIT_ASSERT(!(flags & INV_IMM));
-        SLJIT_ASSERT(!(src2 & SRC2_IMM));
-        FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
-        if (flags & SET_FLAGS)
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
-        return SLJIT_SUCCESS;
     }
     SLJIT_ASSERT_STOP();
     return SLJIT_SUCCESS;
@@ -1982,7 +1985,7 @@
    1 - vfp */
 static int arm_fpu_type = -1;


-static void init_compiler()
+static void init_compiler(void)
 {
     if (arm_fpu_type != -1)
         return;
@@ -2080,7 +2083,7 @@
     int dst, sljit_w dstw,
     int src, sljit_w srcw)
 {
-    int dst_freg;
+    int dst_fr;


     CHECK_ERROR();
     check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
@@ -2102,28 +2105,31 @@
         return SLJIT_SUCCESS;
     }


-    dst_freg = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
+    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;


     if (src > SLJIT_FLOAT_REG4) {
-        FAIL_IF(emit_fpu_data_transfer(compiler, dst_freg, 1, src, srcw));
-        src = dst_freg;
+        FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw));
+        src = dst_fr;
     }


     switch (op) {
         case SLJIT_FMOV:
-            if (src != dst_freg && dst_freg != TMP_FREG1)
-                EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F64, dst_freg, src, 0));
+            if (src != dst_fr && dst_fr != TMP_FREG1)
+                EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F64, dst_fr, src, 0));
             break;
         case SLJIT_FNEG:
-            EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F64, dst_freg, src, 0));
+            EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F64, dst_fr, src, 0));
             break;
         case SLJIT_FABS:
-            EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F64, dst_freg, src, 0));
+            EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F64, dst_fr, src, 0));
             break;
     }


-    if (dst_freg == TMP_FREG1)
-        FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw));
+    if (dst_fr == TMP_FREG1) {
+        if (op == SLJIT_FMOV)
+            dst_fr = src;
+        FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 0, dst, dstw));
+    }


     return SLJIT_SUCCESS;
 }
@@ -2133,7 +2139,7 @@
     int src1, sljit_w src1w,
     int src2, sljit_w src2w)
 {
-    int dst_freg;
+    int dst_fr;


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
@@ -2141,7 +2147,7 @@
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    dst_freg = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
+    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;


     if (src2 > SLJIT_FLOAT_REG4) {
         FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w));
@@ -2155,23 +2161,23 @@


     switch (op) {
     case SLJIT_FADD:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F64, dst_freg, src2, src1));
+        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F64, dst_fr, src2, src1));
         break;


     case SLJIT_FSUB:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F64, dst_freg, src2, src1));
+        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F64, dst_fr, src2, src1));
         break;


     case SLJIT_FMUL:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F64, dst_freg, src2, src1));
+        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F64, dst_fr, src2, src1));
         break;


     case SLJIT_FDIV:
-        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F64, dst_freg, src2, src1));
+        EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F64, dst_fr, src2, src1));
         break;
     }


-    if (dst_freg == TMP_FREG1)
+    if (dst_fr == TMP_FREG1)
         FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw));


     return SLJIT_SUCCESS;
@@ -2270,11 +2276,11 @@
         return 0xd0000000;


     case SLJIT_C_OVERFLOW:
-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         return 0x60000000;


     case SLJIT_C_NOT_OVERFLOW:
-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
         return 0x70000000;


     default: /* SLJIT_JUMP */


Modified: code/trunk/sljit/sljitNativeMIPS_32.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_32.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativeMIPS_32.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -72,6 +72,85 @@
     int overflow_ra = 0;


     switch (GET_OPCODE(op)) {
+    case SLJIT_MOV:
+    case SLJIT_MOV_UI:
+    case SLJIT_MOV_SI:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        if (dst != src2)
+            return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UB:
+    case SLJIT_MOV_SB:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            if (op == SLJIT_MOV_SB) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+                return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+#else
+                FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+                return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+#endif
+            }
+            return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+        }
+        else if (dst != src2)
+            SLJIT_ASSERT_STOP();
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UH:
+    case SLJIT_MOV_SH:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            if (op == SLJIT_MOV_SH) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+                return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+#else
+                FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+                return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+#endif
+            }
+            return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+        }
+        else if (dst != src2)
+            SLJIT_ASSERT_STOP();
+        return SLJIT_SUCCESS;
+
+    case SLJIT_NOT:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        if (op & SLJIT_SET_E)
+            FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        if (CHECK_FLAGS(SLJIT_SET_E))
+            FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
+        return SLJIT_SUCCESS;
+
+    case SLJIT_CLZ:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+        if (op & SLJIT_SET_E)
+            FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        if (CHECK_FLAGS(SLJIT_SET_E))
+            FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
+#else
+        if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+            FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+            return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+        }
+        /* Nearly all instructions are unmovable in the following sequence. */
+        FAIL_IF(push_inst(compiler, ADDU_W | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+        /* Check zero. */
+        FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, ADDIU_W | SA(0) | T(dst) | IMM(-1), DR(dst)));
+        /* Loop for searching the highest bit. */
+        FAIL_IF(push_inst(compiler, ADDIU_W | S(dst) | T(dst) | IMM(1), DR(dst)));
+        FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
+        if (op & SLJIT_SET_E)
+            return push_inst(compiler, ADDU_W | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
+#endif
+        return SLJIT_SUCCESS;
+
     case SLJIT_ADD:
         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_O) {
@@ -293,97 +372,16 @@
     case SLJIT_ASHR:
         EMIT_SHIFT(SRA, SRAV);
         return SLJIT_SUCCESS;
-
-    case SLJIT_MOV:
-    case SLJIT_MOV_UI:
-    case SLJIT_MOV_SI:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if (dst != src2)
-            return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UB:
-    case SLJIT_MOV_SB:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SB) {
-#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
-                return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
-#else
-                FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
-                return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
-#endif
-            }
-            return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
-        }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UH:
-    case SLJIT_MOV_SH:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SH) {
-#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
-                return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
-#else
-                FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
-                return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
-#endif
-            }
-            return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
-        }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
-        return SLJIT_SUCCESS;
-
-    case SLJIT_NOT:
-        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-        if (op & SLJIT_SET_E)
-            FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-        if (CHECK_FLAGS(SLJIT_SET_E))
-            FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
-        return SLJIT_SUCCESS;
-
-    case SLJIT_CLZ:
-        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
-        if (op & SLJIT_SET_E)
-            FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
-        if (CHECK_FLAGS(SLJIT_SET_E))
-            FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
-#else
-        if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
-            FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
-            return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
-        }
-        /* Nearly all instructions are unmovable in the following sequence. */
-        FAIL_IF(push_inst(compiler, ADDU_W | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
-        /* Check zero. */
-        FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(6), UNMOVABLE_INS));
-        FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
-        /* Check sign bit. */
-        FAIL_IF(push_inst(compiler, BLTZ | S(TMP_REG1) | IMM(4), UNMOVABLE_INS));
-        FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(0), UNMOVABLE_INS));
-        /* Loop for searching the highest bit. */
-        FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1)));
-        FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
-        FAIL_IF(push_inst(compiler, ADDIU_W | S(dst) | T(dst) | IMM(1), UNMOVABLE_INS));
-        if (op & SLJIT_SET_E)
-            return push_inst(compiler, ADDU_W | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
-#endif
-        return SLJIT_SUCCESS;
     }


     SLJIT_ASSERT_STOP();
     return SLJIT_SUCCESS;
 }


-static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int reg, sljit_w init_value)
+static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int dst, sljit_w init_value)
 {
-    FAIL_IF(push_inst(compiler, LUI | T(reg) | IMM(init_value >> 16), DR(reg)));
-    return push_inst(compiler, ORI | S(reg) | T(reg) | IMM(init_value), DR(reg));
+    FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst)));
+    return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
 }


 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)

Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -24,7 +24,7 @@
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */


-SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
 {
     return "MIPS" SLJIT_CPUINFO;
 }
@@ -41,15 +41,15 @@
 #define TMP_REG3    (SLJIT_NO_REGISTERS + 3)


 /* For position independent code, t9 must contain the function address. */
-#define PIC_ADDR_REG        TMP_REG2
+#define PIC_ADDR_REG    TMP_REG2


 /* TMP_EREG1 is used mainly for literal encoding on 64 bit. */
-#define TMP_EREG1        15
-#define TMP_EREG2        24
+#define TMP_EREG1    15
+#define TMP_EREG2    24
 /* Floating point status register. */
-#define FCSR_REG        31
+#define FCSR_REG    31
 /* Return address register. */
-#define RETURN_ADDR_REG        31
+#define RETURN_ADDR_REG    31


 /* Flags are keept in volatile registers. */
 #define EQUAL_FLAG    7
@@ -60,9 +60,13 @@
 #define GREATER_FLAG    13
 #define OVERFLOW_FLAG    14


-#define TMP_FREG1    (SLJIT_FLOAT_REG4 + 1)
-#define TMP_FREG2    (SLJIT_FLOAT_REG4 + 2)
+#define TMP_FREG1    ((SLJIT_FLOAT_REG4 + 1) << 1)
+#define TMP_FREG2    ((SLJIT_FLOAT_REG4 + 2) << 1)


+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
+    0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9
+};
+
 /* --------------------------------------------------------------------- */
 /*  Instrucion forms                                                     */
 /* --------------------------------------------------------------------- */
@@ -74,9 +78,9 @@
 #define SA(s)        ((s) << 21)
 #define TA(t)        ((t) << 16)
 #define DA(d)        ((d) << 11)
-#define FT(t)        ((t) << (16 + 1))
-#define FS(s)        ((s) << (11 + 1))
-#define FD(d)        ((d) << (6 + 1))
+#define FT(t)        ((t) << 16)
+#define FS(s)        ((s) << 11)
+#define FD(d)        ((d) << 6)
 #define IMM(imm)    ((imm) & 0xffff)
 #define SH_IMM(imm)    ((imm & 0x1f) << 6)


@@ -114,7 +118,6 @@
 #define JALR        (HI(0) | LO(9))
 #define JR        (HI(0) | LO(8))
 #define LD        (HI(55))
-#define LDC1        (HI(53))
 #define LUI        (HI(15))
 #define LW        (HI(35))
 #define NEG_D        (HI(17) | FMT_D | LO(7))
@@ -132,7 +135,6 @@
 #define OR        (HI(0) | LO(37))
 #define ORI        (HI(13))
 #define SD        (HI(63))
-#define SDC1        (HI(61))
 #define SLT        (HI(0) | LO(42))
 #define SLTI        (HI(10))
 #define SLTIU        (HI(11))
@@ -172,14 +174,12 @@
 #define SIMM_MIN    (-0x8000)
 #define UIMM_MAX    (0xffff)


-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
-  0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9
-};
-
 /* dest_reg is the absolute name of the register
    Useful for reordering instructions in the delay slot. */
 static int push_inst(struct sljit_compiler *compiler, sljit_ins ins, int delay_slot)
 {
+    SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS
+        || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f));
     sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
     FAIL_IF(!ptr);
     *ptr = ins;
@@ -335,7 +335,7 @@
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
                 jump->addr = (sljit_uw)(code_ptr - 3);
 #else
-                jump->addr = (sljit_uw)(code_ptr - 6);
+#error "Implementation required"
 #endif
                 code_ptr = optimize_jump(jump, code_ptr, code);
                 jump = jump->next;
@@ -386,10 +386,7 @@
             buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
             buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
 #else
-            buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
-            buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
-            buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
-            buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
+#error "Implementation required"
 #endif
         } while (0);
         jump = jump->next;
@@ -406,42 +403,43 @@
     return code;
 }


+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
 /* Creates an index in data_transfer_insts array. */
+#define LOAD_DATA    0x01
 #define WORD_DATA    0x00
-#define BYTE_DATA    0x01
-#define HALF_DATA    0x02
-#define INT_DATA    0x03
-#define SIGNED_DATA    0x04
-#define LOAD_DATA    0x08
+#define BYTE_DATA    0x02
+#define HALF_DATA    0x04
+#define INT_DATA    0x06
+#define SIGNED_DATA    0x08
+/* Separates integer and floating point registers */
+#define GPR_REG        0x0f
+#define DOUBLE_DATA    0x10


-#define MEM_MASK    0x0f
+#define MEM_MASK    0x1f


-#define WRITE_BACK    0x00010
-#define ARG_TEST    0x00020
-#define CUMULATIVE_OP    0x00040
-#define LOGICAL_OP    0x00080
-#define IMM_OP        0x00100
-#define SRC2_IMM    0x00200
+#define WRITE_BACK    0x00020
+#define ARG_TEST    0x00040
+#define CUMULATIVE_OP    0x00080
+#define LOGICAL_OP    0x00100
+#define IMM_OP        0x00200
+#define SRC2_IMM    0x00400


-#define UNUSED_DEST    0x00400
-#define REG_DEST    0x00800
-#define REG1_SOURCE    0x01000
-#define REG2_SOURCE    0x02000
-#define SLOW_SRC1    0x04000
-#define SLOW_SRC2    0x08000
-#define SLOW_DEST    0x10000
+#define UNUSED_DEST    0x00800
+#define REG_DEST    0x01000
+#define REG1_SOURCE    0x02000
+#define REG2_SOURCE    0x04000
+#define SLOW_SRC1    0x08000
+#define SLOW_SRC2    0x10000
+#define SLOW_DEST    0x20000


 /* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */
 #define CHECK_FLAGS(list) \
     (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))


 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#include "sljitNativeMIPS_32.c"
-#else
-#include "sljitNativeMIPS_64.c"
-#endif
-
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
 #define STACK_STORE    SW
 #define STACK_LOAD    LW
 #else
@@ -449,10 +447,11 @@
 #define STACK_LOAD    LD
 #endif


-static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags,
-    int dst, sljit_w dstw,
-    int src1, sljit_w src1w,
-    int src2, sljit_w src2w);
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#include "sljitNativeMIPS_32.c"
+#else
+#include "sljitNativeMIPS_64.c"
+#endif


 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
 {
@@ -528,7 +527,6 @@

     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
-    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


@@ -569,33 +567,36 @@
 /* --------------------------------------------------------------------- */

 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#define ARCH_DEPEND(a, b)    a
+#define ARCH_32_64(a, b)    a
 #else
-#define ARCH_DEPEND(a, b)    b
+#define ARCH_32_64(a, b)    b
 #endif


-static SLJIT_CONST sljit_ins data_transfer_insts[16] = {
-/* s u w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */),
-/* s u b */ HI(40) /* sb */,
-/* s u h */ HI(41) /* sh*/,
-/* s u i */ HI(43) /* sw */,
+static SLJIT_CONST sljit_ins data_transfer_insts[16 + 2] = {
+/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */),
+/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */),
+/* u b s */ HI(40) /* sb */,
+/* u b l */ HI(36) /* lbu */,
+/* u h s */ HI(41) /* sh */,
+/* u h l */ HI(37) /* lhu */,
+/* u i s */ HI(43) /* sw */,
+/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */),

-/* s s w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */),
-/* s s b */ HI(40) /* sb */,
-/* s s h */ HI(41) /* sh*/,
-/* s s i */ HI(43) /* sw */,
+/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */),
+/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */),
+/* s b s */ HI(40) /* sb */,
+/* s b l */ HI(32) /* lb */,
+/* s h s */ HI(41) /* sh */,
+/* s h l */ HI(33) /* lh */,
+/* s i s */ HI(43) /* sw */,
+/* s i l */ HI(35) /* lw */,

-/* l u w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */),
-/* l u b */ HI(36) /* lbu */,
-/* l u h */ HI(37) /* lhu */,
-/* l u i */ ARCH_DEPEND(HI(35) /* lw */, HI(39) /* lwu */),
-
-/* l s w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */),
-/* l s b */ HI(32) /* lb */,
-/* l s h */ HI(33) /* lh */,
-/* l s i */ HI(35) /* lw */,
+/* d s */ HI(61) /* sdc1 */,
+/* d l */ HI(53) /* ldc1 */,
 };

+#undef ARCH_32_64
+
 /* reg_ar is an absoulute register! */

 /* Can perform an operation using at most 1 instruction. */
@@ -607,10 +608,11 @@
         /* Works for both absoulte and relative addresses. */
         if (SLJIT_UNLIKELY(flags & ARG_TEST))
             return 1;
-        FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & 0xf) | TA(reg_ar) | IMM(argw), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & 0xf)
+            | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS));
         return -1;
     }
-    return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+    return 0;
 }


 /* See getput_arg below.
@@ -618,8 +620,7 @@
    operators always uses word arguments without write back. */
 static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
 {
-    if (!(next_arg & SLJIT_MEM))
-        return 0;
+    SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));


     /* Simple operation except for updates. */
     if (arg & 0xf0) {
@@ -631,7 +632,7 @@
     }


     if (arg == next_arg) {
-        if (((sljit_uw)(next_argw - argw) <= SIMM_MAX && (sljit_uw)(next_argw - argw) >= SIMM_MIN))
+        if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN))
             return 1;
         return 0;
     }
@@ -642,8 +643,7 @@
 /* Emit the necessary instructions. See can_cache above. */
 static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
 {
-    int tmp_ar;
-    int base;
+    int tmp_ar, base, delay_slot;


     SLJIT_ASSERT(arg & SLJIT_MEM);
     if (!(next_arg & SLJIT_MEM)) {
@@ -651,7 +651,13 @@
         next_argw = 0;
     }


-    tmp_ar = (flags & LOAD_DATA) ? reg_ar : DR(TMP_REG3);
+    if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
+        tmp_ar = reg_ar;
+        delay_slot = reg_ar;
+    } else {
+        tmp_ar = DR(TMP_REG1);
+        delay_slot = MOVABLE_INS;
+    }
     base = arg & 0xf;


     if (SLJIT_UNLIKELY(arg & 0xf0)) {
@@ -666,22 +672,22 @@
         if (argw == compiler->cache_argw) {
             if (!(flags & WRITE_BACK)) {
                 if (arg == compiler->cache_arg)
-                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
                 if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) {
                     if (arg == next_arg && argw == (next_argw & 0x3)) {
                         compiler->cache_arg = arg;
                         compiler->cache_argw = argw;
                         FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
-                        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+                        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
                     }
                     FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
-                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
                 }
             }
             else {
                 if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) {
                     FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
-                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
                 }
             }
         }
@@ -701,10 +707,10 @@
             }
             else
                 FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | DA(tmp_ar), tmp_ar));
-            return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+            return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
         }
         FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | D(base), DR(base)));
-        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
     }


     if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
@@ -740,7 +746,7 @@
                 FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
             }
         }
-        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
     }


     if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
@@ -748,7 +754,7 @@
             FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
             compiler->cache_argw = argw;
         }
-        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
     }


     if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) {
@@ -762,16 +768,16 @@
     compiler->cache_argw = argw;


     if (!base)
-        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);


     if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) {
         compiler->cache_arg = arg;
         FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3)));
-        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
     }


     FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar));
-    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
 }


 static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw)
@@ -783,6 +789,13 @@
     return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
 }


+static SLJIT_INLINE int emit_op_mem2(struct sljit_compiler *compiler, int flags, int reg, int arg1, sljit_w arg1w, int arg2, sljit_w arg2w)
+{
+    if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+        return compiler->error;
+    return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
 static int emit_op(struct sljit_compiler *compiler, int op, int flags,
     int dst, sljit_w dstw,
     int src1, sljit_w src1w,
@@ -823,7 +836,7 @@
                 src2_r = src2w;
             }
         }
-        if ((src1 & SLJIT_IMM) && src1w && (flags & CUMULATIVE_OP) && !(flags & SRC2_IMM)) {
+        if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
             if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN))
                 || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) {
                 flags |= SRC2_IMM;
@@ -948,7 +961,7 @@
     int src, sljit_w srcw)
 {
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-    #define inp_flags 0
+    #define flags 0
 #endif


     CHECK_ERROR();
@@ -960,60 +973,60 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_MOV:
-        return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOV_UI:
-        return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOV_SI:
-        return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOV_UB:
-        return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);


     case SLJIT_MOV_SB:
-        return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);


     case SLJIT_MOV_UH:
-        return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);


     case SLJIT_MOV_SH:
-        return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);


     case SLJIT_MOVU:
-        return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOVU_UI:
-        return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOVU_SI:
-        return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOVU_UB:
-        return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);


     case SLJIT_MOVU_SB:
-        return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);


     case SLJIT_MOVU_UH:
-        return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);


     case SLJIT_MOVU_SH:
-        return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
+        return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);


     case SLJIT_NOT:
-        return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_NEG:
-        return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), inp_flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+        return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);


     case SLJIT_CLZ:
-        return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
     }


     return SLJIT_SUCCESS;
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-    #undef inp_flags
+    #undef flags
 #endif
 }


@@ -1023,7 +1036,7 @@
     int src2, sljit_w src2w)
 {
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-    #define inp_flags 0
+    #define flags 0
 #endif


     CHECK_ERROR();
@@ -1035,19 +1048,19 @@
     switch (GET_OPCODE(op)) {
     case SLJIT_ADD:
     case SLJIT_ADDC:
-        return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SUB:
     case SLJIT_SUBC:
-        return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_MUL:
-        return emit_op(compiler, op, inp_flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_AND:
     case SLJIT_OR:
     case SLJIT_XOR:
-        return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SHL:
     case SLJIT_LSHR:
@@ -1056,15 +1069,14 @@
         if (src2 & SLJIT_IMM)
             src2w &= 0x1f;
 #else
-        if (src2 & SLJIT_IMM)
-            src2w &= 0x3f;
+        SLJIT_ASSERT_STOP();
 #endif
-        return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
     }


     return SLJIT_SUCCESS;
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-    #undef inp_flags
+    #undef flags
 #endif
 }


@@ -1102,44 +1114,6 @@
 #endif
 }

-static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw)
-{
-    int hi_reg;
-
-    SLJIT_ASSERT(arg & SLJIT_MEM);
-
-    /* Fast loads and stores. */
-    if (!(arg & 0xf0)) {
-        /* Both for (arg & 0xf) == SLJIT_UNUSED and (arg & 0xf) != SLJIT_UNUSED. */
-        if (argw <= SIMM_MAX && argw >= SIMM_MIN)
-            return push_inst(compiler, (load ? LDC1 : SDC1) | S(arg & 0xf) | FT(fpu_reg) | IMM(argw), MOVABLE_INS);
-    }
-
-    if (arg & 0xf0) {
-        argw &= 0x3;
-        hi_reg = (arg >> 4) & 0xf;
-        if (argw) {
-            FAIL_IF(push_inst(compiler, SLL_W | T(hi_reg) | D(TMP_REG1) | SH_IMM(argw), DR(TMP_REG1)));
-            hi_reg = TMP_REG1;
-        }
-        FAIL_IF(push_inst(compiler, ADDU_W | S(hi_reg) | T(arg & 0xf) | D(TMP_REG1), DR(TMP_REG1)));
-        return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG1) | FT(fpu_reg) | IMM(0), MOVABLE_INS);
-    }
-
-    /* Use cache. */
-    if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN)
-        return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(argw - compiler->cache_argw), MOVABLE_INS);
-
-    /* Put value to cache. */
-    compiler->cache_arg = arg;
-    compiler->cache_argw = argw;
-
-    FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw));
-    if (arg & 0xf)
-        FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(arg & 0xf) | D(TMP_REG3), DR(TMP_REG3)));
-    return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(0), MOVABLE_INS);
-}
-
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
     int dst, sljit_w dstw,
     int src, sljit_w srcw)
@@ -1154,13 +1128,18 @@


     if (GET_OPCODE(op) == SLJIT_FCMP) {
         if (dst > SLJIT_FLOAT_REG4) {
-            FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw));
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
             dst = TMP_FREG1;
         }
+        else
+            dst <<= 1;
+
         if (src > SLJIT_FLOAT_REG4) {
-            FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw));
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
             src = TMP_FREG2;
         }
+        else
+            src <<= 1;


         /* src and dst are swapped. */
         if (op & SLJIT_SET_E) {
@@ -1183,12 +1162,14 @@
         return push_inst(compiler, C_UN_D | FT(src) | FS(dst), FCSR_FCC);
     }


-    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
+    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : (dst << 1);


     if (src > SLJIT_FLOAT_REG4) {
-        FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw));
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
         src = dst_fr;
     }
+    else
+        src <<= 1;


     switch (op) {
         case SLJIT_FMOV:
@@ -1203,8 +1184,11 @@
             break;
     }


-    if (dst_fr == TMP_FREG1)
-        FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw));
+    if (dst_fr == TMP_FREG1) {
+        if (op == SLJIT_FMOV)
+            dst_fr = src;
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, dst_fr, dst, dstw, 0, 0));
+    }


     return SLJIT_SUCCESS;
 }
@@ -1214,7 +1198,7 @@
     int src1, sljit_w src1w,
     int src2, sljit_w src2w)
 {
-    int dst_fr;
+    int dst_fr, flags = 0;


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
@@ -1222,18 +1206,48 @@
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
+    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG2 : (dst << 1);


+    if (src1 > SLJIT_FLOAT_REG4) {
+        if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+            FAIL_IF(compiler->error);
+            src1 = TMP_FREG1;
+        } else
+            flags |= SLOW_SRC1;
+    }
+    else
+        src1 <<= 1;
+
     if (src2 > SLJIT_FLOAT_REG4) {
-        FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w));
-        src2 = TMP_FREG2;
+        if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+            FAIL_IF(compiler->error);
+            src2 = TMP_FREG2;
+        } else
+            flags |= SLOW_SRC2;
     }
+    else
+        src2 <<= 1;


-    if (src1 > SLJIT_FLOAT_REG4) {
-        FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w));
-        src1 = TMP_FREG1;
+    if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+        }
+        else {
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+        }
     }
+    else if (flags & SLOW_SRC1)
+        FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+    else if (flags & SLOW_SRC2)
+        FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));


+    if (flags & SLOW_SRC1)
+        src1 = TMP_FREG1;
+    if (flags & SLOW_SRC2)
+        src2 = TMP_FREG2;
+
     switch (op) {
     case SLJIT_FADD:
         FAIL_IF(push_inst(compiler, ADD_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
@@ -1252,8 +1266,8 @@
         break;
     }


-    if (dst_fr == TMP_FREG1)
-        FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw));
+    if (dst_fr == TMP_FREG2)
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG2, dst, dstw, 0, 0));


     return SLJIT_SUCCESS;
 }
@@ -1272,6 +1286,8 @@
         return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
     else if (dst & SLJIT_MEM)
         return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
+
+    /* SLJIT_UNUSED is also possible, although highly unlikely. */
     return SLJIT_SUCCESS;
 }


@@ -1316,7 +1332,7 @@
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
 #define JUMP_LENGTH    4
 #else
-#define JUMP_LENGTH    7
+#error "Implementation required"
 #endif


 #define BR_Z(src) \
@@ -1399,10 +1415,10 @@
     case SLJIT_C_MUL_NOT_OVERFLOW:
         BR_NZ(OVERFLOW_FLAG);
         break;
-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         BR_F();
         break;
-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
         BR_T();
         break;
     default:
@@ -1472,17 +1488,11 @@
     compiler->cache_argw = 0;
     flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA;
     if (src1 & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, flags, DR(TMP_REG1), src1, src1w))
-            PTR_FAIL_IF(compiler->error);
-        else
-            PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w));
+        PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w));
         src1 = TMP_REG1;
     }
     if (src2 & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, flags, DR(TMP_REG2), src2, src2w))
-            PTR_FAIL_IF(compiler->error);
-        else
-            PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0));
+        PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0));
         src2 = TMP_REG2;
     }


@@ -1597,13 +1607,18 @@
     compiler->cache_argw = 0;


     if (src1 > SLJIT_FLOAT_REG4) {
-        PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w));
+        PTR_FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
         src1 = TMP_FREG1;
     }
+    else
+        src1 <<= 1;
+
     if (src2 > SLJIT_FLOAT_REG4) {
-        PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w));
+        PTR_FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
         src2 = TMP_FREG2;
     }
+    else
+        src2 <<= 1;


     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
     PTR_FAIL_IF(!jump);
@@ -1636,11 +1651,11 @@
         inst = C_ULE_D;
         if_true = 1;
         break;
-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         inst = C_UN_D;
         if_true = 1;
         break;
-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
     default: /* Make compilers happy. */
         inst = C_UN_D;
         if_true = 0;
@@ -1775,8 +1790,8 @@
         dst_ar = EQUAL_FLAG;
         break;


-    case SLJIT_C_FLOAT_NAN:
-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
+    case SLJIT_C_FLOAT_ORDERED:
         FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
         FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar));
         FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));


Modified: code/trunk/sljit/sljitNativePPC_32.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_32.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativePPC_32.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -45,6 +45,54 @@
     int dst, int src1, int src2)
 {
     switch (op) {
+    case SLJIT_MOV:
+    case SLJIT_MOV_UI:
+    case SLJIT_MOV_SI:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if (dst != src2)
+            return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UB:
+    case SLJIT_MOV_SB:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            if (op == SLJIT_MOV_SB)
+                return push_inst(compiler, EXTSB | S(src2) | A(dst));
+            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
+        }
+        else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
+            return push_inst(compiler, EXTSB | S(src2) | A(dst));
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UH:
+    case SLJIT_MOV_SH:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            if (op == SLJIT_MOV_SH)
+                return push_inst(compiler, EXTSH | S(src2) | A(dst));
+            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
+        }
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_NOT:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+
+    case SLJIT_NEG:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+
+    case SLJIT_CLZ:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+
     case SLJIT_ADD:
         if (flags & ALT_FORM1) {
             /* Flags does not set: BIN_IMM_EXTS unnecessary. */
@@ -185,52 +233,6 @@
             return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
         }
         return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
-
-    case SLJIT_MOV:
-    case SLJIT_MOV_UI:
-    case SLJIT_MOV_SI:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if (dst != src2)
-            return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UB:
-    case SLJIT_MOV_SB:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SB)
-                return push_inst(compiler, EXTSB | S(src2) | A(dst));
-            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
-        }
-        else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
-            return push_inst(compiler, EXTSB | S(src2) | A(dst));
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UH:
-    case SLJIT_MOV_SH:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SH)
-                return push_inst(compiler, EXTSH | S(src2) | A(dst));
-            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
-        }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
-        return SLJIT_SUCCESS;
-
-    case SLJIT_NOT:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
-
-    case SLJIT_NEG:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
-
-    case SLJIT_CLZ:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
     }


     SLJIT_ASSERT_STOP();


Modified: code/trunk/sljit/sljitNativePPC_64.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_64.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativePPC_64.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -149,6 +149,69 @@
     int dst, int src1, int src2)
 {
     switch (op) {
+    case SLJIT_MOV:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if (dst != src2)
+            return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UI:
+    case SLJIT_MOV_SI:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            if (op == SLJIT_MOV_SI)
+                return push_inst(compiler, EXTSW | S(src2) | A(dst));
+            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
+        }
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UB:
+    case SLJIT_MOV_SB:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            if (op == SLJIT_MOV_SB)
+                return push_inst(compiler, EXTSB | S(src2) | A(dst));
+            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
+        }
+        else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
+            return push_inst(compiler, EXTSB | S(src2) | A(dst));
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UH:
+    case SLJIT_MOV_SH:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            if (op == SLJIT_MOV_SH)
+                return push_inst(compiler, EXTSH | S(src2) | A(dst));
+            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
+        }
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
+        return SLJIT_SUCCESS;
+
+    case SLJIT_NOT:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        UN_EXTS();
+        return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
+
+    case SLJIT_NEG:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        UN_EXTS();
+        return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+
+    case SLJIT_CLZ:
+        SLJIT_ASSERT(src1 == TMP_REG1);
+        if (flags & ALT_FORM1)
+            return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+        return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
+
     case SLJIT_ADD:
         if (flags & ALT_FORM1) {
             /* Flags does not set: BIN_IMM_EXTS unnecessary. */
@@ -321,66 +384,6 @@
         if (flags & ALT_FORM2)
             return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
         return push_inst(compiler, SRAD | RC(flags) | S(src1) | A(dst) | B(src2));
-
-    case SLJIT_MOV:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if (dst != src2)
-            return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UI:
-    case SLJIT_MOV_SI:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SI)
-                return push_inst(compiler, EXTSW | S(src2) | A(dst));
-            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
-        }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UB:
-    case SLJIT_MOV_SB:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SB)
-                return push_inst(compiler, EXTSB | S(src2) | A(dst));
-            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
-        }
-        else if ((flags & REG_DEST) && op == SLJIT_MOV_SB)
-            return push_inst(compiler, EXTSB | S(src2) | A(dst));
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
-        return SLJIT_SUCCESS;
-
-    case SLJIT_MOV_UH:
-    case SLJIT_MOV_SH:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SH)
-                return push_inst(compiler, EXTSH | S(src2) | A(dst));
-            return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
-        }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
-        return SLJIT_SUCCESS;
-
-    case SLJIT_NOT:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        UN_EXTS();
-        return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
-
-    case SLJIT_NEG:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        UN_EXTS();
-        return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
-
-    case SLJIT_CLZ:
-        SLJIT_ASSERT(src1 == TMP_REG1);
-        if (flags & ALT_FORM1)
-            return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
-        return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
     }


     SLJIT_ASSERT_STOP();


Modified: code/trunk/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_common.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativePPC_common.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -24,7 +24,7 @@
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */


-SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
 {
     return "PowerPC" SLJIT_CPUINFO;
 }
@@ -86,6 +86,10 @@
 #define TMP_FREG1    (SLJIT_FLOAT_REG4 + 1)
 #define TMP_FREG2    (SLJIT_FLOAT_REG4 + 2)


+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
+    0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31
+};
+
 /* --------------------------------------------------------------------- */
 /*  Instruction forms                                                    */
 /* --------------------------------------------------------------------- */
@@ -146,9 +150,6 @@
 #define FNEG        (HI(63) | LO(40))
 #define FSUB        (HI(63) | LO(20))
 #define LD        (HI(58) | 0)
-#define LFD        (HI(50))
-#define LFDUX        (HI(31) | LO(631))
-#define LFDX        (HI(31) | LO(599))
 #define LWZ        (HI(32))
 #define MFCR        (HI(31) | LO(19))
 #define MFLR        (HI(31) | LO(339) | 0x80000)
@@ -182,9 +183,6 @@
 #define STD        (HI(62) | 0)
 #define STDU        (HI(62) | 1)
 #define STDUX        (HI(31) | LO(181))
-#define STFD        (HI(54))
-#define STFDUX        (HI(31) | LO(759))
-#define STFDX        (HI(31) | LO(727))
 #define STW        (HI(36))
 #define STWU        (HI(37))
 #define STWUX        (HI(31) | LO(183))
@@ -200,10 +198,6 @@
 #define SIMM_MIN    (-0x8000)
 #define UIMM_MAX    (0xffff)


-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
- 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31
-};
-
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_w addr, void* func)
{
@@ -423,19 +417,26 @@
#endif
}

+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
 /* inp_flags: */


 /* Creates an index in data_transfer_insts array. */
+#define LOAD_DATA    0x01
+#define INDEXED        0x02
+#define WRITE_BACK    0x04
 #define WORD_DATA    0x00
-#define BYTE_DATA    0x01
-#define HALF_DATA    0x02
-#define INT_DATA    0x03
-#define SIGNED_DATA    0x04
-#define LOAD_DATA    0x08
-#define WRITE_BACK    0x10
-#define INDEXED        0x20
+#define BYTE_DATA    0x08
+#define HALF_DATA    0x10
+#define INT_DATA    0x18
+#define SIGNED_DATA    0x20
+/* Separates integer and floating point registers */
+#define GPR_REG        0x3f
+#define DOUBLE_DATA    0x40


-#define MEM_MASK    0x3f
+#define MEM_MASK    0x7f


/* Other inp_flags. */

@@ -480,11 +481,6 @@
 #define STACK_LOAD    LD
 #endif


-static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags,
-    int dst, sljit_w dstw,
-    int src1, sljit_w src1w,
-    int src2, sljit_w src2w);
-
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
 {
     CHECK_ERROR();
@@ -567,7 +563,6 @@
 {
     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
-    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


@@ -617,110 +612,127 @@
 #define UPDATE_REQ    0x20000


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-#define ARCH_DEPEND(a, b)    a
-#define GET_INST_CODE(inst)    (inst)
+#define ARCH_32_64(a, b)    a
+#define INST_CODE_AND_DST(inst, flags, reg) \
+    ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
 #else
-#define ARCH_DEPEND(a, b)    b
-#define GET_INST_CODE(index)    ((inst) & ~(ADDR_MODE2 | UPDATE_REQ))
+#define ARCH_32_64(a, b)    b
+#define INST_CODE_AND_DST(inst, flags, reg) \
+    (((inst) & ~(ADDR_MODE2 | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
 #endif


-static SLJIT_CONST sljit_ins data_transfer_insts[64] = {
+static SLJIT_CONST sljit_ins data_transfer_insts[64 + 4] = {

-/* No write-back. */
+/* -------- Unsigned -------- */

-/* i n s u w */ ARCH_DEPEND(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */),
-/* i n s u b */ HI(38) /* stb */,
-/* i n s u h */ HI(44) /* sth*/,
-/* i n s u i */ HI(36) /* stw */,
+/* Word. */

-/* i n s s w */ ARCH_DEPEND(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */),
-/* i n s s b */ HI(38) /* stb */,
-/* i n s s h */ HI(44) /* sth*/,
-/* i n s s i */ HI(36) /* stw */,
+/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */),
+/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */),
+/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

-/* i n l u w */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */),
-/* i n l u b */ HI(34) /* lbz */,
-/* i n l u h */ HI(40) /* lhz */,
-/* i n l u i */ HI(32) /* lwz */,
+/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */),
+/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */),
+/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

-/* i n l s w */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */),
-/* i n l s b */ HI(34) /* lbz */ /* EXTS_REQ */,
-/* i n l s h */ HI(42) /* lha */,
-/* i n l s i */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x2 /* lwa */),
+/* Byte. */

-/* Write-back. */
+/* u b n i s */ HI(38) /* stb */,
+/* u b n i l */ HI(34) /* lbz */,
+/* u b n x s */ HI(31) | LO(215) /* stbx */,
+/* u b n x l */ HI(31) | LO(87) /* lbzx */,

-/* i w s u w */ ARCH_DEPEND(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */),
-/* i w s u b */ HI(39) /* stbu */,
-/* i w s u h */ HI(45) /* sthu */,
-/* i w s u i */ HI(37) /* stwu */,
+/* u b w i s */ HI(39) /* stbu */,
+/* u b w i l */ HI(35) /* lbzu */,
+/* u b w x s */ HI(31) | LO(247) /* stbux */,
+/* u b w x l */ HI(31) | LO(119) /* lbzux */,

-/* i w s s w */ ARCH_DEPEND(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */),
-/* i w s s b */ HI(39) /* stbu */,
-/* i w s s h */ HI(45) /* sthu */,
-/* i w s s i */ HI(37) /* stwu */,
+/* Half. */

-/* i w l u w */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */),
-/* i w l u b */ HI(35) /* lbzu */,
-/* i w l u h */ HI(41) /* lhzu */,
-/* i w l u i */ HI(33) /* lwzu */,
+/* u h n i s */ HI(44) /* sth */,
+/* u h n i l */ HI(40) /* lhz */,
+/* u h n x s */ HI(31) | LO(407) /* sthx */,
+/* u h n x l */ HI(31) | LO(279) /* lhzx */,

-/* i w l s w */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */),
-/* i w l s b */ HI(35) /* lbzu */ /* EXTS_REQ */,
-/* i w l s h */ HI(43) /* lhau */,
-/* i w l s i */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | UPDATE_REQ | 0x2 /* lwa */),
+/* u h w i s */ HI(45) /* sthu */,
+/* u h w i l */ HI(41) /* lhzu */,
+/* u h w x s */ HI(31) | LO(439) /* sthux */,
+/* u h w x l */ HI(31) | LO(311) /* lhzux */,

-/* ---------- */
-/* Indexed */
-/* ---------- */
+/* Int. */

-/* No write-back. */
+/* u i n i s */ HI(36) /* stw */,
+/* u i n i l */ HI(32) /* lwz */,
+/* u i n x s */ HI(31) | LO(151) /* stwx */,
+/* u i n x l */ HI(31) | LO(23) /* lwzx */,

-/* x n s u w */ ARCH_DEPEND(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
-/* x n s u b */ HI(31) | LO(215) /* stbx */,
-/* x n s u h */ HI(31) | LO(407) /* sthx */,
-/* x n s u i */ HI(31) | LO(151) /* stwx */,
+/* u i w i s */ HI(37) /* stwu */,
+/* u i w i l */ HI(33) /* lwzu */,
+/* u i w x s */ HI(31) | LO(183) /* stwux */,
+/* u i w x l */ HI(31) | LO(55) /* lwzux */,

-/* x n s s w */ ARCH_DEPEND(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
-/* x n s s b */ HI(31) | LO(215) /* stbx */,
-/* x n s s h */ HI(31) | LO(407) /* sthx */,
-/* x n s s i */ HI(31) | LO(151) /* stwx */,
+/* -------- Signed -------- */

-/* x n l u w */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
-/* x n l u b */ HI(31) | LO(87) /* lbzx */,
-/* x n l u h */ HI(31) | LO(279) /* lhzx */,
-/* x n l u i */ HI(31) | LO(23) /* lwzx */,
+/* Word. */

-/* x n l s w */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
-/* x n l s b */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
-/* x n l s h */ HI(31) | LO(343) /* lhax */,
-/* x n l s i */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
+/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */),
+/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */),
+/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

-/* Write-back. */
+/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */),
+/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */),
+/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

-/* x w s u w */ ARCH_DEPEND(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
-/* x w s u b */ HI(31) | LO(247) /* stbux */,
-/* x w s u h */ HI(31) | LO(439) /* sthux */,
-/* x w s u i */ HI(31) | LO(183) /* stwux */,
+/* Byte. */

-/* x w s s w */ ARCH_DEPEND(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
-/* x w s s b */ HI(31) | LO(247) /* stbux */,
-/* x w s s h */ HI(31) | LO(439) /* sthux */,
-/* x w s s i */ HI(31) | LO(183) /* stwux */,
+/* s b n i s */ HI(38) /* stb */,
+/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
+/* s b n x s */ HI(31) | LO(215) /* stbx */,
+/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,

-/* x w l u w */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
-/* x w l u b */ HI(31) | LO(119) /* lbzux */,
-/* x w l u h */ HI(31) | LO(311) /* lhzux */,
-/* x w l u i */ HI(31) | LO(55) /* lwzux */,
+/* s b w i s */ HI(39) /* stbu */,
+/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
+/* s b w x s */ HI(31) | LO(247) /* stbux */,
+/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,

-/* x w l s w */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
-/* x w l s b */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
-/* x w l s h */ HI(31) | LO(375) /* lhaux */,
-/* x w l s i */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */)
+/* Half. */

+/* s h n i s */ HI(44) /* sth */,
+/* s h n i l */ HI(42) /* lha */,
+/* s h n x s */ HI(31) | LO(407) /* sthx */,
+/* s h n x l */ HI(31) | LO(343) /* lhax */,
+
+/* s h w i s */ HI(45) /* sthu */,
+/* s h w i l */ HI(43) /* lhau */,
+/* s h w x s */ HI(31) | LO(439) /* sthux */,
+/* s h w x l */ HI(31) | LO(375) /* lhaux */,
+
+/* Int. */
+
+/* s i n i s */ HI(36) /* stw */,
+/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x2 /* lwa */),
+/* s i n x s */ HI(31) | LO(151) /* stwx */,
+/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
+
+/* s i w i s */ HI(37) /* stwu */,
+/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | UPDATE_REQ | 0x2 /* lwa */),
+/* s i w x s */ HI(31) | LO(183) /* stwux */,
+/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
+
+/* -------- Double -------- */
+
+/* d n i s */ HI(54) /* stfd */,
+/* d n i l */ HI(50) /* lfd */,
+/* d n x s */ HI(31) | LO(727) /* stfdx */,
+/* d n x l */ HI(31) | LO(599) /* lfdx */,
+
};

-#undef ARCH_DEPEND
+#undef ARCH_32_64

/* Simple cases, (no caching is required). */
static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw)
@@ -739,7 +751,7 @@

             inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK];
             SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
-            push_inst(compiler, GET_INST_CODE(inst) | D(reg) | IMM(argw));
+            push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw));
             return -1;
         }
 #else
@@ -749,11 +761,11 @@
             if (inp_flags & ARG_TEST)
                 return 1;


-            push_inst(compiler, GET_INST_CODE(inst) | D(reg) | IMM(argw));
+            push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw));
             return -1;
         }
 #endif
-        return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+        return 0;
     }


     if (!(arg & 0xf0)) {
@@ -764,7 +776,7 @@


             inst = data_transfer_insts[inp_flags & MEM_MASK];
             SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
-            push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | IMM(argw));
+            push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw));
             return -1;
         }
 #else
@@ -780,7 +792,7 @@
                 arg = tmp_reg | SLJIT_MEM;
                 argw = 0;
             }
-            push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | IMM(argw));
+            push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw));
             return -1;
         }
 #endif
@@ -790,10 +802,10 @@
             return 1;
         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
         SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
-        push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B((arg >> 4) & 0xf));
+        push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B((arg >> 4) & 0xf));
         return -1;
     }
-    return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+    return 0;
 }


 /* See getput_arg below.
@@ -801,17 +813,13 @@
    uses word arguments without write back. */
 static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
 {
-    SLJIT_ASSERT(arg & SLJIT_MEM);
-    SLJIT_ASSERT(next_arg & SLJIT_MEM);
+    SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));


-    if (!(arg & 0xf)) {
-        if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX))
-            return 1;
-        return 0;
-    }
+    if (!(arg & 0xf))
+        return (next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX);


     if (arg & 0xf0)
-        return 0;
+        return ((arg & 0xf0) == (next_arg & 0xf0) && (argw & 0x3) == (next_argw & 0x3));


     if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
         if (arg == next_arg && (next_argw >= SIMM_MAX && next_argw <= SIMM_MIN))
@@ -844,14 +852,10 @@


     SLJIT_ASSERT(arg & SLJIT_MEM);


-    tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
-    if ((arg & 0xf) == tmp_r) {
-        /* Special case for "mov reg, [reg, ... ]".
-           Caching would not happen anyway. */
-        tmp_r = TMP_REG3;
-        compiler->cache_arg = 0;
-        compiler->cache_argw = 0;
-    }
+    tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
+    /* Special case for "mov reg, [reg, ... ]". */
+    if ((arg & 0xf) == tmp_r)
+        tmp_r = TMP_REG1;


     if (!(arg & 0xf)) {
         inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK];
@@ -859,7 +863,7 @@
             argw = argw - compiler->cache_argw;
             ADJUST_CACHED_IMM(argw);
             SLJIT_ASSERT(!(inst & UPDATE_REQ));
-            return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3) | IMM(argw));
+            return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw));
         }


         if ((next_arg & SLJIT_MEM) && (argw - next_argw <= SIMM_MAX || next_argw - argw <= SIMM_MAX)) {
@@ -871,21 +875,31 @@
         }


         FAIL_IF(load_immediate(compiler, tmp_r, argw));
-        return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(tmp_r));
+        return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
     }


     if (SLJIT_UNLIKELY(arg & 0xf0)) {
         argw &= 0x3;
         /* Otherwise getput_arg_fast would capture it. */
         SLJIT_ASSERT(argw);
+
+        if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg && argw == compiler->cache_argw)
+            tmp_r = TMP_REG3;
+        else {
+            if ((arg & 0xf0) == (next_arg & 0xf0) && argw == (next_argw & 0x3)) {
+                compiler->cache_arg = SLJIT_MEM | (arg & 0xf0);
+                compiler->cache_argw = argw;
+                tmp_r = TMP_REG3;
+            }
 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-        FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
+            FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
 #else
-        FAIL_IF(push_inst(compiler, RLDI(tmp_r, (arg >> 4) & 0xf, argw, 63 - argw, 1)));
+            FAIL_IF(push_inst(compiler, RLDI(tmp_r, (arg >> 4) & 0xf, argw, 63 - argw, 1)));
 #endif
+        }
         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
         SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
-        return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(tmp_r));
+        return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r));
     }


     inst = data_transfer_insts[inp_flags & MEM_MASK];
@@ -894,13 +908,13 @@
         SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
         argw = argw - compiler->cache_argw;
         ADJUST_CACHED_IMM(argw);
-        return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3) | IMM(argw));
+        return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw));
     }


     if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
         SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
-        return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(TMP_REG3));
+        return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3));
     }


     if (argw == next_argw && (next_arg & SLJIT_MEM)) {
@@ -912,7 +926,7 @@


         inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
         SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
-        return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(TMP_REG3));
+        return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3));
     }


     if (arg == next_arg && !(inp_flags & WRITE_BACK) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) {
@@ -923,17 +937,24 @@
         compiler->cache_arg = arg;
         compiler->cache_argw = argw;


-        return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3));
+        return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
     }


     /* Get the indexed version instead of the normal one. */
     inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
     SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
     FAIL_IF(load_immediate(compiler, tmp_r, argw));
-    return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(tmp_r));
+    return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r));
 }


-static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags,
+static SLJIT_INLINE int emit_op_mem2(struct sljit_compiler *compiler, int flags, int reg, int arg1, sljit_w arg1w, int arg2, sljit_w arg2w)
+{
+    if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+        return compiler->error;
+    return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+static int emit_op(struct sljit_compiler *compiler, int op, int input_flags,
     int dst, sljit_w dstw,
     int src1, sljit_w src1w,
     int src2, sljit_w src2w)
@@ -946,7 +967,7 @@
     int src1_r;
     int src2_r;
     int sugg_src2_r = TMP_REG2;
-    int flags = inp_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
+    int flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
@@ -965,7 +986,7 @@
     }
     else {
         SLJIT_ASSERT(dst & SLJIT_MEM);
-        if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
+        if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
             flags |= FAST_DEST;
             dst_r = TMP_REG2;
         }
@@ -982,8 +1003,9 @@
     }
     else if (src1 & SLJIT_IMM) {
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-        if ((inp_flags & 0x3) == INT_DATA) {
-            if (inp_flags & SIGNED_DATA)
+        SLJIT_COMPILE_ASSERT(INT_DATA == 0x18, int_data_check1);
+        if ((input_flags & 0x18) == INT_DATA) {
+            if (input_flags & SIGNED_DATA)
                 src1w = (signed int)src1w;
             else
                 src1w = (unsigned int)src1w;
@@ -992,7 +1014,7 @@
         FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
         src1_r = TMP_REG1;
     }
-    else if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
+    else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
         FAIL_IF(compiler->error);
         src1_r = TMP_REG1;
     }
@@ -1008,8 +1030,9 @@
     }
     else if (src2 & SLJIT_IMM) {
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-        if ((inp_flags & 0x3) == INT_DATA) {
-            if (inp_flags & SIGNED_DATA)
+        SLJIT_COMPILE_ASSERT(INT_DATA == 0x18, int_data_check2);
+        if ((input_flags & 0x18) == INT_DATA) {
+            if (input_flags & SIGNED_DATA)
                 src2w = (signed int)src2w;
             else
                 src2w = (unsigned int)src2w;
@@ -1018,7 +1041,7 @@
         FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
         src2_r = sugg_src2_r;
     }
-    else if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
+    else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
         FAIL_IF(compiler->error);
         src2_r = sugg_src2_r;
     }
@@ -1029,26 +1052,26 @@
        All arguments are complex addressing modes, and it is a binary operator. */
     if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
         if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+            FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+            FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
         }
         else {
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+            FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+            FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
         }
         src1_r = TMP_REG1;
         src2_r = TMP_REG2;
     }
     else if (src1_r == 0 && src2_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+        FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
         src1_r = TMP_REG1;
     }
     else if (src1_r == 0 && dst_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+        FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
         src1_r = TMP_REG1;
     }
     else if (src2_r == 0 && dst_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+        FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
         src2_r = sugg_src2_r;
     }


@@ -1056,12 +1079,12 @@
         dst_r = TMP_REG2;


     if (src1_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
+        FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
         src1_r = TMP_REG1;
     }


     if (src2_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
+        FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
         src2_r = sugg_src2_r;
     }


@@ -1069,9 +1092,9 @@

     if (flags & (FAST_DEST | SLOW_DEST)) {
         if (flags & FAST_DEST)
-            FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
+            FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
         else
-            FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
+            FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
     }
     return SLJIT_SUCCESS;
 }
@@ -1118,23 +1141,26 @@
     return SLJIT_SUCCESS;
 }


+#define EMIT_MOV(type, type_flags, type_cast) \
+    emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
+
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
     int dst, sljit_w dstw,
     int src, sljit_w srcw)
 {
-    int inp_flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+    int flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;


     CHECK_ERROR();
     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
     ADJUST_LOCAL_OFFSET(dst, dstw);
     ADJUST_LOCAL_OFFSET(src, srcw);


-    if ((src & SLJIT_IMM) && srcw == 0)
+    if ((src & SLJIT_IMM) && srcw == 0 && GET_OPCODE(op) >= SLJIT_NOT)
         src = ZERO_REG;


 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
     if (op & SLJIT_INT_OP) {
-        inp_flags |= INT_DATA | SIGNED_DATA;
+        flags |= INT_DATA | SIGNED_DATA;
         if (src & SLJIT_IMM)
             srcw = (int)srcw;
     }
@@ -1144,64 +1170,66 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_MOV:
-        return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOV_UI:
-        return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOV_SI:
-        return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOV_UB:
-        return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (unsigned char));


     case SLJIT_MOV_SB:
-        return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (signed char));


     case SLJIT_MOV_UH:
-        return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (unsigned short));


     case SLJIT_MOV_SH:
-        return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (signed short));


     case SLJIT_MOVU:
-        return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOVU_UI:
-        return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOVU_SI:
-        return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_MOVU_UB:
-        return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (unsigned char));


     case SLJIT_MOVU_SB:
-        return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (signed char));


     case SLJIT_MOVU_UH:
-        return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (unsigned short));


     case SLJIT_MOVU_SH:
-        return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
+        return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (signed short));


     case SLJIT_NOT:
-        return emit_op(compiler, SLJIT_NOT, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_NEG:
-        return emit_op(compiler, SLJIT_NEG, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);


     case SLJIT_CLZ:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-        return emit_op(compiler, SLJIT_CLZ, inp_flags | (!(op & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_CLZ, flags | (!(op & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
 #else
-        return emit_op(compiler, SLJIT_CLZ, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw);
+        return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
 #endif
     }


     return SLJIT_SUCCESS;
 }


+#undef EMIT_MOV
+
 #define TEST_SL_IMM(src, srcw) \
     (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)


@@ -1240,7 +1268,7 @@
     int src1, sljit_w src1w,
     int src2, sljit_w src2w)
 {
-    int inp_flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+    int flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;


     CHECK_ERROR();
     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
@@ -1255,13 +1283,13 @@


 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
     if (op & SLJIT_INT_OP) {
-        inp_flags |= INT_DATA | SIGNED_DATA;
+        flags |= INT_DATA | SIGNED_DATA;
         if (src1 & SLJIT_IMM)
             src1w = (src1w << 32) >> 32;
         if (src2 & SLJIT_IMM)
             src2w = (src2w << 32) >> 32;
         if (GET_FLAGS(op))
-            inp_flags |= ALT_SIGN_EXT;
+            flags |= ALT_SIGN_EXT;
     }
 #endif
     if (op & SLJIT_SET_O)
@@ -1272,63 +1300,63 @@
         if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SL_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
             if (TEST_SH_IMM(src2, src2w)) {
                 compiler->imm = (src2w >> 16) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SH_IMM(src1, src1w)) {
                 compiler->imm = (src1w >> 16) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
             /* Range between -1 and -32768 is covered above. */
             if (TEST_ADD_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffffffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_ADD_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffffffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
         if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SL_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
-        return emit_op(compiler, SLJIT_ADD, inp_flags, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_ADDC:
-        return emit_op(compiler, SLJIT_ADDC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SUB:
         if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
             if (TEST_SL_IMM(src2, -src2w)) {
                 compiler->imm = (-src2w) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SL_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffff;
-                return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
             if (TEST_SH_IMM(src2, -src2w)) {
                 compiler->imm = ((-src2w) >> 16) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             /* Range between -1 and -32768 is covered above. */
             if (TEST_ADD_IMM(src2, -src2w)) {
                 compiler->imm = -src2w & 0xffffffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
         }
         if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
@@ -1336,55 +1364,55 @@
                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
                 if (TEST_SL_IMM(src2, src2w)) {
                     compiler->imm = src2w & 0xffff;
-                    return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
                 }
                 if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
                     compiler->imm = src1w & 0xffff;
-                    return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
                 }
             }
             if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
                 /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
                 if (TEST_UL_IMM(src2, src2w)) {
                     compiler->imm = src2w & 0xffff;
-                    return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
                 }
-                return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
             }
             if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
                 compiler->imm = src2w;
-                return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
-            return emit_op(compiler, SLJIT_SUB, inp_flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
+            return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
         }
         if (!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))) {
             if (TEST_SL_IMM(src2, -src2w)) {
                 compiler->imm = (-src2w) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
         }
         /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */
-        return emit_op(compiler, SLJIT_SUB, inp_flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SUBC:
-        return emit_op(compiler, SLJIT_SUBC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_MUL:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
         if (op & SLJIT_INT_OP)
-            inp_flags |= ALT_FORM2;
+            flags |= ALT_FORM2;
 #endif
         if (!GET_FLAGS(op)) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
-                return emit_op(compiler, SLJIT_MUL, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_SL_IMM(src1, src1w)) {
                 compiler->imm = src1w & 0xffff;
-                return emit_op(compiler, SLJIT_MUL, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
-        return emit_op(compiler, SLJIT_MUL, inp_flags, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_AND:
     case SLJIT_OR:
@@ -1393,45 +1421,45 @@
         if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
             if (TEST_UL_IMM(src2, src2w)) {
                 compiler->imm = src2w;
-                return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_UL_IMM(src1, src1w)) {
                 compiler->imm = src1w;
-                return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
             if (TEST_UH_IMM(src2, src2w)) {
                 compiler->imm = (src2w >> 16) & 0xffff;
-                return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_UH_IMM(src1, src1w)) {
                 compiler->imm = (src1w >> 16) & 0xffff;
-                return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
         if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
             if (TEST_UI_IMM(src2, src2w)) {
                 compiler->imm = src2w;
-                return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
             if (TEST_UI_IMM(src1, src1w)) {
                 compiler->imm = src1w;
-                return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
+                return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
-        return emit_op(compiler, GET_OPCODE(op), inp_flags, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SHL:
     case SLJIT_LSHR:
     case SLJIT_ASHR:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
         if (op & SLJIT_INT_OP)
-            inp_flags |= ALT_FORM2;
+            flags |= ALT_FORM2;
 #endif
         if (src2 & SLJIT_IMM) {
             compiler->imm = src2w;
-            return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+            return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
         }
-        return emit_op(compiler, GET_OPCODE(op), inp_flags, dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
     }


     return SLJIT_SUCCESS;
@@ -1463,44 +1491,6 @@
     return 1;
 }


-static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw)
-{
-    SLJIT_ASSERT(arg & SLJIT_MEM);
-
-    /* Fast loads and stores. */
-    if (!(arg & 0xf0)) {
-        /* Both for (arg & 0xf) == SLJIT_UNUSED and (arg & 0xf) != SLJIT_UNUSED. */
-        if (argw <= SIMM_MAX && argw >= SIMM_MIN)
-            return push_inst(compiler, (load ? LFD : STFD) | FD(fpu_reg) | A(arg & 0xf) | IMM(argw));
-    }
-
-    if (arg & 0xf0) {
-        argw &= 0x3;
-        if (argw) {
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-            FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(TMP_REG2) | (argw << 11) | ((31 - argw) << 1)));
-#else
-            FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, (arg >> 4) & 0xf, argw, 63 - argw, 1)));
-#endif
-            return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(arg & 0xf) | B(TMP_REG2));
-        }
-        return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(arg & 0xf) | B((arg >> 4) & 0xf));
-    }
-
-    /* Use cache. */
-    if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN)
-        return push_inst(compiler, (load ? LFD : STFD) | FD(fpu_reg) | A(TMP_REG3) | IMM(argw - compiler->cache_argw));
-
-    /* Put value to cache. */
-    compiler->cache_arg = arg;
-    compiler->cache_argw = argw;
-
-    FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-    if (!(arg & 0xf))
-        return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(0) | B(TMP_REG3));
-    return push_inst(compiler, (load ? LFDUX : STFDUX) | FD(fpu_reg) | A(TMP_REG3) | B(arg & 0xf));
-}
-
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
     int dst, sljit_w dstw,
     int src, sljit_w srcw)
@@ -1515,20 +1505,22 @@


     if (GET_OPCODE(op) == SLJIT_FCMP) {
         if (dst > SLJIT_FLOAT_REG4) {
-            FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw));
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
             dst = TMP_FREG1;
         }
+
         if (src > SLJIT_FLOAT_REG4) {
-            FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw));
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
             src = TMP_FREG2;
         }
+
         return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src));
     }


     dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;


     if (src > SLJIT_FLOAT_REG4) {
-        FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw));
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
         src = dst_fr;
     }


@@ -1545,8 +1537,11 @@
             break;
     }


-    if (dst_fr == TMP_FREG1)
-        FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw));
+    if (dst_fr == TMP_FREG1) {
+        if (op == SLJIT_FMOV)
+            dst_fr = src;
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, dst_fr, dst, dstw, 0, 0));
+    }


     return SLJIT_SUCCESS;
 }
@@ -1556,7 +1551,7 @@
     int src1, sljit_w src1w,
     int src2, sljit_w src2w)
 {
-    int dst_fr;
+    int dst_fr, flags = 0;


     CHECK_ERROR();
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
@@ -1564,18 +1559,44 @@
     compiler->cache_arg = 0;
     compiler->cache_argw = 0;


-    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
+    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG2 : dst;


+    if (src1 > SLJIT_FLOAT_REG4) {
+        if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+            FAIL_IF(compiler->error);
+            src1 = TMP_FREG1;
+        } else
+            flags |= ALT_FORM1;
+    }
+
     if (src2 > SLJIT_FLOAT_REG4) {
-        FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w));
-        src2 = TMP_FREG2;
+        if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+            FAIL_IF(compiler->error);
+            src2 = TMP_FREG2;
+        } else
+            flags |= ALT_FORM2;
     }


-    if (src1 > SLJIT_FLOAT_REG4) {
-        FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w));
-        src1 = TMP_FREG1;
+    if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
+        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+        }
+        else {
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+        }
     }
+    else if (flags & ALT_FORM1)
+        FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+    else if (flags & ALT_FORM2)
+        FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));


+    if (flags & ALT_FORM1)
+        src1 = TMP_FREG1;
+    if (flags & ALT_FORM2)
+        src2 = TMP_FREG2;
+
     switch (op) {
     case SLJIT_FADD:
         FAIL_IF(push_inst(compiler, FADD | FD(dst_fr) | FA(src1) | FB(src2)));
@@ -1594,8 +1615,8 @@
         break;
     }


-    if (dst_fr == TMP_FREG1)
-        FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw));
+    if (dst_fr == TMP_FREG2)
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG2, dst, dstw, 0, 0));


     return SLJIT_SUCCESS;
 }
@@ -1617,6 +1638,7 @@
         return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
     }


+    /* SLJIT_UNUSED is also possible, although highly unlikely. */
     return SLJIT_SUCCESS;
 }


@@ -1658,7 +1680,7 @@
     return label;
 }


-static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, int type)
+static sljit_ins get_bo_bi_flags(int type)
 {
     switch (type) {
     case SLJIT_C_EQUAL:
@@ -1709,10 +1731,10 @@
     case SLJIT_C_FLOAT_NOT_EQUAL:
         return (4 << 21) | ((4 + 2) << 16);


-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         return (12 << 21) | ((4 + 3) << 16);


-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
         return (4 << 21) | ((4 + 3) << 16);


     default:
@@ -1729,7 +1751,7 @@
     CHECK_ERROR_PTR();
     check_sljit_emit_jump(compiler, type);


-    bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff);
+    bo_bi_flags = get_bo_bi_flags(type & 0xff);
     if (!bo_bi_flags)
         return NULL;


@@ -1751,7 +1773,6 @@

 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
 {
-    sljit_ins bo_bi_flags;
     struct sljit_jump *jump = NULL;
     int src_r;


@@ -1759,9 +1780,6 @@
     check_sljit_emit_ijump(compiler, type, src, srcw);
     ADJUST_LOCAL_OFFSET(src, srcw);


-    bo_bi_flags = get_bo_bi_flags(compiler, type);
-    FAIL_IF(!bo_bi_flags);
-
     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
         src_r = src;
     else if (src & SLJIT_IMM) {
@@ -1781,7 +1799,7 @@
     FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
     if (jump)
         jump->addr = compiler->size;
-    return push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0));
+    return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
 }


 /* Get a bit from CR, all other bits are zeroed. */
@@ -1875,11 +1893,11 @@
         INVERT_BIT(reg);
         break;


-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         GET_CR_BIT(4 + 3, reg);
         break;


-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
         GET_CR_BIT(4 + 3, reg);
         INVERT_BIT(reg);
         break;
@@ -1890,11 +1908,9 @@
     }


     if (GET_OPCODE(op) == SLJIT_OR)
-        return emit_op(compiler, GET_OPCODE(op), GET_FLAGS(op) ? ALT_SET_FLAGS : 0, dst, dstw, dst, dstw, TMP_REG2, 0);
+        return emit_op(compiler, SLJIT_OR, GET_FLAGS(op) ? ALT_SET_FLAGS : 0, dst, dstw, dst, dstw, TMP_REG2, 0);


-    if (reg == TMP_REG2)
-        return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
-    return SLJIT_SUCCESS;
+    return (reg == TMP_REG2) ? emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0) : SLJIT_SUCCESS;
 }


SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)

Added: code/trunk/sljit/sljitNativeSPARC_32.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_32.c                            (rev 0)
+++ code/trunk/sljit/sljitNativeSPARC_32.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -0,0 +1,163 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Loads the constant imm into register dst.
+   A value inside [SIMM_MIN, SIMM_MAX] needs a single OR against register 0.
+   Anything else is built from SETHI (bits 31..10) followed, only when the
+   low 10 bits are non-zero, by an OR that fills them in. */
+static int load_immediate(struct sljit_compiler *compiler, int dst, sljit_w imm)
+{
+    sljit_w low_bits = imm & 0x3ff;
+
+    if (imm >= SIMM_MIN && imm <= SIMM_MAX)
+        return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst));
+
+    FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst)));
+    if (!low_bits)
+        return SLJIT_SUCCESS;
+
+    return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | low_bits, DR(dst));
+}
+
+/* Encodes the second source operand: a 13 bit immediate when SRC2_IMM is set
+   in flags, otherwise a register index translated through reg_map. */
+#define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2))
+
+/* Emits the instruction sequence for one operation.
+   op:    SLJIT_* opcode selecting one of the cases below.
+   flags: SRC2_IMM / REG_DEST / REG2_SOURCE hints plus SET_FLAGS; the latter is
+          OR-ed straight into the instruction word and into the delay slot
+          information handed to push_inst.
+   dst, src1: register indexes; src2: register index, or an immediate value
+          when SRC2_IMM is set.
+   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF when push_inst fails. */
+static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags,
+    int dst, int src1, sljit_w src2)
+{
+    /* (flags & SET_FLAGS) is used both as an opcode bit and as delay slot
+       information, so the two constants have to share one value. */
+    SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same);
+
+    switch (op) {
+    case SLJIT_MOV:
+    case SLJIT_MOV_UI:
+    case SLJIT_MOV_SI:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        /* Register move expressed as OR with register 0; nothing to do when
+           source and destination are the same register. */
+        if (dst != src2)
+            return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst));
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UB:
+    case SLJIT_MOV_SB:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            /* Register to register: mask for the unsigned case, shift left
+               then arithmetic shift right by 24 for the signed case. */
+            if (op == SLJIT_MOV_UB)
+                return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst));
+            FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst)));
+            return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
+        }
+        /* NOTE(review): presumably a memory operand was already extended by
+           the load/store itself, so only dst == src2 is expected here. */
+        else if (dst != src2)
+            SLJIT_ASSERT_STOP();
+        return SLJIT_SUCCESS;
+
+    case SLJIT_MOV_UH:
+    case SLJIT_MOV_SH:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+            /* Shift left by 16, then shift back: arithmetic for the signed
+               variant, logical for the unsigned one. */
+            FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst)));
+            return push_inst(compiler, (op == SLJIT_MOV_SH ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
+        }
+        else if (dst != src2)
+            SLJIT_ASSERT_STOP();
+        return SLJIT_SUCCESS;
+
+    case SLJIT_NOT:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        /* XNOR with register 0 as first operand. */
+        return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS));
+
+    case SLJIT_CLZ:
+        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+        /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */
+        /* Compare the source with register 0 and copy it to TMP_REG1. */
+        FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS));
+        FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1)));
+        /* Branch (condition field 0x1, displacement 7 words) past the loop;
+           the instruction after the branch sets dst to 32.
+           NOTE(review): condition 0x1 is "equal" in the SPARC V8 Bicc
+           encoding, i.e. the zero-input shortcut - confirm. */
+        FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS)));
+        /* The counter starts at -1 because the loop adds 1 on every pass,
+           including the last one. */
+        FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst)));
+
+        /* Loop. */
+        /* Test TMP_REG1, shift it left by one, branch back two words
+           (condition field 0xe) and add 1 to dst after the branch.
+           NOTE(review): condition 0xe is "positive" in the SPARC V8 Bicc
+           encoding, so the loop runs until the top bit is set - confirm. */
+        FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
+        FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS));
+        return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS));
+
+    case SLJIT_ADD:
+        return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+    case SLJIT_ADDC:
+        return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+    case SLJIT_SUB:
+        return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+    case SLJIT_SUBC:
+        return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+    case SLJIT_MUL:
+        FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+        if (!(flags & SET_FLAGS))
+            return SLJIT_SUCCESS;
+        /* Flags requested: compare the sign extension of the result
+           (dst >> 31) with the value read by RDY.
+           NOTE(review): presumably Y holds the upper half of the product,
+           making this an overflow check - confirm. */
+        FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4)));
+        return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS);
+
+    case SLJIT_AND:
+        return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+    case SLJIT_OR:
+        return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+    case SLJIT_XOR:
+        return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+
+    /* The three shifts are emitted without SET_FLAGS; when flags are
+       requested an extra flag-setting SUB of dst against register 0 follows. */
+    case SLJIT_SHL:
+        FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+        return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+    case SLJIT_LSHR:
+        FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+        return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+    case SLJIT_ASHR:
+        FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+        return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+    }
+
+    /* Any opcode not handled above is a caller error. */
+    SLJIT_ASSERT_STOP();
+    return SLJIT_SUCCESS;
+}
+
+/* Emits the two word SETHI + OR sequence that loads init_value into dst.
+   Both words are always emitted, even when the low 10 bits are zero;
+   sljit_set_const later rewrites the immediate fields of exactly these
+   two words. */
+static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int dst, sljit_w init_value)
+{
+    sljit_ins high_word = SETHI | D(dst) | ((init_value >> 10) & 0x3fffff);
+    sljit_ins low_word = OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff);
+
+    FAIL_IF(push_inst(compiler, high_word, DR(dst)));
+    return push_inst(compiler, low_word, DR(dst));
+}
+
+/* Retargets a jump whose address is built by two consecutive instruction
+   words at addr: the low 22 bits of the first word receive bits 31..10 of
+   new_addr, the low 10 bits of the second word receive bits 9..0. The
+   instruction cache is flushed for both words afterwards. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+{
+    sljit_ins *words = (sljit_ins*)addr;
+    sljit_ins high_word = words[0];
+    sljit_ins low_word = words[1];
+
+    high_word &= 0xffc00000;
+    high_word |= (new_addr >> 10) & 0x3fffff;
+    low_word &= 0xfffffc00;
+    low_word |= new_addr & 0x3ff;
+
+    words[0] = high_word;
+    words[1] = low_word;
+    SLJIT_CACHE_FLUSH(words, words + 2);
+}
+
+/* Replaces the constant loaded by the two instruction words at addr: the
+   low 22 bits of the first word receive bits 31..10 of new_constant, the
+   low 10 bits of the second word receive bits 9..0. The instruction cache
+   is flushed for both words afterwards. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
+{
+    sljit_ins *words = (sljit_ins*)addr;
+    sljit_ins high_word = words[0];
+    sljit_ins low_word = words[1];
+
+    high_word &= 0xffc00000;
+    high_word |= (new_constant >> 10) & 0x3fffff;
+    low_word &= 0xfffffc00;
+    low_word |= new_constant & 0x3ff;
+
+    words[0] = high_word;
+    words[1] = low_word;
+    SLJIT_CACHE_FLUSH(words, words + 2);
+}


Added: code/trunk/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_common.c                            (rev 0)
+++ code/trunk/sljit/sljitNativeSPARC_common.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -0,0 +1,1286 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Returns the name of this back end: "SPARC" followed by SLJIT_CPUINFO. */
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
+{
+    SLJIT_CONST char *platform_name = "SPARC" SLJIT_CPUINFO;
+
+    return platform_name;
+}
+
+/* Length of an instruction word
+   Both for sparc-32 and sparc-64 */
+typedef sljit_ui sljit_ins;
+
+/* Internal scratch registers and the link register, numbered directly
+   after the public sljit registers. */
+/* TMP_REG2 is not used by getput_arg */
+#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
+#define TMP_REG4    (SLJIT_NO_REGISTERS + 4)
+#define LINK_REG    (SLJIT_NO_REGISTERS + 5)
+
+/* Temporary float registers; the index is shifted left by one.
+   NOTE(review): presumably because a double occupies an even/odd pair of
+   single precision registers - confirm. */
+#define TMP_FREG1    ((SLJIT_FLOAT_REG4 + 1) << 1)
+#define TMP_FREG2    ((SLJIT_FLOAT_REG4 + 2) << 1)
+
+/* Translates an sljit register index (the array index) into the hardware
+   register number that D(), S1(), S2() and DR() put into instructions.
+   Index 0 maps to hardware register 0. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
+    0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* Register fields. D, S1 and S2 translate an sljit register index through
+   reg_map and place it at bit 25, bit 14 and bit 0 respectively; DA, S1A
+   and S2A place an untranslated value at the same positions. */
+#define D(d)        (reg_map[d] << 25)
+#define DA(d)        ((d) << 25)
+#define S1(s1)        (reg_map[s1] << 14)
+#define S2(s2)        (reg_map[s2])
+#define S1A(s1)        ((s1) << 14)
+#define S2A(s2)        (s2)
+/* Bit 13; IMM() sets it together with a value masked to 13 bits. */
+#define IMM_ARG        0x2000
+/* Sub-opcode placed at bit 5, used by the floating point opcodes below. */
+#define DOP(op)        ((op) << 5)
+#define IMM(imm)    (((imm) & 0x1fff) | IMM_ARG)
+
+/* Hardware register number, used as delay slot information for push_inst. */
+#define DR(dr)        (reg_map[dr])
+/* Opcode fields at bit 30, bit 22 and bit 19. */
+#define OPC1(opcode)    ((opcode) << 30)
+#define OPC2(opcode)    ((opcode) << 22)
+#define OPC3(opcode)    ((opcode) << 19)
+/* OR-ed into an arithmetic opcode to request the flag-setting variant. */
+#define SET_FLAGS    OPC3(0x10)
+
+/* Opcodes. Note that NOP and SETHI have the same base encoding. */
+#define ADD        (OPC1(0x2) | OPC3(0x00))
+#define ADDC        (OPC1(0x2) | OPC3(0x08))
+#define AND        (OPC1(0x2) | OPC3(0x01))
+#define ANDN        (OPC1(0x2) | OPC3(0x05))
+#define CALL        (OPC1(0x1))
+#define FABSS        (OPC1(0x2) | OPC3(0x34) | DOP(0x09))
+#define FADDD        (OPC1(0x2) | OPC3(0x34) | DOP(0x42))
+#define FCMPD        (OPC1(0x2) | OPC3(0x35) | DOP(0x52))
+#define FDIVD        (OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
+#define FMOVS        (OPC1(0x2) | OPC3(0x34) | DOP(0x01))
+#define FMULD        (OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
+#define FNEGS        (OPC1(0x2) | OPC3(0x34) | DOP(0x05))
+#define FSUBD        (OPC1(0x2) | OPC3(0x34) | DOP(0x46))
+#define JMPL        (OPC1(0x2) | OPC3(0x38))
+#define NOP        (OPC1(0x0) | OPC2(0x04))
+#define OR        (OPC1(0x2) | OPC3(0x02))
+#define ORN        (OPC1(0x2) | OPC3(0x06))
+#define RDY        (OPC1(0x2) | OPC3(0x28) | S1A(0))
+#define RESTORE        (OPC1(0x2) | OPC3(0x3d))
+#define SAVE        (OPC1(0x2) | OPC3(0x3c))
+#define SETHI        (OPC1(0x0) | OPC2(0x04))
+#define SLL        (OPC1(0x2) | OPC3(0x25))
+#define SLLX        (OPC1(0x2) | OPC3(0x25) | (1 << 12))
+#define SRA        (OPC1(0x2) | OPC3(0x27))
+#define SRAX        (OPC1(0x2) | OPC3(0x27) | (1 << 12))
+#define SRL        (OPC1(0x2) | OPC3(0x26))
+#define SRLX        (OPC1(0x2) | OPC3(0x26) | (1 << 12))
+#define SUB        (OPC1(0x2) | OPC3(0x04))
+#define SUBC        (OPC1(0x2) | OPC3(0x0c))
+#define TA        (OPC1(0x2) | OPC3(0x3a) | (8 << 25))
+#define WRY        (OPC1(0x2) | OPC3(0x30) | DA(0))
+#define XOR        (OPC1(0x2) | OPC3(0x03))
+#define XNOR        (OPC1(0x2) | OPC3(0x07))
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+/* Range and mask of the 22 bit branch displacement, counted in
+   instruction words (see optimize_jump). */
+#define MAX_DISP    (0x1fffff)
+#define MIN_DISP    (-0x200000)
+#define DISP_MASK    (0x3fffff)
+
+#define BICC        (OPC1(0x0) | OPC2(0x2))
+#define FBFCC        (OPC1(0x0) | OPC2(0x6))
+/* Word sized left shift: SLL on 32 bit, SLLX otherwise. */
+#define SLL_W        SLL
+#define SDIV        (OPC1(0x2) | OPC3(0x0f))
+#define SMUL        (OPC1(0x2) | OPC3(0x0b))
+#define UDIV        (OPC1(0x2) | OPC3(0x0e))
+#define UMUL        (OPC1(0x2) | OPC3(0x0a))
+#else
+#define SLL_W        SLLX
+#endif
+
+/* Range of the signed 13 bit immediate encoded by IMM(). */
+#define SIMM_MAX    (0x0fff)
+#define SIMM_MIN    (-0x1000)
+
+/* Appends one instruction word to the compiler's instruction buffer.
+   ins:        the encoded instruction.
+   delay_slot: delay slot reordering information for this instruction.
+               Its DST_INS_MASK part is UNMOVABLE_INS, MOVABLE_INS, or the
+               absolute (hardware) number of the destination register,
+               which must then match bits 25..29 of ins. Useful for
+               reordering instructions in the delay slot; the value is
+               stored in compiler->delay_slot.
+   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when ensure_buf could
+   not provide space for the word. */
+static int push_inst(struct sljit_compiler *compiler, sljit_ins ins, int delay_slot)
+{
+    /* Declared ahead of the assertion: SLJIT_ASSERT(...) is a statement and
+       C89 does not allow a declaration to follow a statement in a block. */
+    sljit_ins *ptr;
+
+    SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS
+        || (delay_slot & DST_INS_MASK) == MOVABLE_INS
+        || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f));
+    ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+    FAIL_IF(!ptr);
+    *ptr = ins;
+    compiler->size++;
+    compiler->delay_slot = delay_slot;
+    return SLJIT_SUCCESS;
+}
+
+/* Tries to shorten the instruction sequence of one jump while the code is
+   being copied to its final place.
+   jump:     the jump being processed; jump->addr points at the first word
+             of its sequence in the final code.
+   code_ptr: position of the last word copied so far.
+   code:     start of the final code, used to resolve label targets.
+   Returns the new position of the last valid word: code_ptr unchanged when
+   nothing could be shortened, otherwise a pointer inside the sequence. The
+   chosen form is recorded in jump->flags (PATCH_CALL or PATCH_B) and the
+   displacement itself is filled in later by sljit_generate_code. */
+static SLJIT_INLINE sljit_ins* optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+    sljit_w diff;
+    sljit_uw target_addr;
+    sljit_ins *inst;
+    sljit_ins saved_inst;
+
+    /* Rewritable jumps keep their full length sequence. */
+    if (jump->flags & SLJIT_REWRITABLE_JUMP)
+        return code_ptr;
+
+    if (jump->flags & JUMP_ADDR)
+        target_addr = jump->u.target;
+    else {
+        SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+        /* label->size already holds the label's word offset in the final code. */
+        target_addr = (sljit_uw)(code + jump->u.label->size);
+    }
+    inst = (sljit_ins*)jump->addr;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+    if (jump->flags & IS_CALL) {
+        /* Call is always patchable on sparc 32. */
+        jump->flags |= PATCH_CALL;
+        if (jump->flags & IS_MOVABLE) {
+            /* The preceding instruction may be moved: put the CALL in its
+               place and let it follow the CALL instead. */
+            inst[0] = inst[-1];
+            inst[-1] = CALL;
+            jump->addr -= sizeof(sljit_ins);
+            return inst;
+        }
+        inst[0] = CALL;
+        inst[1] = NOP;
+        return inst + 1;
+    }
+#else
+    /* Both calls and BPr instructions shall not pass this point. */
+#error "Implementation required"
+#endif
+
+    /* NOTE(review): for conditional jumps the sequence apparently starts one
+       word earlier, at the conditional branch itself - confirm against the
+       jump emitters. */
+    if (jump->flags & IS_COND)
+        inst--;
+
+    if (jump->flags & IS_MOVABLE) {
+        /* Displacement in instruction words, measured from the slot of the
+           preceding (movable) instruction. */
+        diff = ((sljit_w)target_addr - (sljit_w)(inst - 1)) >> 2;
+        if (diff <= MAX_DISP && diff >= MIN_DISP) {
+            jump->flags |= PATCH_B;
+            inst--;
+            if (jump->flags & IS_COND) {
+                /* Swap the two words so the branch comes first; bit 28 of
+                   the branch is flipped on the way.
+                   NOTE(review): bit 28 is the top bit of the condition
+                   field, so this presumably inverts the condition - confirm. */
+                saved_inst = inst[0];
+                inst[0] = inst[1] ^ (1 << 28);
+                inst[1] = saved_inst;
+            } else {
+                /* Branch with condition field 0x8, followed by the moved
+                   instruction. */
+                inst[1] = inst[0];
+                inst[0] = BICC | DA(0x8);
+            }
+            jump->addr = (sljit_uw)inst;
+            return inst + 1;
+        }
+    }
+
+    /* Same attempt without moving the preceding instruction: the branch is
+       followed by a NOP. */
+    diff = ((sljit_w)target_addr - (sljit_w)(inst)) >> 2;
+    if (diff <= MAX_DISP && diff >= MIN_DISP) {
+        jump->flags |= PATCH_B;
+        if (jump->flags & IS_COND)
+            inst[0] ^= (1 << 28);
+        else
+            inst[0] = BICC | DA(0x8);
+        inst[1] = NOP;
+        jump->addr = (sljit_uw)inst;
+        return inst + 1;
+    }
+
+    /* Target out of range: keep the long form. */
+    return code_ptr;
+}
+
+/* Produces the final machine code from the instruction words collected in
+   the compiler's buffers.
+   Pass 1 copies every word into a block obtained from SLJIT_MALLOC_EXEC,
+   records the final address of labels and constants, and lets
+   optimize_jump shorten jump sequences on the fly.
+   Pass 2 walks all jumps and writes their target into the instruction(s).
+   Returns the start of the generated code; on success compiler->error is
+   set to SLJIT_ERR_COMPILED. */
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+    struct sljit_memory_fragment *buf;
+    sljit_ins *code;
+    sljit_ins *code_ptr;
+    sljit_ins *buf_ptr;
+    sljit_ins *buf_end;
+    sljit_uw word_count;
+    sljit_uw addr;
+
+    struct sljit_label *label;
+    struct sljit_jump *jump;
+    struct sljit_const *const_;
+
+    CHECK_ERROR_PTR();
+    check_sljit_generate_code(compiler);
+    reverse_buf(compiler);
+
+    code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+    PTR_FAIL_WITH_EXEC_IF(code);
+    buf = compiler->buf;
+
+    /* word_count counts words read from the buffers, code_ptr the write
+       position; they drift apart whenever a jump gets shortened. */
+    code_ptr = code;
+    word_count = 0;
+    label = compiler->labels;
+    jump = compiler->jumps;
+    const_ = compiler->consts;
+    do {
+        buf_ptr = (sljit_ins*)buf->memory;
+        buf_end = buf_ptr + (buf->used_size >> 2);
+        do {
+            *code_ptr = *buf_ptr++;
+            SLJIT_ASSERT(!label || label->size >= word_count);
+            SLJIT_ASSERT(!jump || jump->addr >= word_count);
+            SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+            /* These structures are ordered by their address. */
+            if (label && label->size == word_count) {
+                /* Just recording the address. */
+                label->addr = (sljit_uw)code_ptr;
+                /* From here on size is the word offset in the final code. */
+                label->size = code_ptr - code;
+                label = label->next;
+            }
+            if (jump && jump->addr == word_count) {
+                /* jump->addr becomes the address of the first word of the
+                   jump sequence, a fixed number of words before the one
+                   just copied. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+                jump->addr = (sljit_uw)(code_ptr - 3);
+#else
+                jump->addr = (sljit_uw)(code_ptr - 6);
+#endif
+                code_ptr = optimize_jump(jump, code_ptr, code);
+                jump = jump->next;
+            }
+            if (const_ && const_->addr == word_count) {
+                /* Just recording the address. */
+                const_->addr = (sljit_uw)code_ptr;
+                const_ = const_->next;
+            }
+            code_ptr ++;
+            word_count ++;
+        } while (buf_ptr < buf_end);
+
+        buf = buf->next;
+    } while (buf);
+
+    /* A label may sit right after the last instruction. */
+    if (label && label->size == word_count) {
+        label->addr = (sljit_uw)code_ptr;
+        label->size = code_ptr - code;
+        label = label->next;
+    }
+
+    SLJIT_ASSERT(!label);
+    SLJIT_ASSERT(!jump);
+    SLJIT_ASSERT(!const_);
+    SLJIT_ASSERT(code_ptr - code <= (int)compiler->size);
+
+    /* Second pass: all label addresses are known, patch the jumps. */
+    jump = compiler->jumps;
+    while (jump) {
+        do {
+            addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+            buf_ptr = (sljit_ins*)jump->addr;
+
+            if (jump->flags & PATCH_CALL) {
+                /* Displacement in words relative to the call, 30 bits. */
+                addr = (sljit_w)(addr - jump->addr) >> 2;
+                SLJIT_ASSERT((sljit_w)addr <= 0x1fffffff && (sljit_w)addr >= -0x20000000);
+                buf_ptr[0] = CALL | (addr & 0x3fffffff);
+                break;
+            }
+            if (jump->flags & PATCH_B) {
+                /* Displacement in words relative to the branch. */
+                addr = (sljit_w)(addr - jump->addr) >> 2;
+                SLJIT_ASSERT((sljit_w)addr <= MAX_DISP && (sljit_w)addr >= MIN_DISP);
+                buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
+                break;
+            }
+
+            /* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+            buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
+            buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
+#else
+#error "Implementation required"
+#endif
+        } while (0);
+        jump = jump->next;
+    }
+
+
+    compiler->error = SLJIT_ERR_COMPILED;
+    /* Reported size is based on the word count before jump shortening. */
+    compiler->executable_size = compiler->size * sizeof(sljit_ins);
+    SLJIT_CACHE_FLUSH(code, code_ptr);
+    return code;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* Creates an index in data_transfer_insts array. */
+/* Bit 0 selects load (set) or store (clear), bits 1-2 the access size and
+   bit 3 the signed variant; (flags & MEM_MASK) is the table index. */
+#define LOAD_DATA    0x01
+#define WORD_DATA    0x00
+#define BYTE_DATA    0x02
+#define HALF_DATA    0x04
+#define INT_DATA    0x06
+#define SIGNED_DATA    0x08
+/* Separates integer and floating point registers */
+/* Indexes up to GPR_REG are integer register transfers. */
+#define GPR_REG        0x0f
+#define DOUBLE_DATA    0x10
+
+#define MEM_MASK    0x1f
+
+/* Flags above MEM_MASK. WRITE_BACK and ARG_TEST are read by
+   getput_arg_fast, SRC2_IMM by the ARG2 macro of emit_single_op. */
+#define WRITE_BACK    0x00020
+#define ARG_TEST    0x00040
+#define CUMULATIVE_OP    0x00080
+#define IMM_OP        0x00100
+#define SRC2_IMM    0x00200
+
+/* REG_DEST and REG2_SOURCE are tested together by emit_single_op to detect
+   register to register moves. */
+#define REG_DEST    0x00400
+#define REG2_SOURCE    0x00800
+#define SLOW_SRC1    0x01000
+#define SLOW_SRC2    0x02000
+#define SLOW_DEST    0x04000
+/* SET_FLAGS (0x10 << 19) also belong here! */
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#include "sljitNativeSPARC_32.c"
+#else
+#include "sljitNativeSPARC_64.c"
+#endif
+
+/* Emits the function prologue.
+   Stores the register usage and the frame size in compiler, emits a SAVE
+   that subtracts the frame size from SLJIT_LOCALS_REG, then copies up to
+   three incoming arguments (hardware registers 24, 25, 26) into
+   SLJIT_SAVED_REG1..3.
+   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
+{
+    CHECK_ERROR();
+    check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif
+    compiler->saveds = saveds;
+    compiler->temporaries = temporaries;
+
+    /* 23 extra words on top of the requested locals, rounded up to a
+       multiple of 8 bytes. */
+    local_size += 23 * sizeof(sljit_w);
+    local_size += 7;
+    local_size &= ~0x7;
+    compiler->local_size = local_size;
+
+    if (local_size > SIMM_MAX) {
+        /* Too large for the immediate field: go through TMP_REG1. */
+        FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size));
+        FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS));
+    }
+    else {
+        FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS));
+    }
+
+    if (args >= 1) {
+        FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1)));
+        if (args >= 2) {
+            FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2)));
+            if (args >= 3) {
+                FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3)));
+            }
+        }
+    }
+
+    return SLJIT_SUCCESS;
+}
+
+/* Records the same context as sljit_emit_enter (register usage and the
+   frame size: locals plus 23 words, rounded up to 8 bytes) without
+   emitting any instruction. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
+{
+    CHECK_ERROR_VOID();
+    check_sljit_set_context(compiler, args, temporaries, saveds, local_size);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif
+    compiler->saveds = saveds;
+    compiler->temporaries = temporaries;
+
+    local_size += 23 * sizeof(sljit_w);
+    local_size += 7;
+    compiler->local_size = local_size & ~0x7;
+}
+
+/* Emits the function epilogue.
+   Unless the return value is a plain SLJIT_MOV from a register, it is first
+   moved to SLJIT_TEMPORARY_REG1 by emit_mov_before_return. The return is a
+   JMPL through hardware register 31 (offset 8) followed by a RESTORE that
+   copies src into SLJIT_TEMPORARY_REG1. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw)
+{
+    int plain_register_move;
+
+    CHECK_ERROR();
+    check_sljit_emit_return(compiler, op, src, srcw);
+
+    plain_register_move = (op == SLJIT_MOV)
+        && (src >= SLJIT_TEMPORARY_REG1) && (src <= SLJIT_NO_REGISTERS);
+    if (!plain_register_move) {
+        FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+        src = SLJIT_TEMPORARY_REG1;
+    }
+
+    FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
+    return push_inst(compiler, RESTORE | D(SLJIT_TEMPORARY_REG1) | S1(src) | S2(0), UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+/* Picks the first argument on sparc-32 and the second one otherwise. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define ARCH_32_64(a, b)    a
+#else
+#define ARCH_32_64(a, b)    b
+#endif
+
+/* Load/store opcodes indexed by (flags & MEM_MASK): bit 0 is LOAD_DATA,
+   bits 1-2 the size (WORD/BYTE/HALF/INT_DATA), bit 3 SIGNED_DATA; the last
+   two entries (DOUBLE_DATA) are the double precision store and load.
+   Row comments read: u/s = unsigned/signed, w/b/h/i/d = word/byte/half/
+   int/double, s/l = store/load. */
+static SLJIT_CONST sljit_ins data_transfer_insts[16 + 2] = {
+/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* u b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */,
+/* u h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */,
+/* u i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */,
+
+/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* s b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */,
+/* s h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */,
+/* s i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */),
+
+/* d   s */ OPC1(3) | OPC3(0x27),
+/* d   l */ OPC1(3) | OPC3(0x23),
+};
+
+#undef ARCH_32_64
+
+/* Performs a memory access if it needs at most one instruction.
+   This is possible without write back when the address is either
+   base + immediate with the immediate in [SIMM_MIN, SIMM_MAX], or
+   base + index with a zero shift.
+   Returns 0 if the access cannot be done this way, 1 if ARG_TEST was set
+   (nothing is emitted) and a non-zero value after emitting the instruction;
+   callers check compiler->error in the latter case. */
+static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw)
+{
+    int is_gpr;
+
+    SLJIT_ASSERT(arg & SLJIT_MEM);
+
+    if (flags & WRITE_BACK)
+        return 0;
+
+    if (arg & 0xf0) {
+        /* Indexed form: only an unshifted index can be encoded. */
+        if ((argw & 0x3) != 0)
+            return 0;
+    }
+    else if (argw > SIMM_MAX || argw < SIMM_MIN)
+        return 0;
+
+    /* Works for both absolute and relative addresses (immediate case). */
+    if (SLJIT_UNLIKELY(flags & ARG_TEST))
+        return 1;
+
+    is_gpr = (flags & MEM_MASK) <= GPR_REG;
+    FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
+        | (is_gpr ? D(reg) : DA(reg))
+        | S1(arg & 0xf) | ((arg & 0xf0) ? S2((arg >> 4) & 0xf) : IMM(argw)),
+        (is_gpr && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
+    return -1;
+}
+
+/* See getput_arg below.
+   Tells whether the offset computed for (arg, argw) can be reused for
+   (next_arg, next_argw). Returns 1 if so, 0 otherwise.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back. */
+static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+    sljit_w diff;
+
+    SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+    if (!(arg & 0xf0)) {
+        /* Immediate offsets: the distance must fit into an immediate. */
+        diff = next_argw - argw;
+        return (diff <= SIMM_MAX && diff >= SIMM_MIN) ? 1 : 0;
+    }
+
+    /* Indexed form: same index register and same shift. */
+    argw &= 0x3;
+    SLJIT_ASSERT(argw);
+    next_argw &= 0x3;
+    return ((arg & 0xf0) == (next_arg & 0xf0) && argw == next_argw) ? 1 : 0;
+}
+
+/* Emit the necessary instructions. See can_cache above.
+
+   Performs the memory access described by (arg, argw) for register reg when
+   getput_arg_fast could not do it with a single instruction.
+   (next_arg, next_argw) describes the memory operand accessed next; it is
+   ignored unless it has SLJIT_MEM set. When the next operand can share the
+   computed offset, the offset is kept in TMP_REG3 and remembered in
+   compiler->cache_arg / compiler->cache_argw.
+   With WRITE_BACK the offset is added to the base register after the access.
+   Returns the result of the last emitted instruction, or the error code of
+   the first failing step. */
+static int getput_arg(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+    int base, arg2, delay_slot, offset_in_reg;
+    sljit_ins dest;
+
+    SLJIT_ASSERT(arg & SLJIT_MEM);
+    if (!(next_arg & SLJIT_MEM)) {
+        next_arg = 0;
+        next_argw = 0;
+    }
+
+    base = arg & 0xf;
+
+    /* A load into a general purpose register may build its offset in the
+       destination register itself. This must not happen with write back:
+       the offset is added to the base after the load, when reg already
+       contains the loaded value. */
+    offset_in_reg = (flags & LOAD_DATA) && !(flags & WRITE_BACK) && ((flags & MEM_MASK) <= GPR_REG) && reg != base;
+
+    if (SLJIT_UNLIKELY(arg & 0xf0)) {
+        argw &= 0x3;
+        /* An unshifted index is handled by getput_arg_fast, except for
+           write back accesses, which always arrive here. */
+        SLJIT_ASSERT(argw != 0 || (flags & WRITE_BACK));
+
+        /* Using the cache. */
+        if (((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) && (argw == compiler->cache_argw))
+            arg2 = TMP_REG3;
+        else {
+            if ((arg & 0xf0) == (next_arg & 0xf0) && argw == (next_argw & 0x3)) {
+                compiler->cache_arg = SLJIT_MEM | (arg & 0xf0);
+                compiler->cache_argw = argw;
+                arg2 = TMP_REG3;
+            }
+            else if (offset_in_reg && (reg << 4) != (arg & 0xf0))
+                arg2 = reg;
+            else /* It must be a mov operation, so tmp1 must be free to use. */
+                arg2 = TMP_REG1;
+            /* arg2 = index << shift */
+            FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1((arg >> 4) & 0xf) | IMM_ARG | argw, DR(arg2)));
+        }
+    }
+    else {
+        /* Using the cache. */
+        if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) {
+            if (argw != compiler->cache_argw) {
+                /* Adjust the cached offset by the (small) difference. */
+                FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+                compiler->cache_argw = argw;
+            }
+            arg2 = TMP_REG3;
+        } else {
+            if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) {
+                compiler->cache_arg = SLJIT_MEM;
+                compiler->cache_argw = argw;
+                arg2 = TMP_REG3;
+            }
+            else if (offset_in_reg)
+                arg2 = reg;
+            else /* It must be a mov operation, so tmp1 must be free to use. */
+                arg2 = TMP_REG1;
+            FAIL_IF(load_immediate(compiler, arg2, argw));
+        }
+    }
+
+    dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg));
+    delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
+    if (!base)
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
+    if (!(flags & WRITE_BACK))
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
+    FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot));
+    /* Write back: base += offset. arg2 is never the loaded register here. */
+    return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base));
+}
+
+/* Single, stand-alone memory access: tries the one instruction form first
+   and otherwise resets the offset cache and uses getput_arg without a
+   following operand. */
+static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw)
+{
+    int handled = getput_arg_fast(compiler, flags, reg, arg, argw);
+
+    if (!handled) {
+        compiler->cache_arg = 0;
+        compiler->cache_argw = 0;
+        return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+    }
+    return compiler->error;
+}
+
+/* Memory access for (arg1, arg1w) which may share its offset with the
+   following operand (arg2, arg2w). The offset cache is left untouched, so
+   the caller is responsible for resetting it. */
+static SLJIT_INLINE int emit_op_mem2(struct sljit_compiler *compiler, int flags, int reg, int arg1, sljit_w arg1w, int arg2, sljit_w arg2w)
+{
+    return getput_arg_fast(compiler, flags, reg, arg1, arg1w)
+        ? compiler->error
+        : getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+/* Common driver of the integer operations.
+   Brings src1 and src2 into registers (src2 may stay an immediate), calls
+   emit_single_op() and finally stores the result if dst is a memory operand.
+   flags carries the data type of the access and the IMM_OP / CUMULATIVE_OP /
+   WRITE_BACK / SET_FLAGS options; SLOW_* and SRC2_IMM bits are added here.
+   Returns SLJIT_SUCCESS or the error code of the first failing step. */
+static int emit_op(struct sljit_compiler *compiler, int op, int flags,
+    int dst, sljit_w dstw,
+    int src1, sljit_w src1w,
+    int src2, sljit_w src2w)
+{
+    /* arg1 goes to TMP_REG1 or src reg
+       arg2 goes to TMP_REG2, imm or src reg
+       TMP_REG3 can be used for caching
+       result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+    int dst_r = TMP_REG2;
+    int src1_r;
+    sljit_w src2_r = 0;
+    int sugg_src2_r = TMP_REG2;
+
+    /* Every operation starts with an empty offset cache. */
+    compiler->cache_arg = 0;
+    compiler->cache_argw = 0;
+
+    if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REG3) {
+        /* Register destination: compute the result in place. For moves the
+           source is loaded directly into the destination. */
+        dst_r = dst;
+        flags |= REG_DEST;
+        if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)
+            sugg_src2_r = dst_r;
+    }
+    else if (dst == SLJIT_UNUSED) {
+        /* A move without destination and without memory source is a no-op. */
+        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
+            return SLJIT_SUCCESS;
+    }
+    else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
+        flags |= SLOW_DEST;
+
+    if (flags & IMM_OP) {
+        /* A non-zero immediate which fits into [SIMM_MIN, SIMM_MAX] is passed
+           to emit_single_op as is (SRC2_IMM). */
+        if ((src2 & SLJIT_IMM) && src2w) {
+            if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
+                flags |= SRC2_IMM;
+                src2_r = src2w;
+            }
+        }
+        if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
+            if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
+                flags |= SRC2_IMM;
+                src2_r = src1w;
+
+                /* And swap arguments. */
+                src1 = src2;
+                src1w = src2w;
+                src2 = SLJIT_IMM;
+                /* src2w = src2_r unneeded. */
+            }
+        }
+    }
+
+    /* Source 1. */
+    if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= TMP_REG3)
+        src1_r = src1;
+    else if (src1 & SLJIT_IMM) {
+        if (src1w) {
+            FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+            src1_r = TMP_REG1;
+        }
+        else
+            /* Zero immediate: register index 0 is used as the operand. */
+            src1_r = 0;
+    }
+    else {
+        /* Memory operand: loaded now if one instruction is enough,
+           otherwise postponed (SLOW_SRC1). */
+        if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
+            FAIL_IF(compiler->error);
+        else
+            flags |= SLOW_SRC1;
+        src1_r = TMP_REG1;
+    }
+
+    /* Source 2. */
+    if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) {
+        src2_r = src2;
+        flags |= REG2_SOURCE;
+        if (!(flags & REG_DEST) && GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)
+            dst_r = src2_r;
+    }
+    else if (src2 & SLJIT_IMM) {
+        if (!(flags & SRC2_IMM)) {
+            /* Moves load even a zero immediate into the suggested register. */
+            if (src2w || (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)) {
+                FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
+                src2_r = sugg_src2_r;
+            }
+            else
+                src2_r = 0;
+        }
+    }
+    else {
+        if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
+            FAIL_IF(compiler->error);
+        else
+            flags |= SLOW_SRC2;
+        src2_r = sugg_src2_r;
+    }
+
+    /* Postponed loads. When both sources are slow, the order is chosen so
+       that the operand loaded last can share its offset with dst. */
+    if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+        SLJIT_ASSERT(src2_r == TMP_REG2);
+        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+        }
+        else {
+            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+        }
+    }
+    else if (flags & SLOW_SRC1)
+        FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+    else if (flags & SLOW_SRC2)
+        FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+
+    FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+    /* Store the result for memory destinations. */
+    if (dst & SLJIT_MEM) {
+        if (!(flags & SLOW_DEST)) {
+            getput_arg_fast(compiler, flags, dst_r, dst, dstw);
+            return compiler->error;
+        }
+        return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
+    }
+
+    return SLJIT_SUCCESS;
+}
+
+/* Emits an operation without explicit operands.
+   The multiply and divide forms work on SLJIT_TEMPORARY_REG1 and
+   SLJIT_TEMPORARY_REG2 and are only implemented for SPARC 32.
+   Opcodes not listed in the switch emit nothing. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
+{
+    CHECK_ERROR();
+    check_sljit_emit_op0(compiler, op);
+
+    op = GET_OPCODE(op);
+    switch (op) {
+    case SLJIT_BREAKPOINT:
+        return push_inst(compiler, TA, UNMOVABLE_INS);
+    case SLJIT_NOP:
+        return push_inst(compiler, NOP, UNMOVABLE_INS);
+    case SLJIT_UMUL:
+    case SLJIT_SMUL:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+        /* REG1 = REG1 * REG2, then REG2 is read from the Y register
+           (presumably the upper half of the product). */
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_TEMPORARY_REG1) | S1(SLJIT_TEMPORARY_REG1) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG1)));
+        return push_inst(compiler, RDY | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2));
+#else
+#error "Implementation required"
+#endif
+    case SLJIT_UDIV:
+    case SLJIT_SDIV:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+        /* The Y register is set to zero for the unsigned division and to
+           REG1 >> 31 (arithmetic shift) for the signed one. */
+        if (op == SLJIT_UDIV)
+            FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS));
+        else {
+            FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_TEMPORARY_REG1) | IMM(31), DR(TMP_REG1)));
+            FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
+        }
+        /* TMP_REG2 keeps the dividend; REG1 = REG1 / REG2;
+           REG2 = dividend - REG1 * REG2, i.e. the remainder. */
+        FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_TEMPORARY_REG1), DR(TMP_REG2)));
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_TEMPORARY_REG1) | S1(SLJIT_TEMPORARY_REG1) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG1)));
+        FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_TEMPORARY_REG2) | S1(SLJIT_TEMPORARY_REG1) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2)));
+        FAIL_IF(push_inst(compiler, SUB | D(SLJIT_TEMPORARY_REG2) | S1(TMP_REG2) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2)));
+        return SLJIT_SUCCESS;
+#else
+#error "Implementation required"
+#endif
+    }
+
+    return SLJIT_SUCCESS;
+}
+
+/* Emits a single operand operation (moves, NOT, CLZ, NEG).
+   All forms are forwarded to emit_op() with the real operand passed as
+   src2; src1 is TMP_REG1 as a placeholder, except for NEG.
+   Opcodes not listed in the switch emit nothing. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
+    int dst, sljit_w dstw,
+    int src, sljit_w srcw)
+{
+    int flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+    CHECK_ERROR();
+    check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src, srcw);
+
+    /* Compile time check of the distance between the MOV and MOVU opcodes. */
+    SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
+
+    op = GET_OPCODE(op);
+    switch (op) {
+    /* Plain moves. Immediates of the byte and half forms are truncated to
+       the operand size before they are passed on. */
+    case SLJIT_MOV:
+        return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+    case SLJIT_MOV_UI:
+        return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+    case SLJIT_MOV_SI:
+        return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+
+    case SLJIT_MOV_UB:
+        return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
+
+    case SLJIT_MOV_SB:
+        return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
+
+    case SLJIT_MOV_UH:
+        return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
+
+    case SLJIT_MOV_SH:
+        return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
+
+    /* Moves with update: the matching plain move plus WRITE_BACK. */
+    case SLJIT_MOVU:
+        return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+    case SLJIT_MOVU_UI:
+        return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+    case SLJIT_MOVU_SI:
+        return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
+
+    case SLJIT_MOVU_UB:
+        return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw);
+
+    case SLJIT_MOVU_SB:
+        return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw);
+
+    case SLJIT_MOVU_UH:
+        return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw);
+
+    case SLJIT_MOVU_SH:
+        return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw);
+
+    case SLJIT_NOT:
+    case SLJIT_CLZ:
+        return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+
+    /* Negation is emitted as a subtraction from the immediate zero. */
+    case SLJIT_NEG:
+        return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+    }
+
+    return SLJIT_SUCCESS;
+}
+
+/* Emits a two operand integer operation through emit_op().
+   Commutative operations may swap their operands to use an immediate
+   (CUMULATIVE_OP); all of them accept an immediate second operand (IMM_OP).
+   Immediate shift counts are reduced to 5 bits on SPARC 32.
+   Opcodes not listed in the switch emit nothing. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
+    int dst, sljit_w dstw,
+    int src1, sljit_w src1w,
+    int src2, sljit_w src2w)
+{
+    int flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+
+    CHECK_ERROR();
+    check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);
+
+    op = GET_OPCODE(op);
+    switch (op) {
+    case SLJIT_ADD:
+    case SLJIT_ADDC:
+    case SLJIT_MUL:
+    case SLJIT_AND:
+    case SLJIT_OR:
+    case SLJIT_XOR:
+        flags |= CUMULATIVE_OP | IMM_OP;
+        break;
+
+    case SLJIT_SUB:
+    case SLJIT_SUBC:
+        flags |= IMM_OP;
+        break;
+
+    case SLJIT_SHL:
+    case SLJIT_LSHR:
+    case SLJIT_ASHR:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+        if (src2 & SLJIT_IMM)
+            src2w &= 0x1f;
+#else
+        SLJIT_ASSERT_STOP();
+#endif
+        flags |= IMM_OP;
+        break;
+
+    default:
+        return SLJIT_SUCCESS;
+    }
+
+    return emit_op(compiler, op, flags, dst, dstw, src1, src1w, src2, src2w);
+}
+
+/* Returns the reg_map entry of the given sljit register, i.e. the machine
+   register index used when encoding instructions. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
+{
+    check_sljit_get_register_index(reg);
+    return reg_map[reg];
+}
+
+/* Emits one caller supplied machine instruction.
+   instruction must point to a single sljit_ins (size == 4). The instruction
+   is emitted as UNMOVABLE_INS. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
+    void *instruction, int size)
+{
+    CHECK_ERROR();
+    check_sljit_emit_op_custom(compiler, instruction, size);
+    SLJIT_ASSERT(size == 4);
+
+    return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+/* Always reports the floating point unit as available (returns 1);
+   no run time detection is done. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
+{
+    return 1;
+}
+
+/* Emits a single operand floating point operation: SLJIT_FCMP, SLJIT_FMOV,
+   SLJIT_FNEG or SLJIT_FABS.
+   Operands above SLJIT_FLOAT_REG4 are memory operands and go through
+   TMP_FREG1 / TMP_FREG2. Register operands are shifted left by one to get
+   the index of the first single precision register of the pair; the second
+   half of a double is addressed as (index | 1).
+   Returns SLJIT_SUCCESS or the error code of the first failing step. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
+    int dst, sljit_w dstw,
+    int src, sljit_w srcw)
+{
+    int dst_fr;
+
+    CHECK_ERROR();
+    check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+
+    compiler->cache_arg = 0;
+    compiler->cache_argw = 0;
+
+    /* Strip the flag bits once, so every comparison below sees the plain
+       opcode (the same way sljit_emit_op1 and sljit_emit_op2 do it). */
+    op = GET_OPCODE(op);
+
+    if (op == SLJIT_FCMP) {
+        /* For a compare, dst is the first and src the second operand. */
+        if (dst > SLJIT_FLOAT_REG4) {
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
+            dst = TMP_FREG1;
+        }
+        else
+            dst <<= 1;
+
+        if (src > SLJIT_FLOAT_REG4) {
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
+            src = TMP_FREG2;
+        }
+        else
+            src <<= 1;
+
+        return push_inst(compiler, FCMPD | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS);
+    }
+
+    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : (dst << 1);
+
+    if (src > SLJIT_FLOAT_REG4) {
+        /* A memory source is loaded directly into the result register. */
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
+        src = dst_fr;
+    }
+    else
+        src <<= 1;
+
+    switch (op) {
+    case SLJIT_FMOV:
+        /* Register to register copy: both halves are moved. A memory
+           destination is stored directly from src below. */
+        if (src != dst_fr && dst_fr != TMP_FREG1) {
+            FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+            FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
+        }
+        break;
+    case SLJIT_FNEG:
+        /* The operation is applied to the first half, the second half is
+           only copied when source and destination differ. */
+        FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+        if (dst_fr != src)
+            FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
+        break;
+    case SLJIT_FABS:
+        FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+        if (dst_fr != src)
+            FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
+        break;
+    }
+
+    if (dst_fr == TMP_FREG1) {
+        /* Memory destination: store the result. */
+        if (op == SLJIT_FMOV)
+            dst_fr = src;
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, dst_fr, dst, dstw, 0, 0));
+    }
+
+    return SLJIT_SUCCESS;
+}
+
+/* Emits a two operand floating point operation: SLJIT_FADD, SLJIT_FSUB,
+   SLJIT_FMUL or SLJIT_FDIV.
+   Memory sources are loaded into TMP_FREG1 (src1) and TMP_FREG2 (src2); a
+   memory destination is computed in TMP_FREG2 and stored afterwards.
+   Returns SLJIT_SUCCESS or the error code of the first failing step. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
+    int dst, sljit_w dstw,
+    int src1, sljit_w src1w,
+    int src2, sljit_w src2w)
+{
+    int dst_fr, flags = 0;
+
+    CHECK_ERROR();
+    check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+    compiler->cache_arg = 0;
+    compiler->cache_argw = 0;
+
+    dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG2 : (dst << 1);
+
+    /* Operands above SLJIT_FLOAT_REG4 are memory operands. They are loaded
+       now if one instruction is enough, otherwise marked as slow and loaded
+       below. Register operands are shifted left by one. */
+    if (src1 > SLJIT_FLOAT_REG4) {
+        if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+            FAIL_IF(compiler->error);
+            src1 = TMP_FREG1;
+        } else
+            flags |= SLOW_SRC1;
+    }
+    else
+        src1 <<= 1;
+
+    if (src2 > SLJIT_FLOAT_REG4) {
+        if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+            FAIL_IF(compiler->error);
+            src2 = TMP_FREG2;
+        } else
+            flags |= SLOW_SRC2;
+    }
+    else
+        src2 <<= 1;
+
+    /* Slow loads. With two of them the order is chosen so that the operand
+       loaded last can share its offset with dst. */
+    if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+        }
+        else {
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+            FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+        }
+    }
+    else if (flags & SLOW_SRC1)
+        FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+    else if (flags & SLOW_SRC2)
+        FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+    /* From here on the slow operands are referred to by their registers. */
+    if (flags & SLOW_SRC1)
+        src1 = TMP_FREG1;
+    if (flags & SLOW_SRC2)
+        src2 = TMP_FREG2;
+
+    switch (op) {
+    case SLJIT_FADD:
+        FAIL_IF(push_inst(compiler, FADDD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        break;
+
+    case SLJIT_FSUB:
+        FAIL_IF(push_inst(compiler, FSUBD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        break;
+
+    case SLJIT_FMUL:
+        FAIL_IF(push_inst(compiler, FMULD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        break;
+
+    case SLJIT_FDIV:
+        FAIL_IF(push_inst(compiler, FDIVD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+        break;
+    }
+
+    /* Memory destination: store the result. */
+    if (dst_fr == TMP_FREG2)
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG2, dst, dstw, 0, 0));
+
+    return SLJIT_SUCCESS;
+}
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+/* Entry of a fast called code block: copies LINK_REG into dst, which is
+   either a general purpose register or a memory operand. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
+{
+    CHECK_ERROR();
+    check_sljit_emit_fast_enter(compiler, dst, dstw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+
+    if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
+        /* Register to register move. */
+        return push_inst(compiler, OR | D(dst) | S1(0) | S2(LINK_REG), DR(dst));
+    }
+
+    /* SLJIT_UNUSED is also possible, although highly unlikely. */
+    if (!(dst & SLJIT_MEM))
+        return SLJIT_SUCCESS;
+
+    return emit_op_mem(compiler, WORD_DATA, LINK_REG, dst, dstw);
+}
+
+/* Return from a fast called code block: brings src (register, memory or
+   immediate) into LINK_REG, then emits a JMPL through LINK_REG with offset
+   8, followed by a NOP. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
+{
+    CHECK_ERROR();
+    check_sljit_emit_fast_return(compiler, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);
+
+    if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
+        FAIL_IF(push_inst(compiler, OR | D(LINK_REG) | S1(0) | S2(src), DR(LINK_REG)));
+    }
+    else if (src & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, LINK_REG, src, srcw));
+    }
+    else if (src & SLJIT_IMM) {
+        FAIL_IF(load_immediate(compiler, LINK_REG, srcw));
+    }
+
+    FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(LINK_REG) | IMM(8), UNMOVABLE_INS));
+    /* Nothing useful to place after the jump. */
+    return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+/* Creates a label at the current position.
+   If the last label was created at the same code size, that label is
+   returned instead of allocating a new one. Returns NULL on failure
+   (PTR_FAIL_IF / CHECK_ERROR_PTR). */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+    struct sljit_label *label;
+
+    CHECK_ERROR_PTR();
+    check_sljit_emit_label(compiler);
+
+    if (compiler->last_label && compiler->last_label->size == compiler->size)
+        return compiler->last_label;
+
+    label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+    PTR_FAIL_IF(!label);
+    set_label(label, compiler);
+    /* Presumably prevents moving the instruction before the label into the
+       delay slot of a jump emitted after it - confirm with the jump code. */
+    compiler->delay_slot = UNMOVABLE_INS;
+    return label;
+}
+
+/* Maps an sljit condition type to the condition field of a branch
+   instruction, already placed with DA(). Integer and floating point
+   conditions use separate encodings. Unknown types stop in debug builds
+   and yield DA(0x8). */
+static sljit_ins get_cc(int type)
+{
+    int cond;
+
+    switch (type) {
+    /* Integer conditions. */
+    case SLJIT_C_EQUAL:
+    case SLJIT_C_MUL_NOT_OVERFLOW:
+        cond = 0x1;
+        break;
+
+    case SLJIT_C_NOT_EQUAL:
+    case SLJIT_C_MUL_OVERFLOW:
+        cond = 0x9;
+        break;
+
+    case SLJIT_C_LESS:
+        cond = 0x5;
+        break;
+
+    case SLJIT_C_GREATER_EQUAL:
+        cond = 0xd;
+        break;
+
+    case SLJIT_C_GREATER:
+        cond = 0xc;
+        break;
+
+    case SLJIT_C_LESS_EQUAL:
+        cond = 0x4;
+        break;
+
+    case SLJIT_C_SIG_LESS:
+        cond = 0x3;
+        break;
+
+    case SLJIT_C_SIG_GREATER_EQUAL:
+        cond = 0xb;
+        break;
+
+    case SLJIT_C_SIG_GREATER:
+        cond = 0xa;
+        break;
+
+    case SLJIT_C_SIG_LESS_EQUAL:
+        cond = 0x2;
+        break;
+
+    case SLJIT_C_OVERFLOW:
+        cond = 0x7;
+        break;
+
+    case SLJIT_C_NOT_OVERFLOW:
+        cond = 0xf;
+        break;
+
+    /* Floating point conditions. */
+    case SLJIT_C_FLOAT_EQUAL:
+        cond = 0x9;
+        break;
+
+    case SLJIT_C_FLOAT_NOT_EQUAL: /* Unordered. */
+        cond = 0x1;
+        break;
+
+    case SLJIT_C_FLOAT_LESS:
+        cond = 0x4;
+        break;
+
+    case SLJIT_C_FLOAT_GREATER_EQUAL: /* Unordered. */
+        cond = 0xc;
+        break;
+
+    case SLJIT_C_FLOAT_LESS_EQUAL:
+        cond = 0xd;
+        break;
+
+    case SLJIT_C_FLOAT_GREATER: /* Unordered. */
+        cond = 0x5;
+        break;
+
+    case SLJIT_C_FLOAT_UNORDERED:
+        cond = 0x7;
+        break;
+
+    case SLJIT_C_FLOAT_ORDERED:
+        cond = 0xf;
+        break;
+
+    default:
+        SLJIT_ASSERT_STOP();
+        cond = 0x8;
+        break;
+    }
+
+    return DA(cond);
+}
+
+/* Emits a (conditional) jump or call whose target is set later.
+   The target address is materialized in TMP_REG2 by emit_const() and
+   reached with a JMPL followed by a NOP. A conditional jump first emits a
+   branch on the inverted condition (type ^ 1) with the value 5 in its low
+   bits - presumably a displacement that skips this sequence.
+   Returns the jump record or NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
+{
+    struct sljit_jump *jump;
+
+    CHECK_ERROR_PTR();
+    check_sljit_emit_jump(compiler, type);
+
+    jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+    PTR_FAIL_IF(!jump);
+    set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+    type &= 0xff;
+
+    /* IS_MOVABLE is set when the previously emitted instruction is movable
+       and, for conditional jumps, does not set the tested condition codes. */
+    if (type < SLJIT_C_FLOAT_EQUAL) {
+        /* Integer condition. */
+        jump->flags |= IS_COND;
+        if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET))
+            jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+        PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
+#else
+#error "Implementation required"
+#endif
+    }
+    else if (type < SLJIT_JUMP) {
+        /* Floating point condition. */
+        jump->flags |= IS_COND;
+        if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET))
+            jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+        PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS));
+#else
+#error "Implementation required"
+#endif
+    } else {
+        /* Unconditional jump or call. */
+        if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+            jump->flags |= IS_MOVABLE;
+        if (type >= SLJIT_FAST_CALL)
+            jump->flags |= IS_CALL;
+    }
+
+    PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+    /* Calls use LINK_REG as the destination of the JMPL. */
+    PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? LINK_REG : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS));
+    /* The recorded address is the position of the NOP after the JMPL. */
+    jump->addr = compiler->size;
+    PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+
+    return jump;
+}
+
+/* Emits an indirect jump or call to src.
+   A register source is used directly, a memory source is loaded into
+   TMP_REG2. For an immediate source a jump record with a fixed target
+   (JUMP_ADDR) is created and the address is materialized in TMP_REG2 by
+   emit_const(). The JMPL is always followed by a NOP. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
+{
+    struct sljit_jump *jump = NULL;
+    int src_r;
+
+    CHECK_ERROR();
+    check_sljit_emit_ijump(compiler, type, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);
+
+    if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
+        src_r = src;
+    else if (src & SLJIT_IMM) {
+        jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+        FAIL_IF(!jump);
+        set_jump(jump, compiler, JUMP_ADDR);
+        jump->u.target = srcw;
+        if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+            jump->flags |= IS_MOVABLE;
+        if (type >= SLJIT_FAST_CALL)
+            jump->flags |= IS_CALL;
+
+        FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+        src_r = TMP_REG2;
+    }
+    else {
+        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
+        src_r = TMP_REG2;
+    }
+
+    /* Calls use LINK_REG as the destination of the JMPL. */
+    FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? LINK_REG : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS));
+    /* Only immediate targets have a jump record; its address is the
+       position of the NOP after the JMPL. */
+    if (jump)
+        jump->addr = compiler->size;
+    return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+/* Materializes the condition given by type as 0 or 1.
+   With SLJIT_MOV the value is written to dst; with SLJIT_OR it is or-ed
+   into dst through emit_op(). Nothing is emitted for SLJIT_UNUSED.
+   Only implemented for SPARC 32. */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
+{
+    int reg;
+
+    CHECK_ERROR();
+    check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+
+    if (dst == SLJIT_UNUSED)
+        return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+    /* A plain move to a register is computed in place, everything else
+       goes through TMP_REG2. */
+    reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2;
+
+    /* Branch on the condition with the value 3 in its low bits, followed by
+       reg = 1 and reg = 0. Presumably the first assignment is always
+       executed after the branch and the second one is skipped when the
+       branch is taken - confirm with the SPARC manual. */
+    if (type < SLJIT_C_FLOAT_EQUAL)
+        FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS));
+    else
+        FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS));
+
+    FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
+    FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));
+
+    if (GET_OPCODE(op) == SLJIT_OR)
+        return emit_op(compiler, SLJIT_OR, (GET_FLAGS(op) ? SET_FLAGS : 0) | CUMULATIVE_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0);
+
+    /* Store the value if it was not computed in the destination itself. */
+    return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+#else
+#error "Implementation required"
+#endif
+}
+
+/* Emits a constant load whose value is init_value and registers it as an
+   sljit_const record. The constant is loaded into dst if it is a general
+   purpose register, otherwise into TMP_REG2, which is then stored when dst
+   is a memory operand. Returns the record or NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
+{
+    struct sljit_const *record;
+    int target_reg = TMP_REG2;
+
+    CHECK_ERROR_PTR();
+    check_sljit_emit_const(compiler, dst, dstw, init_value);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+
+    record = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+    PTR_FAIL_IF(!record);
+    set_const(record, compiler);
+
+    if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
+        target_reg = dst;
+
+    PTR_FAIL_IF(emit_const(compiler, target_reg, init_value));
+
+    if (dst & SLJIT_MEM) {
+        PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+    }
+
+    return record;
+}


Modified: code/trunk/sljit/sljitNativeX86_32.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_32.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativeX86_32.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -194,7 +194,6 @@
     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
     SLJIT_ASSERT(compiler->args >= 0);
-    ADJUST_LOCAL_OFFSET(src, srcw);


     compiler->flags_saved = 0;
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


Modified: code/trunk/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_64.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativeX86_64.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -273,7 +273,6 @@


     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
-    ADJUST_LOCAL_OFFSET(src, srcw);


     compiler->flags_saved = 0;
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2012-10-20 20:52:52 UTC (rev 1148)
+++ code/trunk/sljit/sljitNativeX86_common.c    2012-10-20 21:33:38 UTC (rev 1149)
@@ -24,7 +24,7 @@
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */


-SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
+SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
 {
     return "x86" SLJIT_CPUINFO;
 }
@@ -67,7 +67,7 @@
 #define TMP_REGISTER    (SLJIT_NO_REGISTERS + 1)


 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
-  0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
+    0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
 };


 #define CHECK_EXTRA_REGS(p, w, do) \
@@ -95,20 +95,20 @@
 #ifndef _WIN64
 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
-  0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
+    0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
 };
 /* low-map. reg_map & 0x7. */
 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
-  0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
+    0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
 };
 #else
 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
-  0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
+    0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
 };
 /* low-map. reg_map & 0x7. */
 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
-  0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
+    0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
 };
 #endif


@@ -203,10 +203,10 @@
     case SLJIT_C_MUL_NOT_OVERFLOW:
         return 0x81;


-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         return 0x8a;


-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
         return 0x8b;
     }
     return 0;
@@ -2021,7 +2021,7 @@
 static sljit_i sse2_data[3 + 4 + 4];
 static sljit_i *sse2_buffer;


-static void init_compiler()
+static void init_compiler(void)
 {
     sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
     sse2_buffer[0] = 0;
@@ -2477,11 +2477,11 @@
         cond_set = 0x91;
         break;


-    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_UNORDERED:
         cond_set = 0x9a;
         break;


-    case SLJIT_C_FLOAT_NOT_NAN:
+    case SLJIT_C_FLOAT_ORDERED:
         cond_set = 0x9b;
         break;
     }