[Pcre-svn] [714] code/trunk/sljit: JIT compiler update: MIPS III support

Startseite
Nachricht löschen
Autor: Subversion repository
Datum:  
To: pcre-svn
Betreff: [Pcre-svn] [714] code/trunk/sljit: JIT compiler update: MIPS III support
Revision: 714
          http://vcs.pcre.org/viewvc?view=rev&revision=714
Author:   zherczeg
Date:     2011-09-28 18:40:47 +0100 (Wed, 28 Sep 2011)


Log Message:
-----------
JIT compiler update: MIPS III support

Modified Paths:
--------------
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitNativeMIPS_32.c
    code/trunk/sljit/sljitNativeMIPS_common.c


Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2011-09-27 11:03:15 UTC (rev 713)
+++ code/trunk/sljit/sljitLir.c    2011-09-28 17:40:47 UTC (rev 714)
@@ -161,10 +161,9 @@
     /* instruction types */
     #define UNMOVABLE_INS    0
     /* 1 - 31 last destination register */
-    /* 32 - 39 FCSR FCC bits */
     #define FCSR_FCC    32
     /* no destination (i.e: store) */
-    #define MOVABLE_INS    40
+    #define MOVABLE_INS    33
 #endif


#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */

Modified: code/trunk/sljit/sljitNativeMIPS_32.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_32.c    2011-09-27 11:03:15 UTC (rev 713)
+++ code/trunk/sljit/sljitNativeMIPS_32.c    2011-09-28 17:40:47 UTC (rev 714)
@@ -256,8 +256,14 @@


     case SLJIT_MUL:
         SLJIT_ASSERT(!(flags & SRC2_IMM));
-        if (!(op & SLJIT_SET_O))
+        if (!(op & SLJIT_SET_O)) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
             return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
+#else
+            FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
+            return push_inst(compiler, MFLO | D(dst), DR(dst));
+#endif
+        }
         FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
         FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1));
         FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
@@ -300,8 +306,14 @@
     case SLJIT_MOV_SB:
         SLJIT_ASSERT(src1 == TMP_REG1);
         if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SB)
+            if (op == SLJIT_MOV_SB) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
                 return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+#else
+                FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+                return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+#endif
+            }
             return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
         }
         else if (dst != src2)
@@ -312,8 +324,14 @@
     case SLJIT_MOV_SH:
         SLJIT_ASSERT(src1 == TMP_REG1);
         if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-            if (op == SLJIT_MOV_SH)
+            if (op == SLJIT_MOV_SH) {
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
                 return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+#else
+                FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+                return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+#endif
+            }
             return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
         }
         else if (dst != src2)
@@ -330,10 +348,31 @@


     case SLJIT_CLZ:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
         if (op & SLJIT_SET_E)
             FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
         if (CHECK_FLAGS(SLJIT_SET_E))
             FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
+#else
+        if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+            FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+            return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+        }
+        /* Nearly all instructions are unmovable in the following sequence. */
+        FAIL_IF(push_inst(compiler, ADDU_W | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+        /* Check zero. */
+        FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(6), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
+        /* Check sign bit. */
+        FAIL_IF(push_inst(compiler, BLTZ | S(TMP_REG1) | IMM(4), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(0), UNMOVABLE_INS));
+        /* Loop for searching the highest bit. */
+        FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, ADDIU_W | S(dst) | T(dst) | IMM(1), UNMOVABLE_INS));
+        if (op & SLJIT_SET_E)
+            return push_inst(compiler, ADDU_W | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
+#endif
         return SLJIT_SUCCESS;
     }



Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2011-09-27 11:03:15 UTC (rev 713)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2011-09-28 17:40:47 UTC (rev 714)
@@ -33,6 +33,9 @@
 #endif
 }


+/* Latest MIPS architecture. */
+/* Detect SLJIT_MIPS_32_64 */
+
 /* Length of an instruction word
    Both for mips-32 and mips-64 */
 typedef sljit_ui sljit_ins;
@@ -42,24 +45,26 @@
 #define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
 #define REAL_STACK_PTR    (SLJIT_NO_REGISTERS + 4)


+/* For position independent code, t9 must contain the function address. */
+#define PIC_ADDR_REG        TMP_REG2
+
 /* TMP_EREG1 is used mainly for literal encoding on 64 bit. */
-#define TMP_EREG1        24
-#define TMP_EREG2        25
+#define TMP_EREG1        15
+#define TMP_EREG2        24
+/* Floating point status register. */
+#define FCSR_REG        31
+/* Return address register. */
+#define RETURN_ADDR_REG        31


 /* Flags are keept in volatile registers. */
 #define EQUAL_FLAG    7
 /* And carry flag as well. */
-#define ULESS_FLAG    11
-#define UGREATER_FLAG    12
-#define LESS_FLAG    13
-#define GREATER_FLAG    14
-#define OVERFLOW_FLAG    15
+#define ULESS_FLAG    10
+#define UGREATER_FLAG    11
+#define LESS_FLAG    12
+#define GREATER_FLAG    13
+#define OVERFLOW_FLAG    14


-#define UNORD_BIT    1
-#define EQUAL_BIT    2
-#define LESS_BIT    3
-#define GREATER_BIT    4
-
 #define TMP_FREG1    (SLJIT_FLOAT_REG4 + 1)
 #define TMP_FREG2    (SLJIT_FLOAT_REG4 + 2)


@@ -92,7 +97,6 @@
 #define AND        (HI(0) | LO(36))
 #define ANDI        (HI(12))
 #define B        (HI(4))
-#define BAL        (HI(1) | (17 << 16))
 #define BC1F        (HI(17) | (8 << 21))
 #define BC1T        (HI(17) | (8 << 21) | (1 << 16))
 #define BEQ        (HI(4))
@@ -105,11 +109,8 @@
 #define C_UN_D        (HI(17) | FMT_D | LO(49))
 #define C_UEQ_D        (HI(17) | FMT_D | LO(51))
 #define C_ULT_D        (HI(17) | FMT_D | LO(53))
-#define CLZ        (HI(28) | LO(32))
 #define DIV_D        (HI(17) | FMT_D | LO(3))
-#define EXT        (HI(31) | LO(0))
 #define J        (HI(2))
-#define JAL        (HI(3))
 #define JALR        (HI(0) | LO(9))
 #define JR        (HI(0) | LO(8))
 #define LD        (HI(55))
@@ -123,7 +124,6 @@
 #define CFC1        (HI(17) | (2 << 21))
 #define MOVN        (HI(0) | LO(11))
 #define MOVZ        (HI(0) | LO(10))
-#define MUL        (HI(28) | LO(2))
 #define MUL_D        (HI(17) | FMT_D | LO(2))
 #define MULT        (HI(0) | LO(24))
 #define NOP        (HI(0) | LO(0))
@@ -132,8 +132,6 @@
 #define ORI        (HI(13))
 #define SD        (HI(63))
 #define SDC1        (HI(61))
-#define SEB        (HI(31) | (16 << 6) | LO(32))
-#define SEH        (HI(31) | (24 << 6) | LO(32))
 #define SLT        (HI(0) | LO(42))
 #define SLTI        (HI(10))
 #define SLTIU        (HI(11))
@@ -150,16 +148,21 @@
 #define XOR        (HI(0) | LO(38))
 #define XORI        (HI(14))


+#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+#define CLZ        (HI(28) | LO(32))
+#define MUL        (HI(28) | LO(2))
+#define SEB        (HI(31) | (16 << 6) | LO(32))
+#define SEH        (HI(31) | (24 << 6) | LO(32))
+#endif
+
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
 #define ADDU_W        ADDU
 #define ADDIU_W        ADDIU
-#define EXT_W        EXT
 #define SLL_W        SLL
 #define SUBU_W        SUBU
 #else
 #define ADDU_W        DADDU
 #define ADDIU_W        DADDIU
-#define EXT_W        DEXT
 #define SLL_W        DSLL
 #define SUBU_W        DSUBU
 #endif
@@ -169,7 +172,7 @@
 #define UIMM_MAX    (0xffff)


static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
- 0, 2, 5, 6, 3, 4, 17, 18, 19, 20, 21, 16, 8, 9, 10, 29
+ 0, 2, 5, 6, 3, 8, 17, 18, 19, 20, 21, 16, 4, 25, 9, 29
};

 /* dest_reg is the absolute name of the register
@@ -196,7 +199,7 @@
     sljit_ins *inst;
     sljit_ins saved_inst;


-    if (jump->flags & SLJIT_REWRITABLE_JUMP)
+    if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_JAL))
         return code_ptr;


     if (jump->flags & JUMP_ADDR)
@@ -217,7 +220,7 @@


             if (!(jump->flags & IS_COND)) {
                 inst[0] = inst[-1];
-                inst[-1] = (jump->flags & IS_JAL) ? BAL : B;
+                inst[-1] = B;
                 jump->addr -= sizeof(sljit_ins);
                 return inst;
             }
@@ -234,7 +237,7 @@
         jump->flags |= PATCH_B;


         if (!(jump->flags & IS_COND)) {
-            inst[0] = (jump->flags & IS_JAL) ? BAL : B;
+            inst[0] = B;
             inst[1] = NOP;
             return inst + 1;
         }
@@ -262,7 +265,7 @@
         if ((target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) {
             jump->flags |= PATCH_J;
             inst[0] = inst[-1];
-            inst[-1] = (jump->flags & IS_JAL) ? JAL : J;
+            inst[-1] = J;
             jump->addr -= sizeof(sljit_ins);
             return inst;
         }
@@ -270,7 +273,7 @@


     if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) {
         jump->flags |= PATCH_J;
-        inst[0] = (jump->flags & IS_JAL) ? JAL : J;
+        inst[0] = J;
         inst[1] = NOP;
         return inst + 1;
     }
@@ -477,7 +480,7 @@
         local_size = 0;
     }


-    FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(31) | IMM(local_size - 1 * (int)sizeof(sljit_w)), MOVABLE_INS));
+    FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (int)sizeof(sljit_w)), MOVABLE_INS));
     if (compiler->has_locals)
         FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), MOVABLE_INS));
     if (generals >= 1)
@@ -539,7 +542,7 @@
         local_size = 0;
     }


-    FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(31) | IMM(local_size - 1 * (int)sizeof(sljit_w)), 31));
+    FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (int)sizeof(sljit_w)), RETURN_ADDR_REG));
     if (compiler->generals >= 5)
         FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_EREG2)));
     if (compiler->generals >= 4)
@@ -553,7 +556,7 @@
     if (compiler->has_locals)
         FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), DR(SLJIT_LOCALS_REG)));


-    FAIL_IF(push_inst(compiler, JR | SA(31), UNMOVABLE_INS));
+    FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
     if (compiler->local_size <= SIMM_MAX)
         return push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(REAL_STACK_PTR) | IMM(compiler->local_size), UNMOVABLE_INS);
     else
@@ -1054,13 +1057,16 @@


 int sljit_is_fpu_available(void)
 {
-#if 0
+#if (defined SLJIT_QEMU && SLJIT_QEMU)
+    /* Qemu says fir is 0 by default. */
+    return 1;
+#elif defined(__GNUC__)
     sljit_w fir;
     asm ("cfc1 %0, $0" : "=r"(fir));
     return (fir >> 22) & 0x1;
+#else
+#error "FIR check is not implemented for this architecture"
 #endif
-    /* Qemu says fir is 0 by default. */
-    return 1;
 }


 static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw)
@@ -1124,13 +1130,24 @@
         }


         /* src and dst are swapped. */
-        if (op & SLJIT_SET_E)
-            FAIL_IF(push_inst(compiler, C_UEQ_D | FT(src) | FS(dst) | (EQUAL_BIT << 8), FCSR_FCC + EQUAL_BIT));
+        if (op & SLJIT_SET_E) {
+            FAIL_IF(push_inst(compiler, C_UEQ_D | FT(src) | FS(dst), UNMOVABLE_INS));
+            FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
+            FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
+            FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
+        }
         if (op & SLJIT_SET_S) {
-            FAIL_IF(push_inst(compiler, C_ULT_D | FT(src) | FS(dst) | (LESS_BIT << 8), FCSR_FCC + LESS_BIT));
-            FAIL_IF(push_inst(compiler, C_ULT_D | FT(dst) | FS(src) | (GREATER_BIT << 8), FCSR_FCC + GREATER_BIT));
+            /* Mixing the instructions for the two checks. */
+            FAIL_IF(push_inst(compiler, C_ULT_D | FT(src) | FS(dst), UNMOVABLE_INS));
+            FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
+            FAIL_IF(push_inst(compiler, C_ULT_D | FT(dst) | FS(src), UNMOVABLE_INS));
+            FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
+            FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
+            FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
+            FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
+            FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
         }
-        return push_inst(compiler, C_UN_D | FT(src) | FS(dst) | (UNORD_BIT << 8), FCSR_FCC + UNORD_BIT);
+        return push_inst(compiler, C_UN_D | FT(src) | FS(dst), FCSR_FCC);
     }


     dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst;
@@ -1225,9 +1242,9 @@
     compiler->local_size = (local_size + 15) & ~0xf;


     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
-        return push_inst(compiler, ADDU_W | SA(31) | TA(0) | D(dst), DR(dst));
+        return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
     else if (dst & SLJIT_MEM)
-        return emit_op_mem(compiler, WORD_DATA, 31, dst, dstw);
+        return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
     return SLJIT_SUCCESS;
 }


@@ -1237,13 +1254,13 @@
     check_sljit_emit_fast_return(compiler, src, srcw);


     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
-        FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(31), 31));
+        FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG));
     else if (src & SLJIT_MEM)
-        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, 31, src, srcw));
+        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
     else if (src & SLJIT_IMM)
-        FAIL_IF(load_immediate(compiler, 31, srcw));
+        FAIL_IF(load_immediate(compiler, RETURN_ADDR_REG, srcw));


-    FAIL_IF(push_inst(compiler, JR | SA(31), UNMOVABLE_INS));
+    FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
     return push_inst(compiler, NOP, UNMOVABLE_INS);
 }


@@ -1284,15 +1301,15 @@
     flags = IS_BIT26_COND; \
     delay_check = src;


-#define BR_T(bit) \
-    inst = BC1T | (bit << 18) | JUMP_LENGTH; \
+#define BR_T() \
+    inst = BC1T | JUMP_LENGTH; \
     flags = IS_BIT16_COND; \
-    delay_check = FCSR_FCC + bit;
+    delay_check = FCSR_FCC;


-#define BR_F(bit) \
-    inst = BC1F | (bit << 18) | JUMP_LENGTH; \
+#define BR_F() \
+    inst = BC1F | JUMP_LENGTH; \
     flags = IS_BIT16_COND; \
-    delay_check = FCSR_FCC + bit;
+    delay_check = FCSR_FCC;


struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
@@ -1311,21 +1328,27 @@

     switch (type) {
     case SLJIT_C_EQUAL:
+    case SLJIT_C_FLOAT_NOT_EQUAL:
         BR_NZ(EQUAL_FLAG);
         break;
     case SLJIT_C_NOT_EQUAL:
+    case SLJIT_C_FLOAT_EQUAL:
         BR_Z(EQUAL_FLAG);
         break;
     case SLJIT_C_LESS:
+    case SLJIT_C_FLOAT_LESS:
         BR_Z(ULESS_FLAG);
         break;
     case SLJIT_C_GREATER_EQUAL:
+    case SLJIT_C_FLOAT_GREATER_EQUAL:
         BR_NZ(ULESS_FLAG);
         break;
     case SLJIT_C_GREATER:
+    case SLJIT_C_FLOAT_GREATER:
         BR_Z(UGREATER_FLAG);
         break;
     case SLJIT_C_LESS_EQUAL:
+    case SLJIT_C_FLOAT_LESS_EQUAL:
         BR_NZ(UGREATER_FLAG);
         break;
     case SLJIT_C_SIG_LESS:
@@ -1348,29 +1371,11 @@
     case SLJIT_C_MUL_NOT_OVERFLOW:
         BR_NZ(OVERFLOW_FLAG);
         break;
-    case SLJIT_C_FLOAT_EQUAL:
-        BR_F(EQUAL_BIT);
-        break;
-    case SLJIT_C_FLOAT_NOT_EQUAL:
-        BR_T(EQUAL_BIT);
-        break;
-    case SLJIT_C_FLOAT_LESS:
-        BR_F(LESS_BIT);
-        break;
-    case SLJIT_C_FLOAT_GREATER_EQUAL:
-        BR_T(LESS_BIT);
-        break;
-    case SLJIT_C_FLOAT_GREATER:
-        BR_F(GREATER_BIT);
-        break;
-    case SLJIT_C_FLOAT_LESS_EQUAL:
-        BR_T(GREATER_BIT);
-        break;
     case SLJIT_C_FLOAT_NAN:
-        BR_F(UNORD_BIT);
+        BR_F();
         break;
     case SLJIT_C_FLOAT_NOT_NAN:
-        BR_T(UNORD_BIT);
+        BR_T();
         break;
     default:
         /* Not conditional branch. */
@@ -1385,18 +1390,19 @@
     if (inst)
         PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS));


-    if (type >= SLJIT_CALL1)
-        PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), 4));
-
     PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
-    if (type <= SLJIT_JUMP)
+    if (type <= SLJIT_JUMP) {
         PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
-    else {
+        jump->addr = compiler->size;
+        PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+    } else {
+        /* Cannot be optimized out. */
+        SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
         jump->flags |= IS_JAL;
-        PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(31), UNMOVABLE_INS));
+        PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+        jump->addr = compiler->size;
+        PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), UNMOVABLE_INS));
     }
-    jump->addr = compiler->size;
-    PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
     return jump;
 }


@@ -1567,25 +1573,30 @@
     }


     if (type >= SLJIT_CALL0) {
-        if (src & SLJIT_IMM) {
-            FAIL_IF(load_immediate(compiler, 25, srcw));
-            FAIL_IF(push_inst(compiler, JALR | SA(25) | DA(31), UNMOVABLE_INS));
+        SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+        if (src & (SLJIT_IMM | SLJIT_MEM)) {
+            if (src & SLJIT_IMM)
+                FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
+            else {
+                SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
+                FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+            }
+            FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
             /* We need an extra instruction in any case. */
             return push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), UNMOVABLE_INS);
         }
-        if (src & SLJIT_MEM)
-            FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));


+        /* Register input. */
         if (type >= SLJIT_CALL1)
             FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), 4));
-        FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(31), UNMOVABLE_INS));
-        return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | DA(25), UNMOVABLE_INS);
+        FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+        return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS);
     }


     if (src & SLJIT_IMM) {
         jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
         FAIL_IF(!jump);
-        set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_CALL0) ? IS_JAL : 0));
+        set_jump(jump, compiler, JUMP_ADDR);
         jump->u.target = srcw;


         if (compiler->delay_slot != UNMOVABLE_INS)
@@ -1623,10 +1634,14 @@
         break;
     case SLJIT_C_LESS:
     case SLJIT_C_GREATER_EQUAL:
+    case SLJIT_C_FLOAT_LESS:
+    case SLJIT_C_FLOAT_GREATER_EQUAL:
         dst_ar = ULESS_FLAG;
         break;
     case SLJIT_C_GREATER:
     case SLJIT_C_LESS_EQUAL:
+    case SLJIT_C_FLOAT_GREATER:
+    case SLJIT_C_FLOAT_LESS_EQUAL:
         dst_ar = UGREATER_FLAG;
         break;
     case SLJIT_C_SIG_LESS:
@@ -1647,29 +1662,21 @@
         dst_ar = sugg_dst_ar;
         type ^= 0x1; /* Flip type bit for the XORI below. */
         break;
+    case SLJIT_C_FLOAT_EQUAL:
+    case SLJIT_C_FLOAT_NOT_EQUAL:
+        dst_ar = EQUAL_FLAG;
+        break;
+
+    case SLJIT_C_FLOAT_NAN:
+    case SLJIT_C_FLOAT_NOT_NAN:
+        FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
+        FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar));
+        FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+        dst_ar = sugg_dst_ar;
+        break;
+
     default:
-        if (type >= SLJIT_C_FLOAT_EQUAL && type <= SLJIT_C_FLOAT_NOT_NAN) {
-            FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(31), sugg_dst_ar));
-            switch (type) {
-            case SLJIT_C_FLOAT_EQUAL:
-            case SLJIT_C_FLOAT_NOT_EQUAL:
-                dst_ar = EQUAL_BIT + 24;
-                break;
-            case SLJIT_C_FLOAT_LESS:
-            case SLJIT_C_FLOAT_GREATER_EQUAL:
-                dst_ar = LESS_BIT + 24;
-                break;
-            case SLJIT_C_FLOAT_GREATER:
-            case SLJIT_C_FLOAT_LESS_EQUAL:
-                dst_ar = GREATER_BIT + 24;
-                break;
-            case SLJIT_C_FLOAT_NAN:
-            case SLJIT_C_FLOAT_NOT_NAN:
-                dst_ar = UNORD_BIT + 24;
-                break;
-            }
-            FAIL_IF(push_inst(compiler, EXT_W | SA(sugg_dst_ar) | TA(sugg_dst_ar) | (dst_ar << 6), sugg_dst_ar));
-        }
+        SLJIT_ASSERT_STOP();
         dst_ar = sugg_dst_ar;
         break;
     }