[Pcre-svn] [1676] code/trunk: Major JIT compiler update.

Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1676] code/trunk: Major JIT compiler update.
Revision: 1676
          http://vcs.pcre.org/viewvc?view=rev&revision=1676
Author:   zherczeg
Date:     2017-01-23 07:41:37 +0000 (Mon, 23 Jan 2017)
Log Message:
-----------
Major JIT compiler update.
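
In outline, the diff below migrates the sljit flag interface used throughout the JIT: SLJIT_ASSERT_STOP() becomes SLJIT_UNREACHABLE(), SLJIT_SET_E becomes SLJIT_SET_Z, and the generic SLJIT_SET_U / SLJIT_SET_S requests are replaced by flags that name the condition actually tested (SLJIT_SET_GREATER, SLJIT_SET_LESS_EQUAL, SLJIT_SET_SIG_GREATER, and so on). The protected executable allocator likewise switches from SLJIT_ENABLE_EXEC to SLJIT_EXEC_OFFSET. A minimal sketch of the flag migration in the OP2/OP_FLAGS style of pcre_jit_compile.c ("range" here is a placeholder immediate, not a value taken from this commit):

    /* Old interface: request the generic unsigned flags, then test LESS_EQUAL. */
    /* OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, range); */
    /* OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);          */

    /* New interface: request exactly the condition that is consumed afterwards. */
    OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, range);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);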


Modified Paths:
--------------
    code/trunk/pcre_jit_compile.c
    code/trunk/sljit/sljitConfig.h
    code/trunk/sljit/sljitConfigInternal.h
    code/trunk/sljit/sljitExecAllocator.c
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitLir.h
    code/trunk/sljit/sljitNativeARM_32.c
    code/trunk/sljit/sljitNativeARM_64.c
    code/trunk/sljit/sljitNativeARM_T2_32.c
    code/trunk/sljit/sljitNativeMIPS_32.c
    code/trunk/sljit/sljitNativeMIPS_64.c
    code/trunk/sljit/sljitNativeMIPS_common.c
    code/trunk/sljit/sljitNativePPC_32.c
    code/trunk/sljit/sljitNativePPC_64.c
    code/trunk/sljit/sljitNativePPC_common.c
    code/trunk/sljit/sljitNativeSPARC_32.c
    code/trunk/sljit/sljitNativeSPARC_common.c
    code/trunk/sljit/sljitNativeTILEGX-encoder.c
    code/trunk/sljit/sljitNativeTILEGX_64.c
    code/trunk/sljit/sljitNativeX86_32.c
    code/trunk/sljit/sljitNativeX86_64.c
    code/trunk/sljit/sljitNativeX86_common.c
    code/trunk/sljit/sljitProtExecAllocator.c
    code/trunk/sljit/sljitUtils.c


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/pcre_jit_compile.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -784,7 +784,7 @@


   default:
   /* All opcodes are supported now! */
-  SLJIT_ASSERT_STOP();
+  SLJIT_UNREACHABLE();
   return NULL;
   }
 }
@@ -2089,7 +2089,7 @@
           break;


           default:
-          SLJIT_ASSERT_STOP();
+          SLJIT_UNREACHABLE();
           break;
           }
       cc += size;
@@ -2103,7 +2103,7 @@
     break;


     case end:
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     break;
     }


@@ -2312,7 +2312,7 @@
{
DEFINE_COMPILER;

-OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
+OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
}

@@ -2396,7 +2396,7 @@
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
loop = LABEL();
OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
}
@@ -2434,7 +2434,7 @@
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
loop = LABEL();
OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}

@@ -2463,7 +2463,7 @@
     break;


     default:
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     break;
     }
   SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
@@ -2501,7 +2501,7 @@
 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
 #endif
 OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
 JUMPTO(SLJIT_NOT_ZERO, loop);
 JUMPHERE(early_quit);


@@ -3106,7 +3106,7 @@
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Skip low surrogate if necessary. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -3167,7 +3167,7 @@
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -3181,7 +3181,7 @@
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
@@ -3215,7 +3215,7 @@
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -3222,7 +3222,7 @@
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
@@ -3246,7 +3246,7 @@

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
 jump = JUMP(SLJIT_NOT_ZERO);
 /* Two byte sequence. */
 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
@@ -3365,7 +3365,7 @@
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
+  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -3403,7 +3403,7 @@
   {
   singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
-  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
   OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
   OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -4158,7 +4158,7 @@
 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
   if (sljit_x86_is_cmov_available())
     {
-    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
+    OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
     sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
     }
 #endif
@@ -4215,7 +4215,7 @@
     }
   else if (sljit_x86_is_cmov_available())
     {
-    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
     sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
     }
   else
@@ -4249,10 +4249,10 @@
     }
   else
     {
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
     OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
-    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
+    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
     found = JUMP(SLJIT_NOT_ZERO);
     }
   }
@@ -4571,7 +4571,7 @@
   firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);


OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -4616,7 +4616,7 @@
JUMPHERE(foundcr);
notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -4670,7 +4670,7 @@
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
found = JUMP(SLJIT_NOT_ZERO);
}

@@ -4692,7 +4692,7 @@
{
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -4786,8 +4786,7 @@
/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
-OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
-jump = JUMP(SLJIT_SIG_LESS_EQUAL);
+jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
@@ -4837,10 +4836,10 @@
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(jump);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
@@ -4881,10 +4880,10 @@
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(jump);
}
@@ -4913,7 +4912,7 @@
}
set_jumps(skipread_list, LABEL());

-OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}

@@ -5064,7 +5063,7 @@
return TRUE;

default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return FALSE;
}
}
@@ -5077,9 +5076,9 @@
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
-OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
@@ -5087,12 +5086,12 @@
#endif
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
}
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

@@ -5103,11 +5102,11 @@

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
@@ -5114,23 +5113,23 @@
{
#endif
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
}
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5143,22 +5142,22 @@
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
-OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
{
#endif
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
}
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5183,7 +5182,7 @@
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
@@ -5227,7 +5226,7 @@
JUMPHERE(jump);
#endif
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);

JUMPHERE(jump);
@@ -5394,7 +5393,7 @@
#endif

       default:
-      SLJIT_ASSERT_STOP();
+      SLJIT_UNREACHABLE();
       break;
       }
     context->ucharptr = 0;
@@ -5568,7 +5567,7 @@
       break;


       default:
-      SLJIT_ASSERT_STOP();
+      SLJIT_UNREACHABLE();
       break;
       }
     cc += 2;
@@ -5592,7 +5591,7 @@
       OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
       OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
-      OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
       add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
       }


@@ -5625,7 +5624,7 @@
     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
     OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
-    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));


#ifdef COMPILE_PCRE8
@@ -5735,14 +5734,14 @@

     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
       {
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
       numberofcmps++;
       }
     else if (numberofcmps > 0)
       {
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       numberofcmps = 0;
       }
@@ -5761,14 +5760,14 @@


     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
       {
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
       OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
       numberofcmps++;
       }
     else if (numberofcmps > 0)
       {
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       numberofcmps = 0;
       }
@@ -5793,12 +5792,12 @@
       break;


       case PT_LAMP:
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       break;


@@ -5820,33 +5819,33 @@
       case PT_SPACE:
       case PT_PXSPACE:
       SET_CHAR_OFFSET(9);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);


-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);


-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);


       SET_TYPE_OFFSET(ucp_Zl);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       break;


       case PT_WORD:
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
       /* Fall through. */


       case PT_ALNUM:
       SET_TYPE_OFFSET(ucp_Ll);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
       OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
       SET_TYPE_OFFSET(ucp_Nd);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       break;


@@ -5868,7 +5867,7 @@
           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
           }
-        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
         other_cases += 2;
         }
@@ -5881,42 +5880,42 @@
           OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
           OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
           }
-        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);


-        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
-        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
+        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);


         other_cases += 3;
         }
       else
         {
-        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
         OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
         }


       while (*other_cases != NOTACHAR)
         {
-        OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
-        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
         }
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       break;


       case PT_UCNC:
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);


       SET_CHAR_OFFSET(0xa0);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
       SET_CHAR_OFFSET(0);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       break;


@@ -5923,7 +5922,7 @@
       case PT_PXGRAPH:
       /* C and Z groups are the farthest two groups. */
       SET_TYPE_OFFSET(ucp_Ll);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);


       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
@@ -5930,13 +5929,13 @@


       /* In case of ucp_Cf, we overwrite the result. */
       SET_CHAR_OFFSET(0x2066);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);


-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);


-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);


       JUMPHERE(jump);
@@ -5946,10 +5945,10 @@
       case PT_PXPRINT:
       /* C and Z groups are the farthest two groups. */
       SET_TYPE_OFFSET(ucp_Ll);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);


-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
       OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);


       jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
@@ -5956,10 +5955,10 @@


       /* In case of ucp_Cf, we overwrite the result. */
       SET_CHAR_OFFSET(0x2066);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);


-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
       OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);


       JUMPHERE(jump);
@@ -5968,21 +5967,21 @@


       case PT_PXPUNCT:
       SET_TYPE_OFFSET(ucp_Sc);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);


       SET_CHAR_OFFSET(0);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
       OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);


       SET_TYPE_OFFSET(ucp_Pc);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
       jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
       break;


       default:
-      SLJIT_ASSERT_STOP();
+      SLJIT_UNREACHABLE();
       break;
       }
     cc += 2;
@@ -6043,10 +6042,10 @@
     else
       {
       jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
-      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
-      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
       add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
       check_partial(common, TRUE);
       add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
@@ -6068,9 +6067,9 @@
     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
     jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
     OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
     jump[2] = JUMP(SLJIT_GREATER);
-    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
+    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
     /* Equal. */
     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
     jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
@@ -6204,7 +6203,7 @@
     label = LABEL();
     add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
     skip_char_back(common);
-    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
     JUMPTO(SLJIT_NOT_ZERO, label);
     }
   else
@@ -6217,7 +6216,7 @@
   check_start_used_ptr(common);
   return cc + LINK_SIZE;
   }
-SLJIT_ASSERT_STOP();
+SLJIT_UNREACHABLE();
 return cc;
 }


@@ -6250,7 +6249,7 @@
 #endif
     read_char8_type(common, type == OP_NOT_DIGIT);
     /* Flip the starting bit in the negative case. */
-  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
+  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
   add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
   return cc;


@@ -6264,7 +6263,7 @@
   else
 #endif
     read_char8_type(common, type == OP_NOT_WHITESPACE);
-  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
+  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
   add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
   return cc;


@@ -6278,7 +6277,7 @@
   else
 #endif
     read_char8_type(common, type == OP_NOT_WORDCHAR);
-  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
+  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
   add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
   return cc;


@@ -6320,7 +6319,7 @@
 #elif defined COMPILE_PCRE16
     jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
     OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -6418,7 +6417,7 @@
   OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
   OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
   OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
-  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
   JUMPTO(SLJIT_NOT_ZERO, label);


OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
@@ -6587,7 +6586,7 @@
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
@@ -6604,7 +6603,7 @@
return cc + GET(cc, 0) - 1;
#endif
}
-SLJIT_ASSERT_STOP();
+SLJIT_UNREACHABLE();
return cc;
}

@@ -6790,9 +6789,9 @@
 #endif /* SUPPORT_UTF && SUPPORT_UCP */
   {
   if (ref)
-    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
   else
-    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);


   if (withchecks)
     jump = JUMP(SLJIT_ZERO);
@@ -6883,7 +6882,7 @@
   cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
   break;
   default:
-  SLJIT_ASSERT_STOP();
+  SLJIT_UNREACHABLE();
   break;
   }


@@ -7195,10 +7194,10 @@
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

 /* Check return value. */
-OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
 if (common->forced_quit_label == NULL)
-  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
+  add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
 else
   JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
 return cc + 2 + 2 * LINK_SIZE;
@@ -8090,13 +8089,13 @@
     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
-    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
     slot += common->name_entry_size;
     i--;
     while (i-- > 0)
       {
       OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
-      OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
+      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
       slot += common->name_entry_size;
       }
     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
@@ -8244,7 +8243,7 @@
     {
     if (has_alternatives)
       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
     JUMPTO(SLJIT_NOT_ZERO, rmax_label);
     /* Drop STR_PTR for greedy plus quantifier. */
     if (opcode != OP_ONCE)
@@ -8274,7 +8273,7 @@
 if (repeat_type == OP_EXACT)
   {
   count_match(common);
-  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
   JUMPTO(SLJIT_NOT_ZERO, rmax_label);
   }
 else if (repeat_type == OP_UPTO)
@@ -8374,7 +8373,7 @@
   break;


default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}

@@ -8824,7 +8823,7 @@
     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
     label = LABEL();
     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
-    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
     JUMPTO(SLJIT_NOT_ZERO, label);
     }
   else
@@ -8832,7 +8831,7 @@
     OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
     label = LABEL();
     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
-    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
     JUMPTO(SLJIT_NOT_ZERO, label);
     }
   }
@@ -8862,7 +8861,7 @@
     if (opcode == OP_UPTO)
       {
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
-      OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
       jump = JUMP(SLJIT_ZERO);
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
       }
@@ -8924,7 +8923,7 @@
       label = LABEL();
       if (opcode == OP_UPTO)
         {
-        OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
         add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
         }
       compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
@@ -8944,7 +8943,7 @@
       OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
       if (opcode == OP_UPTO)
         {
-        OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
         add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
         }


@@ -8971,7 +8970,7 @@

       if (opcode == OP_UPTO)
         {
-        OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
         JUMPTO(SLJIT_NOT_ZERO, label);
         }
       else
@@ -9000,7 +8999,7 @@


       if (opcode == OP_UPTO)
         {
-        OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
         JUMPTO(SLJIT_NOT_ZERO, label);
         }
       else
@@ -9026,7 +9025,7 @@
       compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
       if (opcode == OP_UPTO)
         {
-        OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
         JUMPTO(SLJIT_NOT_ZERO, label);
         OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
         }
@@ -9113,7 +9112,7 @@
     label = LABEL();
     compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
-    OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
     JUMPTO(SLJIT_NOT_ZERO, label);
     set_jumps(no_match, LABEL());
     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
@@ -9124,7 +9123,7 @@
   label = LABEL();
   detect_partial_match(common, &no_match);
   compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
-  OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
   JUMPTO(SLJIT_NOT_ZERO, label);
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   set_jumps(no_char1_match, LABEL());
@@ -9142,7 +9141,7 @@
   break;


default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}

@@ -9569,7 +9568,7 @@
     break;


     default:
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return;
     }
   if (cc == NULL)
@@ -9677,7 +9676,7 @@
   case OP_MINUPTO:
   OP1(SLJIT_MOV, TMP1, 0, base, offset1);
   OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
-  OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
   add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));


OP1(SLJIT_MOV, base, offset1, TMP1, 0);
@@ -9723,7 +9722,7 @@
break;

default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}

@@ -10645,7 +10644,7 @@
     break;


     default:
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     break;
     }
   current = current->prev;


Modified: code/trunk/sljit/sljitConfig.h
===================================================================
--- code/trunk/sljit/sljitConfig.h    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitConfig.h    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -90,7 +90,7 @@


 /* Executable code allocation:
    If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
-   define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_ENABLE_EXEC. */
+   define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */
 #ifndef SLJIT_EXECUTABLE_ALLOCATOR
 /* Enabled by default. */
 #define SLJIT_EXECUTABLE_ALLOCATOR 1


Modified: code/trunk/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/sljit/sljitConfigInternal.h    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitConfigInternal.h    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -187,14 +187,6 @@
 /* External function definitions. */
 /**********************************/


-#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
-
-/* These libraries are needed for the macros below. */
-#include <stdlib.h>
-#include <string.h>
-
-#endif /* SLJIT_STD_MACROS_DEFINED */
-
 /* General macros:
    Note: SLJIT is designed to be independent from them as possible.


@@ -547,10 +539,10 @@
#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)

#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
-SLJIT_API_FUNC_ATTRIBUTE void sljit_enable_exec(void* from, void *to);
-#define SLJIT_ENABLE_EXEC(from, to) sljit_enable_exec((from), (to))
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
+#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr)
#else
-#define SLJIT_ENABLE_EXEC(from, to)
+#define SLJIT_EXEC_OFFSET(ptr) 0
#endif

#endif
@@ -561,13 +553,13 @@

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

-#define SLJIT_NUMBER_OF_REGISTERS 10
-#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE ((2 + 6) * sizeof(sljit_sw))
#else
/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */
-#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 6) * sizeof(sljit_sw))
#endif /* SLJIT_X86_32_FASTCALL */

#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -584,14 +576,14 @@

#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)

-#define SLJIT_NUMBER_OF_REGISTERS 11
+#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE 0

#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)

-#define SLJIT_NUMBER_OF_REGISTERS 11
-#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE 0

#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
@@ -615,7 +607,7 @@

#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)

-#define SLJIT_NUMBER_OF_REGISTERS 17
+#define SLJIT_NUMBER_OF_REGISTERS 21
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw))
@@ -671,7 +663,7 @@

#if (defined SLJIT_DEBUG && SLJIT_DEBUG)

-#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP)
+#if !defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE)

/* SLJIT_HALT_PROCESS must halt the process. */
#ifndef SLJIT_HALT_PROCESS
@@ -683,7 +675,7 @@

#include <stdio.h>

-#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */
+#endif /* !SLJIT_ASSERT || !SLJIT_UNREACHABLE */

/* Feel free to redefine these two macros. */
#ifndef SLJIT_ASSERT
@@ -698,25 +690,25 @@

#endif /* !SLJIT_ASSERT */

-#ifndef SLJIT_ASSERT_STOP
+#ifndef SLJIT_UNREACHABLE

-#define SLJIT_ASSERT_STOP() \
+#define SLJIT_UNREACHABLE() \
     do { \
         printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \
         SLJIT_HALT_PROCESS(); \
     } while (0)


-#endif /* !SLJIT_ASSERT_STOP */
+#endif /* !SLJIT_UNREACHABLE */

#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */

/* Forcing empty, but valid statements. */
#undef SLJIT_ASSERT
-#undef SLJIT_ASSERT_STOP
+#undef SLJIT_UNREACHABLE

 #define SLJIT_ASSERT(x) \
     do { } while (0)
-#define SLJIT_ASSERT_STOP() \
+#define SLJIT_UNREACHABLE() \
     do { } while (0)


#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
@@ -723,9 +715,8 @@

#ifndef SLJIT_COMPILE_ASSERT

-/* Should be improved eventually. */
 #define SLJIT_COMPILE_ASSERT(x, description) \
-    SLJIT_ASSERT(x)
+    switch(0) { case 0: case ((x) ? 1 : 0): break; }


#endif /* !SLJIT_COMPILE_ASSERT */
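
The reworked SLJIT_COMPILE_ASSERT above turns a failed assertion into a compile error (a duplicate case label) rather than deferring to the run-time SLJIT_ASSERT. A standalone illustration of the trick, using a local copy of the macro (hypothetical names, not part of the commit):

    #define MY_COMPILE_ASSERT(x, description) \
        switch(0) { case 0: case ((x) ? 1 : 0): break; }

    static void size_checks(void)
    {
        /* Compiles: the condition is non-zero, so the case labels are 0 and 1. */
        MY_COMPILE_ASSERT(sizeof(int) >= 2, int_has_at_least_16_bits);
        /* A false condition would make both labels 0 and the build would fail. */
    }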


Modified: code/trunk/sljit/sljitExecAllocator.c
===================================================================
--- code/trunk/sljit/sljitExecAllocator.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitExecAllocator.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -86,7 +86,7 @@
     return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
 }


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 {
     SLJIT_UNUSED_ARG(size);
     VirtualFree(chunk, 0, MEM_RELEASE);
@@ -96,7 +96,7 @@


 static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 {
-    void* retval;
+    void *retval;


 #ifdef MAP_ANON
     retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -111,7 +111,7 @@
     return (retval != MAP_FAILED) ? retval : NULL;
 }


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 {
     munmap(chunk, size);
 }
@@ -180,8 +180,8 @@
     sljit_uw chunk_size;


     allocator_grab_lock();
-    if (size < sizeof(struct free_block))
-        size = sizeof(struct free_block);
+    if (size < (64 - sizeof(struct block_header)))
+        size = (64 - sizeof(struct block_header));
     size = ALIGN_SIZE(size);


     free_block = free_blocks;


Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitLir.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -26,6 +26,14 @@


#include "sljitLir.h"

+#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
+
+/* These libraries are needed for the macros below. */
+#include <stdlib.h>
+#include <string.h>
+
+#endif /* SLJIT_STD_MACROS_DEFINED */
+
 #define CHECK_ERROR() \
     do { \
         if (SLJIT_UNLIKELY(compiler->error)) \
@@ -76,14 +84,18 @@


#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)

+#define VARIABLE_FLAG_SHIFT (10)
+#define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT)
+#define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT)
+
 #define GET_OPCODE(op) \
-    ((op) & ~(SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+    ((op) & ~(SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))


-#define GET_FLAGS(op) \
-    ((op) & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))
+#define HAS_FLAGS(op) \
+    ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))


 #define GET_ALL_FLAGS(op) \
-    ((op) & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+    ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))


 #define TYPE_CAST_NEEDED(op) \
     (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S16))
@@ -251,6 +263,12 @@


#endif

+#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
+#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset))
+#else
+#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr))
+#endif
+
/* Argument checking features. */

 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
@@ -289,13 +307,6 @@
         } \
     } while (0)


-#define CHECK_DYN_CODE_MOD(extra_check) \
-    if ((extra_check) && !sljit_is_dyn_code_modification_enabled()) \
-    { \
-        compiler->error = SLJIT_ERR_DYN_CODE_MOD; \
-        return NULL; \
-    }
-
 #elif (defined SLJIT_DEBUG && SLJIT_DEBUG)


/* Assertion failure occures if an invalid argument is passed. */
@@ -308,7 +319,6 @@
#define CHECK(x) x
#define CHECK_PTR(x) x
#define CHECK_REG_INDEX(x) x
-#define CHECK_DYN_CODE_MOD(extra_check) SLJIT_ASSERT(!(extra_check) || sljit_is_dyn_code_modification_enabled())

#elif (defined SLJIT_VERBOSE && SLJIT_VERBOSE)

@@ -318,7 +328,6 @@
#define CHECK(x) x
#define CHECK_PTR(x) x
#define CHECK_REG_INDEX(x) x
-#define CHECK_DYN_CODE_MOD(extra_check)

#else

@@ -326,7 +335,6 @@
#define CHECK(x)
#define CHECK_PTR(x)
#define CHECK_REG_INDEX(x)
-#define CHECK_DYN_CODE_MOD(extra_check)

#endif /* SLJIT_ARGUMENT_CHECKS */

@@ -361,6 +369,8 @@
         int_op_and_single_op_must_be_the_same);
     SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP,
         rewritable_jump_and_single_op_must_not_be_the_same);
+    SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1),
+        conditional_flags_must_be_even_numbers);


     /* Only the non-zero members must be set. */
     compiler->error = SLJIT_SUCCESS;
@@ -457,15 +467,6 @@
         compiler->error = SLJIT_ERR_ALLOC_FAILED;
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_dyn_code_modification_enabled(void)
-{
-#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) \
-        && (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
-    return 0;
-#endif
-    return 1;
-}
-
 #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
 {
@@ -649,66 +650,7 @@
     (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg))


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-#define FUNCTION_CHECK_OP() \
-    CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
-    switch (GET_OPCODE(op)) { \
-    case SLJIT_NOT: \
-    case SLJIT_CLZ: \
-    case SLJIT_AND: \
-    case SLJIT_OR: \
-    case SLJIT_XOR: \
-    case SLJIT_SHL: \
-    case SLJIT_LSHR: \
-    case SLJIT_ASHR: \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \
-        break; \
-    case SLJIT_NEG: \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
-        break; \
-    case SLJIT_MUL: \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
-        break; \
-    case SLJIT_ADD: \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \
-        break; \
-    case SLJIT_SUB: \
-        break; \
-    case SLJIT_ADDC: \
-    case SLJIT_SUBC: \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \
-        break; \
-    case SLJIT_BREAKPOINT: \
-    case SLJIT_NOP: \
-    case SLJIT_LMUL_UW: \
-    case SLJIT_LMUL_SW: \
-    case SLJIT_MOV: \
-    case SLJIT_MOV_U32: \
-    case SLJIT_MOV_P: \
-    case SLJIT_MOVU: \
-    case SLJIT_MOVU_U32: \
-    case SLJIT_MOVU_P: \
-        /* Nothing allowed */ \
-        CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
-        break; \
-    default: \
-        /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */ \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
-        break; \
-    }


-#define FUNCTION_CHECK_FOP() \
-    CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
-    switch (GET_OPCODE(op)) { \
-    case SLJIT_CMP_F64: \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
-        CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
-        break; \
-    default: \
-        /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */ \
-        CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
-        break; \
-    }
-
 #define FUNCTION_CHECK_IS_REG(r) \
     (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \
     ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
@@ -785,14 +727,6 @@
         CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
     }


-#define FUNCTION_CHECK_OP1() \
-    if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \
-        CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \
-        CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \
-        if ((src & SLJIT_MEM) && (src & REG_MASK)) \
-            CHECK_ARGUMENT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \
-    }
-
 #endif /* SLJIT_ARGUMENT_CHECKS */


 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -816,8 +750,10 @@
     do { \
         if ((r) < (SLJIT_R0 + compiler->scratches)) \
             fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \
+        else if ((r) != SLJIT_SP) \
+            fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \
         else \
-            fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \
+            fprintf(compiler->verbose, "sp"); \
     } while (0)


 #define sljit_verbose_param(compiler, p, i) \
@@ -910,6 +846,7 @@
     (char*)"sig_greater", (char*)"sig_less_equal",
     (char*)"overflow", (char*)"not_overflow",
     (char*)"mul_overflow", (char*)"mul_not_overflow",
+    (char*)"carry", (char*)"",
     (char*)"equal", (char*)"not_equal",
     (char*)"less", (char*)"greater_equal",
     (char*)"greater", (char*)"less_equal",
@@ -964,6 +901,7 @@
     CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
     CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
     CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+    compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -977,11 +915,6 @@
     sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 {
-    if (SLJIT_UNLIKELY(compiler->skip_checks)) {
-        compiler->skip_checks = 0;
-        CHECK_RETURN_OK;
-    }
-
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT));
     CHECK_ARGUMENT(args >= 0 && args <= 3);
@@ -993,6 +926,7 @@
     CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
     CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
     CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+    compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -1012,6 +946,7 @@
     }
     else
         CHECK_ARGUMENT(src == 0 && srcw == 0);
+    compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1031,6 +966,7 @@
 {
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     FUNCTION_CHECK_DST(dst, dstw);
+    compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1046,6 +982,7 @@
 {
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     FUNCTION_CHECK_SRC(src, srcw);
+    compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1063,6 +1000,8 @@
     CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW)
         || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW));
     CHECK_ARGUMENT(op < SLJIT_LMUL_UW || compiler->scratches >= 2);
+    if (op >= SLJIT_LMUL_UW)
+        compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -1088,10 +1027,50 @@


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ);
-    FUNCTION_CHECK_OP();
+
+    switch (GET_OPCODE(op)) {
+    case SLJIT_NOT:
+    case SLJIT_CLZ:
+        CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
+        break;
+    case SLJIT_NEG:
+        CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+            || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW
+            || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW);
+        break;
+    case SLJIT_MOV:
+    case SLJIT_MOV_U32:
+    case SLJIT_MOV_P:
+    case SLJIT_MOVU:
+    case SLJIT_MOVU_U32:
+    case SLJIT_MOVU_P:
+        /* Nothing allowed */
+        CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
+        break;
+    default:
+        /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */
+        CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
+        break;
+    }
+
     FUNCTION_CHECK_SRC(src, srcw);
     FUNCTION_CHECK_DST(dst, dstw);
-    FUNCTION_CHECK_OP1();
+
+    if (GET_OPCODE(op) >= SLJIT_NOT)
+        compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
+    else if (GET_OPCODE(op) >= SLJIT_MOVU) {
+        CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP);
+        CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP);
+        if ((src & REG_MASK) != SLJIT_UNUSED) {
+            CHECK_ARGUMENT((src & REG_MASK) != (dst & REG_MASK) && (src & REG_MASK) != OFFS_REG(dst));
+            CHECK_ARGUMENT((src & OFFS_REG_MASK) == SLJIT_UNUSED || srcw == 0);
+        }
+        if ((dst & REG_MASK) != SLJIT_UNUSED) {
+            CHECK_ARGUMENT((dst & REG_MASK) != OFFS_REG(src));
+            CHECK_ARGUMENT((dst & OFFS_REG_MASK) == SLJIT_UNUSED || dstw == 0);
+        }
+        compiler->last_flags = 0;
+    }
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1102,9 +1081,9 @@
         }
         else
         {
-            fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
-                !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
-                !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+            fprintf(compiler->verbose, "  %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
+                !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".",
+                !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]);
         }


         sljit_verbose_param(compiler, dst, dstw);
@@ -1128,16 +1107,55 @@


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR);
-    FUNCTION_CHECK_OP();
+
+    switch (GET_OPCODE(op)) {
+    case SLJIT_AND:
+    case SLJIT_OR:
+    case SLJIT_XOR:
+    case SLJIT_SHL:
+    case SLJIT_LSHR:
+    case SLJIT_ASHR:
+        CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
+        break;
+    case SLJIT_MUL:
+        CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
+        CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+            || GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW
+            || GET_FLAG_TYPE(op) == SLJIT_MUL_NOT_OVERFLOW);
+        break;
+    case SLJIT_ADD:
+        CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+            || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)
+            || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW
+            || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW);
+        break;
+    case SLJIT_SUB:
+        CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+            || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_NOT_OVERFLOW)
+            || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
+        break;
+    case SLJIT_ADDC:
+    case SLJIT_SUBC:
+        CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+            || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
+        CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
+        CHECK_ARGUMENT((op & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP));
+        break;
+    default:
+        SLJIT_UNREACHABLE();
+        break;
+    }
+
     FUNCTION_CHECK_SRC(src1, src1w);
     FUNCTION_CHECK_SRC(src2, src2w);
     FUNCTION_CHECK_DST(dst, dstw);
+    compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
-        fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
-            !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
-            !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+        fprintf(compiler->verbose, "  %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
+            !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".",
+            !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]);
         sljit_verbose_param(compiler, dst, dstw);
         fprintf(compiler->verbose, ", ");
         sljit_verbose_param(compiler, src1, src1w);
@@ -1211,7 +1229,7 @@
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(sljit_is_fpu_available());
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64);
-    FUNCTION_CHECK_FOP();
+    CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_FCHECK(src, srcw);
     FUNCTION_FCHECK(dst, dstw);
 #endif
@@ -1237,6 +1255,10 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
+#endif
+
     if (SLJIT_UNLIKELY(compiler->skip_checks)) {
         compiler->skip_checks = 0;
         CHECK_RETURN_OK;
@@ -1245,14 +1267,19 @@
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(sljit_is_fpu_available());
     CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64);
-    FUNCTION_CHECK_FOP();
+    CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
+    CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK)
+        || (GET_FLAG_TYPE(op) >= SLJIT_EQUAL_F64 && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_F64));
     FUNCTION_FCHECK(src1, src1w);
     FUNCTION_FCHECK(src2, src2w);
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
-        fprintf(compiler->verbose, "  %s%s%s%s ", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64",
-            (op & SLJIT_SET_E) ? ".e" : "", (op & SLJIT_SET_S) ? ".s" : "");
+        fprintf(compiler->verbose, "  %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64");
+        if (op & VARIABLE_FLAG_MASK) {
+            fprintf(compiler->verbose, ".%s_f", jump_names[GET_FLAG_TYPE(op)]);
+        }
+        fprintf(compiler->verbose, " ");
         sljit_verbose_fparam(compiler, src1, src1w);
         fprintf(compiler->verbose, ", ");
         sljit_verbose_fparam(compiler, src2, src2w);
@@ -1274,7 +1301,7 @@
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(sljit_is_fpu_available());
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64);
-    FUNCTION_CHECK_FOP();
+    CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_FCHECK(src, srcw);
     FUNCTION_CHECK_DST(dst, dstw);
 #endif
@@ -1304,7 +1331,7 @@
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(sljit_is_fpu_available());
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32);
-    FUNCTION_CHECK_FOP();
+    CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_CHECK_SRC(src, srcw);
     FUNCTION_FCHECK(dst, dstw);
 #endif
@@ -1330,7 +1357,7 @@
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(sljit_is_fpu_available());
     CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64);
-    FUNCTION_CHECK_FOP();
+    CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
     FUNCTION_FCHECK(src1, src1w);
     FUNCTION_FCHECK(src2, src2w);
     FUNCTION_FCHECK(dst, dstw);
@@ -1353,6 +1380,10 @@
 {
     SLJIT_UNUSED_ARG(compiler);


+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    compiler->last_flags = 0;
+#endif
+
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
         fprintf(compiler->verbose, "label:\n");
@@ -1369,9 +1400,18 @@


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP)));
+    CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
     CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3);
     CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP));
     CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches);
+
+    if ((type & 0xff) < SLJIT_JUMP) {
+        if ((type & 0xff) <= SLJIT_NOT_ZERO)
+            CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+        else
+            CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
+        CHECK_ARGUMENT((type & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP));
+    }
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -1390,6 +1430,7 @@
     CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL);
     FUNCTION_CHECK_SRC(src1, src1w);
     FUNCTION_CHECK_SRC(src2, src2w);
+    compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1414,6 +1455,7 @@
     CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64);
     FUNCTION_FCHECK(src1, src1w);
     FUNCTION_FCHECK(src2, src2w);
+    compiler->last_flags = 0;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1430,6 +1472,10 @@


 static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
 {
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+    compiler->last_flags = 0;
+#endif
+
     if (SLJIT_UNLIKELY(compiler->skip_checks)) {
         compiler->skip_checks = 0;
         CHECK_RETURN_OK;
@@ -1458,21 +1504,28 @@
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
     CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
+    CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
     CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_U32 || GET_OPCODE(op) == SLJIT_MOV_S32
         || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR));
-    CHECK_ARGUMENT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0);
-    CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS));
+    CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
+
+    if ((type & 0xff) <= SLJIT_NOT_ZERO)
+        CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+    else
+        CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
+
     if (GET_OPCODE(op) < SLJIT_ADD) {
         CHECK_ARGUMENT(src == SLJIT_UNUSED && srcw == 0);
     } else {
         CHECK_ARGUMENT(src == dst && srcw == dstw);
+        compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
     }
     FUNCTION_CHECK_DST(dst, dstw);
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
-        fprintf(compiler->verbose, "  flags %s%s%s%s, ",
-            !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k",
+        fprintf(compiler->verbose, "  flags%s %s%s, ",
+            !(op & SLJIT_SET_Z) ? "" : ".z",
             GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE],
             GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : ""));
         sljit_verbose_param(compiler, dst, dstw);
@@ -1626,7 +1679,6 @@
     sljit_sw tmp_srcw;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));


     condition = type & 0xff;
@@ -1671,6 +1723,7 @@
             condition = SLJIT_SIG_GREATER_EQUAL;
             break;
         }
+
         type = condition | (type & (SLJIT_I32_OP | SLJIT_REWRITABLE_JUMP));
         tmp_src = src1;
         src1 = src2;
@@ -1681,11 +1734,9 @@
     }


     if (condition <= SLJIT_NOT_ZERO)
-        flags = SLJIT_SET_E;
-    else if (condition <= SLJIT_LESS_EQUAL)
-        flags = SLJIT_SET_U;
+        flags = SLJIT_SET_Z;
     else
-        flags = SLJIT_SET_S;
+        flags = condition << VARIABLE_FLAG_SHIFT;


 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
@@ -1697,39 +1748,31 @@
         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     compiler->skip_checks = 1;
 #endif
-    return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+    return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP)));
 }


+#endif
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type,
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
-    sljit_s32 flags, condition;
-
     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w));


-    condition = type & 0xff;
-    flags = (condition <= SLJIT_NOT_EQUAL_F64) ? SLJIT_SET_E : SLJIT_SET_S;
-    if (type & SLJIT_F32_OP)
-        flags |= SLJIT_F32_OP;
-
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     compiler->skip_checks = 1;
 #endif
-    sljit_emit_fop1(compiler, SLJIT_CMP_F64 | flags, src1, src1w, src2, src2w);
+    sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_I32_OP), src1, src1w, src2, src2w);


 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
         || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
     compiler->skip_checks = 1;
 #endif
-    return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+    return sljit_emit_jump(compiler, type);
 }


-#endif
-
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)

 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
@@ -1743,7 +1786,7 @@
     compiler->skip_checks = 1;
 #endif
     if (offset != 0)
-        return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+        return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
     return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0);
 }


@@ -1758,9 +1801,10 @@
     return "unsupported";
 }


-SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void)
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data)
 {
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNUSED_ARG(allocator_data);
+    SLJIT_UNREACHABLE();
     return NULL;
 }


@@ -1767,14 +1811,20 @@
 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler)
 {
     SLJIT_UNUSED_ARG(compiler);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }


+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler)
+{
+    SLJIT_UNUSED_ARG(compiler);
+    SLJIT_UNREACHABLE();
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size)
 {
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(size);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return NULL;
 }


@@ -1783,7 +1833,7 @@
 {
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(verbose);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }
 #endif


@@ -1790,7 +1840,7 @@
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
 {
     SLJIT_UNUSED_ARG(compiler);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return NULL;
 }


@@ -1797,7 +1847,7 @@
 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
 {
     SLJIT_UNUSED_ARG(code);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
@@ -1812,7 +1862,7 @@
     SLJIT_UNUSED_ARG(fscratches);
     SLJIT_UNUSED_ARG(fsaveds);
     SLJIT_UNUSED_ARG(local_size);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1828,7 +1878,7 @@
     SLJIT_UNUSED_ARG(fscratches);
     SLJIT_UNUSED_ARG(fsaveds);
     SLJIT_UNUSED_ARG(local_size);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1838,7 +1888,7 @@
     SLJIT_UNUSED_ARG(op);
     SLJIT_UNUSED_ARG(src);
     SLJIT_UNUSED_ARG(srcw);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1847,7 +1897,7 @@
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(dst);
     SLJIT_UNUSED_ARG(dstw);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1856,7 +1906,7 @@
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(src);
     SLJIT_UNUSED_ARG(srcw);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1864,7 +1914,7 @@
 {
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(op);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1878,7 +1928,7 @@
     SLJIT_UNUSED_ARG(dstw);
     SLJIT_UNUSED_ARG(src);
     SLJIT_UNUSED_ARG(srcw);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1895,13 +1945,13 @@
     SLJIT_UNUSED_ARG(src1w);
     SLJIT_UNUSED_ARG(src2);
     SLJIT_UNUSED_ARG(src2w);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
 {
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return reg;
 }


@@ -1911,13 +1961,13 @@
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(instruction);
     SLJIT_UNUSED_ARG(size);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
 {
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return 0;
 }


@@ -1931,7 +1981,7 @@
     SLJIT_UNUSED_ARG(dstw);
     SLJIT_UNUSED_ARG(src);
     SLJIT_UNUSED_ARG(srcw);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1948,7 +1998,7 @@
     SLJIT_UNUSED_ARG(src1w);
     SLJIT_UNUSED_ARG(src2);
     SLJIT_UNUSED_ARG(src2w);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -1955,7 +2005,7 @@
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 {
     SLJIT_UNUSED_ARG(compiler);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return NULL;
 }


@@ -1963,7 +2013,7 @@
 {
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(type);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return NULL;
 }


@@ -1977,7 +2027,7 @@
     SLJIT_UNUSED_ARG(src1w);
     SLJIT_UNUSED_ARG(src2);
     SLJIT_UNUSED_ARG(src2w);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return NULL;
 }


@@ -1991,7 +2041,7 @@
     SLJIT_UNUSED_ARG(src1w);
     SLJIT_UNUSED_ARG(src2);
     SLJIT_UNUSED_ARG(src2w);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return NULL;
 }


@@ -1999,7 +2049,7 @@
 {
     SLJIT_UNUSED_ARG(jump);
     SLJIT_UNUSED_ARG(label);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }


 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target)
@@ -2006,7 +2056,7 @@
 {
     SLJIT_UNUSED_ARG(jump);
     SLJIT_UNUSED_ARG(target);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
@@ -2015,7 +2065,7 @@
     SLJIT_UNUSED_ARG(type);
     SLJIT_UNUSED_ARG(src);
     SLJIT_UNUSED_ARG(srcw);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -2031,7 +2081,7 @@
     SLJIT_UNUSED_ARG(src);
     SLJIT_UNUSED_ARG(srcw);
     SLJIT_UNUSED_ARG(type);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -2041,7 +2091,7 @@
     SLJIT_UNUSED_ARG(dst);
     SLJIT_UNUSED_ARG(dstw);
     SLJIT_UNUSED_ARG(offset);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_ERR_UNSUPPORTED;
 }


@@ -2051,22 +2101,24 @@
     SLJIT_UNUSED_ARG(dst);
     SLJIT_UNUSED_ARG(dstw);
     SLJIT_UNUSED_ARG(initval);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return NULL;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
     SLJIT_UNUSED_ARG(addr);
-    SLJIT_UNUSED_ARG(new_addr);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNUSED_ARG(new_target);
+    SLJIT_UNUSED_ARG(executable_offset);
+    SLJIT_UNREACHABLE();
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
     SLJIT_UNUSED_ARG(addr);
     SLJIT_UNUSED_ARG(new_constant);
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNUSED_ARG(executable_offset);
+    SLJIT_UNREACHABLE();
 }


#endif
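To show how the new executable_offset parameter above is meant to be used together with sljit_get_executable_offset() (added in the sljitLir.h hunk below), here is a hedged fragment, not taken from the patch; it assumes `compiler` and a SLJIT_REWRITABLE_JUMP `jump` were created earlier, and `new_target` is a hypothetical address chosen by the caller.

    /* Entry point of the generated function. */
    void *code = sljit_generate_code(compiler);
    sljit_sw exec_offset = sljit_get_executable_offset(compiler);

    /* Redirect the rewritable jump inside the generated code; the offset is
       0 for the normal allocator and non-zero for the protected one. */
    sljit_set_jump_addr(sljit_get_jump_addr(jump), (sljit_uw)new_target, exec_offset);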

Modified: code/trunk/sljit/sljitLir.h
===================================================================
--- code/trunk/sljit/sljitLir.h    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitLir.h    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -325,7 +325,9 @@
     sljit_s32 local_size;
     /* Code size. */
     sljit_uw size;
-    /* For statistical purposes. */
+    /* Relative offset of the executable mapping from the writable mapping. */
+    sljit_uw executable_offset;
+    /* Executable size for statistical purposes. */
     sljit_uw executable_size;


 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -336,10 +338,6 @@
     sljit_s32 mode32;
 #endif


-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-    sljit_s32 flags_saved;
-#endif
-
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
     /* Constant pool handling. */
     sljit_uw *cpool;
@@ -354,15 +352,8 @@
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
     /* Temporary fields. */
     sljit_uw shift_imm;
-    sljit_s32 cache_arg;
-    sljit_sw cache_argw;
 #endif


-#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
-    sljit_s32 cache_arg;
-    sljit_sw cache_argw;
-#endif
-
 #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
     sljit_s32 cache_arg;
     sljit_sw cache_argw;
@@ -397,6 +388,9 @@


 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
         || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    /* Flags specified by the last arithmetic instruction.
+       It contains the type of the variable flag. */
+    sljit_s32 last_flags;
     /* Local size passed to the functions. */
     sljit_s32 logical_local_size;
 #endif
@@ -404,6 +398,7 @@
 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
         || (defined SLJIT_DEBUG && SLJIT_DEBUG) \
         || (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+    /* Trust arguments when the API function is called. */
     sljit_s32 skip_checks;
 #endif
 };
@@ -457,10 +452,6 @@
 SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose);
 #endif


-/* Returns with non-zero if dynamic code modification is enabled. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_dyn_code_modification_enabled(void);
-
 /*
    Create executable code from the sljit instruction stream. This is the final step
    of the code generation so no more instructions can be added after this call.
@@ -473,13 +464,24 @@
 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code);


/*
- After the machine code generation is finished we can retrieve the allocated
- executable memory size, although this area may not be fully filled with
- instructions depending on some optimizations. This function is useful only
- for statistical purposes.
+ When the protected executable allocator is used, the JIT code is mapped
+ twice: the first mapping has read/write and the second mapping has read/exec
+ permissions. After the machine code is successfully generated, this function
+ returns with the relative offset of the executable mapping, using the
+ writable mapping as the base. The returned value is always 0 for the normal
+ executable allocator, since it uses only one mapping with read/write/exec
+ permissions. Dynamic code modification requires this value.

    Before a successful code generation, this function returns with 0.
 */
+static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; }
+
+/*
+   The executable memory consumption of the generated code can be retrieved by
+   this function. The returned value can be used for statistical purposes.
+
+   Before a successful code generation, this function returns with 0.
+*/
 static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }


 /* Instruction generation. Returns with any error code. If there is no
@@ -569,7 +571,7 @@
    and setting up a new stack frame would cost too much performance. However, it is still
    possible to return to the address of the caller (or anywhere else). */


-/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */
+/* Note: may destroy flags. */

 /* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested,
    since many architectures do clever branch prediction on call / return instruction pairs. */
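Since the note above now states that the fast enter / return pair may destroy flags, a hedged fragment of the usual pattern follows (not part of the patch); it assumes the enclosing sljit_emit_enter reserved at least sizeof(sljit_sw) bytes of local space for the saved return address.

    /* Entry of a helper reached via SLJIT_FAST_CALL: store the return
       address in the first local slot, do the work, then return to it. */
    sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), 0);
    /* ... helper body; flags set before the call must not be relied upon ... */
    sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), 0);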
@@ -638,58 +640,102 @@
 #define SLJIT_MEM2(r1, r2)    (SLJIT_MEM | (r1) | ((r2) << 8))
 #define SLJIT_IMM        0x40


-/* Set 32 bit operation mode (I) on 64 bit CPUs. This flag is ignored on 32
-   bit CPUs. When this flag is set for an arithmetic operation, only the
-   lower 32 bit of the input register(s) are used, and the CPU status flags
-   are set according to the 32 bit result. Although the higher 32 bit of
-   the input and the result registers are not defined by SLJIT, it might be
-   defined by the CPU architecture (e.g. MIPS). To satisfy these requirements
-   all source registers must be computed by operations where this flag is
-   also set. In other words 32 and 64 bit arithmetic operations cannot be
-   mixed. The only exception is SLJIT_IMOV and SLJIT_IMOVU whose source
-   register can hold any 32 or 64 bit value. This source register is
-   converted to a 32 bit compatible format. SLJIT does not generate any
-   instructions on certain CPUs (e.g. on x86 and ARM) if the source and
-   destination operands are the same registers. Affects sljit_emit_op0,
-   sljit_emit_op1 and sljit_emit_op2. */
+/* Set 32 bit operation mode (I) on 64 bit CPUs. This option is ignored on
+   32 bit CPUs. When this option is set for an arithmetic operation, only
+   the lower 32 bit of the input registers are used, and the CPU status
+   flags are set according to the 32 bit result. Although the higher 32 bit
+   of the input and the result registers are not defined by SLJIT, it might
+   be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU
+   requirements all source registers must be the result of those operations
+   where this option was also set. Memory loads read 32 bit values rather
+   than 64 bit ones. In other words 32 bit and 64 bit operations cannot
+   be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
+   register can hold any 32 or 64 bit value, and it is converted to a 32 bit
+   compatible format first. This conversion is free (no instructions are
+   emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
+   value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
+
+   Note: memory addressing always uses 64 bit values on 64 bit systems so
+         the result of a 32 bit operation must not be used with SLJIT_MEMx
+         macros.
+
+   This option is part of the instruction name, so there is no need to
+   manually set it. E.g:
+
+     SLJIT_ADD32 == (SLJIT_ADD | SLJIT_I32_OP) */
 #define SLJIT_I32_OP        0x100


-/* F32 precision mode (SP). This flag is similar to SLJIT_I32_OP, just
-   it applies to floating point registers (it is even the same bit). When
-   this flag is passed, the CPU performs 32 bit floating point operations.
-   Similar to SLJIT_I32_OP, all register arguments must be computed by
-   floating point operations where this flag is also set. Affects
-   sljit_emit_fop1, sljit_emit_fop2 and sljit_emit_fcmp. */
-#define SLJIT_F32_OP        0x100
+/* Set F32 (single) precision mode for floating-point computation. This
+   option is similar to SLJIT_I32_OP; it just applies to floating point
+   registers. When this option is passed, the CPU performs 32 bit floating
+   point operations rather than 64 bit ones. Similar to SLJIT_I32_OP, all
+   register arguments must be the result of those operations where this
+   option was also set.


-/* Common CPU status flags for all architectures (x86, ARM, PPC)
-    - carry flag
-    - overflow flag
-    - zero flag
-    - negative/positive flag (depends on arc)
-   On mips, these flags are emulated by software. */
+   This option is part of the instruction name, so there is no need to
+   manually set it. E.g:


-/* By default, the instructions may, or may not set the CPU status flags.
-   Forcing to set or keep status flags can be done with the following flags: */
+     SLJIT_MOV_F32 = (SLJIT_MOV_F64 | SLJIT_F32_OP)
+ */
+#define SLJIT_F32_OP        SLJIT_I32_OP


-/* Note: sljit tries to emit the minimum number of instructions. Using these
- flags can increase them, so use them wisely to avoid unnecessary code generation. */
+/* Many CPUs (x86, ARM, PPC) have status flags which can be set according
+ to the result of an operation. Other CPUs (MIPS) do not have status
+ flags, and results must be stored in registers. To cover both architecture
+ types efficiently only two flags are defined by SLJIT:

-/* Set Equal (Zero) status flag (E). */
-#define SLJIT_SET_E            0x0200
-/* Set unsigned status flag (U). */
-#define SLJIT_SET_U            0x0400
-/* Set signed status flag (S). */
-#define SLJIT_SET_S            0x0800
-/* Set signed overflow flag (O). */
-#define SLJIT_SET_O            0x1000
-/* Set carry flag (C).
-   Note: Kinda unsigned overflow, but behaves differently on various cpus. */
-#define SLJIT_SET_C            0x2000
-/* Do not modify the flags (K).
-   Note: This flag cannot be combined with any other SLJIT_SET_* flag. */
-#define SLJIT_KEEP_FLAGS        0x4000
+    * Zero (equal) flag: it is set if the result is zero
+    * Variable flag: its value is defined by the last arithmetic operation


+   SLJIT instructions can set either or both of these flags. The value of
+   these flags is undefined if the instruction does not specify their value.
+   The description of each instruction contains the list of allowed flag
+   types.
+
+   Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence
+
+     sljit_op2(..., SLJIT_ADD, ...)
+       Both the zero and variable flags are undefined, so they
+       hold a random value after the operation is completed.
+
+     sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
+       Sets the zero flag if the result is zero, clears it otherwise.
+       The variable flag is undefined.
+
+     sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...)
+       Sets the variable flag if an integer overflow occurs, clears
+       it otherwise. The zero flag is undefined.
+
+     sljit_op2(..., SLJIT_ADD | SLJIT_SET_NOT_OVERFLOW, ...)
+       Sets the variable flag if an integer overflow does NOT occur,
+       clears it otherwise. The zero flag is undefined.
+
+     sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...)
+       Sets the zero flag if the result is zero, clears it otherwise.
+       Sets the variable flag if unsigned overflow (carry) occurs,
+       clears it otherwise.
+
+   If an instruction (e.g. SLJIT_MOV) does not modify flags, the flags are
+   unchanged.
+
+   Using these flags can reduce the number of emitted instructions. E.g. a
+   fast loop can be implemented by decrementing a counter register while
+   requesting only the zero flag, and jumping back until the counter reaches zero.
+
+   Motivation: although CPUs can set a large number of flags, usually their
+   values are ignored or only one of them is used. Emulating a large number
+   of flags on systems without a flag register is complicated, so SLJIT
+   instructions must specify the flag they want to use and only that flag
+   will be emulated. The last arithmetic instruction can be repeated if
+   multiple flags need to be checked.
+*/
+
+/* Set Zero status flag. */
+#define SLJIT_SET_Z            0x0200
+/* Set the variable status flag if condition is true.
+   See comparison types. */
+#define SLJIT_SET(condition)            ((condition) << 10)
+
 /* Notes:
      - you cannot postpone conditional jump instructions except if noted that
        the instruction does not set flags (See: SLJIT_KEEP_FLAGS).
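The counting-loop example mentioned in the comment above can be written as a small builder function; this is an editor's sketch under the new flag rules, not code from the commit. Only the zero flag is requested by the SUB, so the following SLJIT_NOT_ZERO jump satisfies the new last_flags checks in sljitLir.c.

    #include "sljitLir.h"

    /* Builds a function taking one argument (assumed >= 1) that simply
       counts it down to zero and returns it unchanged. */
    static void *build_count_loop(void)
    {
        struct sljit_compiler *compiler = sljit_create_compiler(NULL);
        struct sljit_label *loop;
        void *code;

        /* options=0, args=1, scratches=1, saveds=1, no float regs, no locals */
        sljit_emit_enter(compiler, 0, 1, 1, 1, 0, 0, 0);
        sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0);

        loop = sljit_emit_label(compiler);
        /* Decrement the counter and request only the zero flag. */
        sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z,
            SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1);
        sljit_set_label(sljit_emit_jump(compiler, SLJIT_NOT_ZERO), loop);

        sljit_emit_return(compiler, SLJIT_MOV, SLJIT_S0, 0);

        code = sljit_generate_code(compiler);
        sljit_free_compiler(compiler);
        return code;
    }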
@@ -698,11 +744,11 @@
 /* Starting index of opcodes for sljit_emit_op0. */
 #define SLJIT_OP0_BASE            0


-/* Flags: - (never set any flags)
+/* Flags: - (does not modify flags)
    Note: breakpoint instruction is not supported by all architectures (e.g. ppc)
          It falls back to SLJIT_NOP in those cases. */
 #define SLJIT_BREAKPOINT        (SLJIT_OP0_BASE + 0)
-/* Flags: - (never set any flags)
+/* Flags: - (does not modify flags)
    Note: may or may not cause an extra cycle wait
          it can even decrease the runtime in a few cases. */
 #define SLJIT_NOP            (SLJIT_OP0_BASE + 1)
@@ -714,13 +760,13 @@
    Signed multiplication of SLJIT_R0 and SLJIT_R1.
    Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
 #define SLJIT_LMUL_SW            (SLJIT_OP0_BASE + 3)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
    Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
    The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
    Note: if SLJIT_R1 is 0, the behaviour is undefined. */
 #define SLJIT_DIVMOD_UW            (SLJIT_OP0_BASE + 4)
 #define SLJIT_DIVMOD_U32        (SLJIT_DIVMOD_UW | SLJIT_I32_OP)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
    Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
    The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
    Note: if SLJIT_R1 is 0, the behaviour is undefined.
@@ -728,13 +774,13 @@
          the behaviour is undefined. */
 #define SLJIT_DIVMOD_SW            (SLJIT_OP0_BASE + 5)
 #define SLJIT_DIVMOD_S32        (SLJIT_DIVMOD_SW | SLJIT_I32_OP)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
    Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
    The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
    Note: if SLJIT_R1 is 0, the behaviour is undefined. */
 #define SLJIT_DIV_UW            (SLJIT_OP0_BASE + 6)
 #define SLJIT_DIV_U32            (SLJIT_DIV_UW | SLJIT_I32_OP)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
    Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
    The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
    Note: if SLJIT_R1 is 0, the behaviour is undefined.
@@ -748,75 +794,104 @@
 /* Starting index of opcodes for sljit_emit_op1. */
 #define SLJIT_OP1_BASE            32


-/* Notes for MOV instructions:
-   U = Mov with update (pre form). If source or destination defined as SLJIT_MEM1(r1)
-       or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument
-   UB = unsigned byte (8 bit)
-   SB = signed byte (8 bit)
-   UH = unsigned half (16 bit)
-   SH = signed half (16 bit)
-   UI = unsigned int (32 bit)
-   SI = signed int (32 bit)
-   P  = pointer (sljit_p) size */
+/* The MOV instruction transfers data from source to destination.


-/* Flags: - (never set any flags) */
+   MOV instruction suffixes:
+
+   U8  - unsigned 8 bit data transfer
+   S8  - signed 8 bit data transfer
+   U16 - unsigned 16 bit data transfer
+   S16 - signed 16 bit data transfer
+   U32 - unsigned int (32 bit) data transfer
+   S32 - signed int (32 bit) data transfer
+   P   - pointer (sljit_p) data transfer
+
+   U = move with update (pre form). If the source or destination is defined as
+       SLJIT_MEM1(r1) or SLJIT_MEM2(r1, r2), r1 is increased by the
+       offset part of the address.
+
+   Register arguments and base registers can only be used once for move
+   with update instructions. The shift value of SLJIT_MEM2 addressing
+   mode must also be 0. Reason: SLJIT_MOVU instructions are expected to
+   be in high-performance loops where complex instruction emulation
+   would be too costly.
+
+   Examples for invalid move with update instructions:
+
+   sljit_emit_op1(..., SLJIT_MOVU_U8,
+       SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 8);
+   sljit_emit_op1(..., SLJIT_MOVU_U8,
+       SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_R0, 0);
+   sljit_emit_op1(..., SLJIT_MOVU_U8,
+       SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM1(SLJIT_R0), 8);
+   sljit_emit_op1(..., SLJIT_MOVU_U8,
+       SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0);
+   sljit_emit_op1(..., SLJIT_MOVU_U8,
+       SLJIT_R2, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1);
+
+   The following example is valid, since only the offset register is
+   used multiple times:
+
+   sljit_emit_op1(..., SLJIT_MOVU_U8,
+       SLJIT_MEM2(SLJIT_R0, SLJIT_R2), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0);
+*/
+
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV            (SLJIT_OP1_BASE + 0)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV_U8            (SLJIT_OP1_BASE + 1)
 #define SLJIT_MOV32_U8            (SLJIT_MOV_U8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV_S8            (SLJIT_OP1_BASE + 2)
 #define SLJIT_MOV32_S8            (SLJIT_MOV_S8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV_U16            (SLJIT_OP1_BASE + 3)
 #define SLJIT_MOV32_U16            (SLJIT_MOV_U16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV_S16            (SLJIT_OP1_BASE + 4)
 #define SLJIT_MOV32_S16            (SLJIT_MOV_S16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags)
+/* Flags: - (does not modify flags)
    Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */
 #define SLJIT_MOV_U32            (SLJIT_OP1_BASE + 5)
-/* Flags: I - (never set any flags)
+/* Flags: - (does not modify flags)
    Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */
 #define SLJIT_MOV_S32            (SLJIT_OP1_BASE + 6)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV32            (SLJIT_MOV_S32 | SLJIT_I32_OP)
-/* Flags: - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV_P            (SLJIT_OP1_BASE + 7)
-/* Flags: - (never set any flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_MOVU            (SLJIT_OP1_BASE + 8)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_MOVU_U8            (SLJIT_OP1_BASE + 9)
 #define SLJIT_MOVU32_U8            (SLJIT_MOVU_U8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_MOVU_S8            (SLJIT_OP1_BASE + 10)
 #define SLJIT_MOVU32_S8            (SLJIT_MOVU_S8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_MOVU_U16            (SLJIT_OP1_BASE + 11)
 #define SLJIT_MOVU32_U16            (SLJIT_MOVU_U16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_MOVU_S16            (SLJIT_OP1_BASE + 12)
 #define SLJIT_MOVU32_S16        (SLJIT_MOVU_S16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags)
+/* Flags: - (may destroy flags)
    Note: no SLJIT_MOVU32_U32 form, since it is the same as SLJIT_MOVU32 */
 #define SLJIT_MOVU_U32            (SLJIT_OP1_BASE + 13)
-/* Flags: I - (never set any flags)
+/* Flags: - (may destroy flags)
    Note: no SLJIT_MOVU32_S32 form, since it is the same as SLJIT_MOVU32 */
 #define SLJIT_MOVU_S32            (SLJIT_OP1_BASE + 14)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_MOVU32            (SLJIT_MOVU_S32 | SLJIT_I32_OP)
-/* Flags: - (never set any flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_MOVU_P            (SLJIT_OP1_BASE + 15)
-/* Flags: I | E | K */
+/* Flags: Z */
 #define SLJIT_NOT            (SLJIT_OP1_BASE + 16)
 #define SLJIT_NOT32            (SLJIT_NOT | SLJIT_I32_OP)
-/* Flags: I | E | O | K */
+/* Flags: Z | OVERFLOW */
 #define SLJIT_NEG            (SLJIT_OP1_BASE + 17)
 #define SLJIT_NEG32            (SLJIT_NEG | SLJIT_I32_OP)
 /* Count leading zeroes
-   Flags: I | E | K
-   Important note! Sparc 32 does not support K flag, since
-   the required popc instruction is introduced only in sparc 64. */
+   Flags: Z */
 #define SLJIT_CLZ            (SLJIT_OP1_BASE + 18)
 #define SLJIT_CLZ32            (SLJIT_CLZ | SLJIT_I32_OP)
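A hedged fragment (not from the patch) for the move-with-update rules documented above: one inner step of a byte copy where each base register is used only once per instruction. It assumes SLJIT_R0 and SLJIT_R1 were set one byte before the source and destination buffers, because the pre-form update adds the offset before the access.

    /* R0 += 1, then R2 = *(sljit_u8*)R0 */
    sljit_emit_op1(compiler, SLJIT_MOVU_U8,
        SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 1);
    /* R1 += 1, then *(sljit_u8*)R1 = R2 */
    sljit_emit_op1(compiler, SLJIT_MOVU_U8,
        SLJIT_MEM1(SLJIT_R1), 1, SLJIT_R2, 0);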


@@ -827,32 +902,34 @@
 /* Starting index of opcodes for sljit_emit_op2. */
 #define SLJIT_OP2_BASE            96


-/* Flags: I | E | O | C | K */
+/* Flags: Z | OVERFLOW | CARRY */
 #define SLJIT_ADD            (SLJIT_OP2_BASE + 0)
 #define SLJIT_ADD32            (SLJIT_ADD | SLJIT_I32_OP)
-/* Flags: I | C | K */
+/* Flags: CARRY */
 #define SLJIT_ADDC            (SLJIT_OP2_BASE + 1)
 #define SLJIT_ADDC32            (SLJIT_ADDC | SLJIT_I32_OP)
-/* Flags: I | E | U | S | O | C | K */
+/* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL
+          SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER
+          SIG_LESS_EQUAL | CARRY */
 #define SLJIT_SUB            (SLJIT_OP2_BASE + 2)
 #define SLJIT_SUB32            (SLJIT_SUB | SLJIT_I32_OP)
-/* Flags: I | C | K */
+/* Flags: CARRY */
 #define SLJIT_SUBC            (SLJIT_OP2_BASE + 3)
 #define SLJIT_SUBC32            (SLJIT_SUBC | SLJIT_I32_OP)
 /* Note: integer mul
-   Flags: I | O (see SLJIT_C_MUL_*) | K */
+   Flags: MUL_OVERFLOW */
 #define SLJIT_MUL            (SLJIT_OP2_BASE + 4)
 #define SLJIT_MUL32            (SLJIT_MUL | SLJIT_I32_OP)
-/* Flags: I | E | K */
+/* Flags: Z */
 #define SLJIT_AND            (SLJIT_OP2_BASE + 5)
 #define SLJIT_AND32            (SLJIT_AND | SLJIT_I32_OP)
-/* Flags: I | E | K */
+/* Flags: Z */
 #define SLJIT_OR            (SLJIT_OP2_BASE + 6)
 #define SLJIT_OR32            (SLJIT_OR | SLJIT_I32_OP)
-/* Flags: I | E | K */
+/* Flags: Z */
 #define SLJIT_XOR            (SLJIT_OP2_BASE + 7)
 #define SLJIT_XOR32            (SLJIT_XOR | SLJIT_I32_OP)
-/* Flags: I | E | K
+/* Flags: Z
    Let bit_length be the length of the shift operation: 32 or 64.
    If src2 is immediate, src2w is masked by (bit_length - 1).
    Otherwise, if the content of src2 is outside the range from 0
@@ -859,7 +936,7 @@
    to bit_length - 1, the result is undefined. */
 #define SLJIT_SHL            (SLJIT_OP2_BASE + 8)
 #define SLJIT_SHL32            (SLJIT_SHL | SLJIT_I32_OP)
-/* Flags: I | E | K
+/* Flags: Z
    Let bit_length be the length of the shift operation: 32 or 64.
    If src2 is immediate, src2w is masked by (bit_length - 1).
    Otherwise, if the content of src2 is outside the range from 0
@@ -866,7 +943,7 @@
    to bit_length - 1, the result is undefined. */
 #define SLJIT_LSHR            (SLJIT_OP2_BASE + 9)
 #define SLJIT_LSHR32            (SLJIT_LSHR | SLJIT_I32_OP)
-/* Flags: I | E | K
+/* Flags: Z
    Let bit_length be the length of the shift operation: 32 or 64.
    If src2 is immediate, src2w is masked by (bit_length - 1).
    Otherwise, if the content of src2 is outside the range from 0
@@ -886,37 +963,35 @@
 /* Starting index of opcodes for sljit_emit_fop1. */
 #define SLJIT_FOP1_BASE            128


-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MOV_F64            (SLJIT_FOP1_BASE + 0)
 #define SLJIT_MOV_F32            (SLJIT_MOV_F64 | SLJIT_F32_OP)
 /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
    SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int
    Rounding mode when the destination is W or I: round towards zero. */
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_CONV_F64_FROM_F32        (SLJIT_FOP1_BASE + 1)
 #define SLJIT_CONV_F32_FROM_F64        (SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_CONV_SW_FROM_F64        (SLJIT_FOP1_BASE + 2)
 #define SLJIT_CONV_SW_FROM_F32        (SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_CONV_S32_FROM_F64        (SLJIT_FOP1_BASE + 3)
 #define SLJIT_CONV_S32_FROM_F32        (SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_CONV_F64_FROM_SW        (SLJIT_FOP1_BASE + 4)
 #define SLJIT_CONV_F32_FROM_SW        (SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_CONV_F64_FROM_S32        (SLJIT_FOP1_BASE + 5)
 #define SLJIT_CONV_F32_FROM_S32        (SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP)
 /* Note: dst is the left and src is the right operand for SLJIT_CMPD.
-   Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag
-         is set, the comparison result is unpredictable.
-   Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
+   Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */
 #define SLJIT_CMP_F64            (SLJIT_FOP1_BASE + 6)
 #define SLJIT_CMP_F32            (SLJIT_CMP_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_NEG_F64            (SLJIT_FOP1_BASE + 7)
 #define SLJIT_NEG_F32            (SLJIT_NEG_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_ABS_F64            (SLJIT_FOP1_BASE + 8)
 #define SLJIT_ABS_F32            (SLJIT_ABS_F64 | SLJIT_F32_OP)
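For the float comparison flags listed above, a hedged fragment (not from the patch) using the reworked sljit_emit_fcmp() helper shown earlier in sljitLir.c; it assumes an initialized compiler with values already loaded into SLJIT_FR0 and SLJIT_FR1.

    struct sljit_jump *less;

    /* The jump is taken when SLJIT_FR0 < SLJIT_FR1; execution falls through
       when the condition is false. */
    less = sljit_emit_fcmp(compiler, SLJIT_LESS_F64,
        SLJIT_FR0, 0, SLJIT_FR1, 0);
    /* ... fall-through path ... */
    sljit_set_label(less, sljit_emit_label(compiler));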


@@ -927,16 +1002,16 @@
 /* Starting index of opcodes for sljit_emit_fop2. */
 #define SLJIT_FOP2_BASE            160


-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_ADD_F64            (SLJIT_FOP2_BASE + 0)
 #define SLJIT_ADD_F32            (SLJIT_ADD_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_SUB_F64            (SLJIT_FOP2_BASE + 1)
 #define SLJIT_SUB_F32            (SLJIT_SUB_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_MUL_F64            (SLJIT_FOP2_BASE + 2)
 #define SLJIT_MUL_F32            (SLJIT_MUL_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
 #define SLJIT_DIV_F64            (SLJIT_FOP2_BASE + 3)
 #define SLJIT_DIV_F32            (SLJIT_DIV_F64 | SLJIT_F32_OP)


@@ -963,56 +1038,79 @@

 #define SLJIT_LESS            2
 #define SLJIT_LESS32            (SLJIT_LESS | SLJIT_I32_OP)
+#define SLJIT_SET_LESS            SLJIT_SET(SLJIT_LESS)
 #define SLJIT_GREATER_EQUAL        3
 #define SLJIT_GREATER_EQUAL32        (SLJIT_GREATER_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_GREATER_EQUAL        SLJIT_SET(SLJIT_GREATER_EQUAL)
 #define SLJIT_GREATER            4
 #define SLJIT_GREATER32            (SLJIT_GREATER | SLJIT_I32_OP)
+#define SLJIT_SET_GREATER        SLJIT_SET(SLJIT_GREATER)
 #define SLJIT_LESS_EQUAL        5
 #define SLJIT_LESS_EQUAL32        (SLJIT_LESS_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_LESS_EQUAL        SLJIT_SET(SLJIT_LESS_EQUAL)
 #define SLJIT_SIG_LESS            6
 #define SLJIT_SIG_LESS32        (SLJIT_SIG_LESS | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_LESS        SLJIT_SET(SLJIT_SIG_LESS)
 #define SLJIT_SIG_GREATER_EQUAL        7
 #define SLJIT_SIG_GREATER_EQUAL32    (SLJIT_SIG_GREATER_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_GREATER_EQUAL    SLJIT_SET(SLJIT_SIG_GREATER_EQUAL)
 #define SLJIT_SIG_GREATER        8
 #define SLJIT_SIG_GREATER32        (SLJIT_SIG_GREATER | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_GREATER        SLJIT_SET(SLJIT_SIG_GREATER)
 #define SLJIT_SIG_LESS_EQUAL        9
 #define SLJIT_SIG_LESS_EQUAL32        (SLJIT_SIG_LESS_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_LESS_EQUAL    SLJIT_SET(SLJIT_SIG_LESS_EQUAL)


 #define SLJIT_OVERFLOW            10
 #define SLJIT_OVERFLOW32        (SLJIT_OVERFLOW | SLJIT_I32_OP)
+#define SLJIT_SET_OVERFLOW        SLJIT_SET(SLJIT_OVERFLOW)
 #define SLJIT_NOT_OVERFLOW        11
 #define SLJIT_NOT_OVERFLOW32        (SLJIT_NOT_OVERFLOW | SLJIT_I32_OP)
+#define SLJIT_SET_NOT_OVERFLOW        SLJIT_SET(SLJIT_NOT_OVERFLOW)


 #define SLJIT_MUL_OVERFLOW        12
 #define SLJIT_MUL_OVERFLOW32        (SLJIT_MUL_OVERFLOW | SLJIT_I32_OP)
+#define SLJIT_SET_MUL_OVERFLOW        SLJIT_SET(SLJIT_MUL_OVERFLOW)
 #define SLJIT_MUL_NOT_OVERFLOW        13
 #define SLJIT_MUL_NOT_OVERFLOW32    (SLJIT_MUL_NOT_OVERFLOW | SLJIT_I32_OP)
+#define SLJIT_SET_MUL_NOT_OVERFLOW    SLJIT_SET(SLJIT_MUL_NOT_OVERFLOW)


+/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */
+#define SLJIT_SET_CARRY            SLJIT_SET(14)
+
 /* Floating point comparison types. */
-#define SLJIT_EQUAL_F64            14
+#define SLJIT_EQUAL_F64            16
 #define SLJIT_EQUAL_F32            (SLJIT_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_NOT_EQUAL_F64        15
+#define SLJIT_SET_EQUAL_F        SLJIT_SET(SLJIT_EQUAL_F64)
+#define SLJIT_NOT_EQUAL_F64        17
 #define SLJIT_NOT_EQUAL_F32        (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_LESS_F64            16
+#define SLJIT_SET_NOT_EQUAL_F        SLJIT_SET(SLJIT_NOT_EQUAL_F64)
+#define SLJIT_LESS_F64            18
 #define SLJIT_LESS_F32            (SLJIT_LESS_F64 | SLJIT_F32_OP)
-#define SLJIT_GREATER_EQUAL_F64        17
+#define SLJIT_SET_LESS_F        SLJIT_SET(SLJIT_LESS_F64)
+#define SLJIT_GREATER_EQUAL_F64        19
 #define SLJIT_GREATER_EQUAL_F32        (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_GREATER_F64        18
+#define SLJIT_SET_GREATER_EQUAL_F    SLJIT_SET(SLJIT_GREATER_EQUAL_F64)
+#define SLJIT_GREATER_F64        20
 #define SLJIT_GREATER_F32        (SLJIT_GREATER_F64 | SLJIT_F32_OP)
-#define SLJIT_LESS_EQUAL_F64        19
+#define SLJIT_SET_GREATER_F        SLJIT_SET(SLJIT_GREATER_F64)
+#define SLJIT_LESS_EQUAL_F64        21
 #define SLJIT_LESS_EQUAL_F32        (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_UNORDERED_F64        20
+#define SLJIT_SET_LESS_EQUAL_F        SLJIT_SET(SLJIT_LESS_EQUAL_F64)
+#define SLJIT_UNORDERED_F64        22
 #define SLJIT_UNORDERED_F32        (SLJIT_UNORDERED_F64 | SLJIT_F32_OP)
-#define SLJIT_ORDERED_F64        21
+#define SLJIT_SET_UNORDERED_F        SLJIT_SET(SLJIT_UNORDERED_F64)
+#define SLJIT_ORDERED_F64        23
 #define SLJIT_ORDERED_F32        (SLJIT_ORDERED_F64 | SLJIT_F32_OP)
+#define SLJIT_SET_ORDERED_F        SLJIT_SET(SLJIT_ORDERED_F64)
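
A hedged sketch of the new flag protocol, assuming an already initialized struct sljit_compiler *compiler: the producing operation names exactly the flags it must set, and the consumer names the condition it tests (error checking omitted):

    struct sljit_label *loop = sljit_emit_label(compiler);
    /* Decrement R0 and request only the zero flag. */
    sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 1);
    /* Branch back while the result is non-zero. */
    sljit_set_label(sljit_emit_jump(compiler, SLJIT_NOT_ZERO), loop);
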


 /* Unconditional jump types. */
-#define SLJIT_JUMP            22
-#define SLJIT_FAST_CALL            23
-#define SLJIT_CALL0            24
-#define SLJIT_CALL1            25
-#define SLJIT_CALL2            26
-#define SLJIT_CALL3            27
+#define SLJIT_JUMP            24
+#define SLJIT_FAST_CALL            25
+#define SLJIT_CALL0            26
+#define SLJIT_CALL1            27
+#define SLJIT_CALL2            28
+#define SLJIT_CALL3            29


/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */

@@ -1022,8 +1120,9 @@
 /* Emit a jump instruction. The destination is not set, only the type of the jump.
     type must be between SLJIT_EQUAL and SLJIT_CALL3
     type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
-   Flags: - (never set any flags) for both conditional and unconditional jumps.
-   Flags: destroy all flags for calls. */
+
+   Flags: does not modify flags for conditional and unconditional
+          jumps but destroys all flags for calls. */
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type);


 /* Basic arithmetic comparison. In most architectures it is implemented as
@@ -1033,7 +1132,7 @@
    It is suggested to use this comparison form when appropriate.
     type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL
     type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
-   Flags: destroy flags. */
+   Flags: may destroy flags. */
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w);
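
A hedged usage sketch, assuming an initialized struct sljit_compiler *compiler and ignoring error codes:

    /* Skip the guarded block when R0 >= 100 (unsigned comparison). */
    struct sljit_jump *skip = sljit_emit_cmp(compiler, SLJIT_GREATER_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 100);
    /* ... emit the guarded block here ... */
    sljit_set_label(skip, sljit_emit_label(compiler));
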
@@ -1061,8 +1160,9 @@
     type must be between SLJIT_JUMP and SLJIT_CALL3
     Direct form: set src to SLJIT_IMM() and srcw to the address
     Indirect form: any other valid addressing mode
-   Flags: - (never set any flags) for unconditional jumps.
-   Flags: destroy all flags for calls. */
+
+   Flags: does not modify flags for unconditional jumps but
+          destroys all flags for calls. */
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw);


 /* Perform the operation using the conditional flags as the second argument.
@@ -1073,12 +1173,12 @@
    If op == SLJIT_MOV, SLJIT_MOV_S32, SLJIT_MOV_U32:
      Set dst to the value represented by the type (0 or 1).
      Src must be SLJIT_UNUSED, and srcw must be 0
-     Flags: - (never set any flags)
+     Flags: - (does not modify flags)
    If op == SLJIT_OR, op == SLJIT_AND, op == SLJIT_XOR
      Performs the binary operation using src as the first, and the value
      represented by type as the second argument.
      Important note: only dst=src and dstw=srcw is supported at the moment!
-     Flags: I | E | K
+     Flags: Z (may destroy flags)
    Note: sljit_emit_op_flags does nothing, if dst is SLJIT_UNUSED (regardless of op). */
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
     sljit_s32 dst, sljit_sw dstw,
@@ -1086,11 +1186,11 @@
     sljit_s32 type);
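
A hedged sketch, assuming a preceding operation already requested the zero flag (for example SLJIT_SUB | SLJIT_SET_Z) and that compiler is an initialized struct sljit_compiler pointer:

    /* R1 = (zero flag set) ? 1 : 0 */
    sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
    /* R2 |= (zero flag set) ? 1 : 0; note dst == src, as required above */
    sljit_emit_op_flags(compiler, SLJIT_OR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_EQUAL);
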


/* Copies the base address of SLJIT_SP + offset to dst.
- Flags: - (never set any flags) */
+ Flags: - (may destroy flags) */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);

/* The constant can be changed runtime (see: sljit_set_const)
- Flags: - (never set any flags) */
+ Flags: - (does not modify flags) */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value);

/* After the code generation the address for label, jump and const instructions
@@ -1100,9 +1200,10 @@
static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; }
static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; }

-/* Only the address is required to rewrite the code. */
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr);
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant);
+/* Only the address and executable offset are required to perform dynamic
+ code modifications. See sljit_get_executable_offset function. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset);
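
A hedged sketch of runtime re-targeting with the new signatures, assuming the jump was created with SLJIT_REWRITABLE_JUMP, code generation has finished, and new_target is a hypothetical machine address:

    sljit_sw exec_offset = sljit_get_executable_offset(compiler);
    sljit_set_jump_addr(sljit_get_jump_addr(jump), (sljit_uw)new_target, exec_offset);
    /* sljit_set_const() follows the same pattern for rewritable constants. */
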

 /* --------------------------------------------------------------------- */
 /*  Miscellaneous utility functions                                      */
@@ -1246,7 +1347,7 @@
     type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
     dst_reg must be a valid register and it can be combined
       with SLJIT_I32_OP to perform 32 bit arithmetic
-   Flags: I - (never set any flags)
+   Flags: - (does not modify flags)
  */


SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,

Modified: code/trunk/sljit/sljitNativeARM_32.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_32.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeARM_32.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -38,8 +38,7 @@
 /* Last register + 1. */
 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
 #define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_PC        (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_PC        (SLJIT_NUMBER_OF_REGISTERS + 4)


 #define TMP_FREG1    (0)
 #define TMP_FREG2    (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
@@ -55,8 +54,8 @@
     (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))


 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
-    0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+    0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 14, 12, 15
 };


 #define RM(rm) (reg_map[rm])
@@ -260,7 +259,7 @@
 {
     /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
     SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
-    return push_inst(compiler, BLX | RM(TMP_REG1));
+    return push_inst(compiler, BLX | RM(TMP_REG2));
 }


static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
@@ -389,7 +388,7 @@

#endif

-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
 {
     sljit_sw diff;


@@ -401,7 +400,7 @@
         code_ptr--;


     if (jump->flags & JUMP_ADDR)
-        diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2));
+        diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
     else {
         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
         diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
@@ -426,7 +425,7 @@
     }
 #else
     if (jump->flags & JUMP_ADDR)
-        diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr);
+        diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
     else {
         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
         diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
@@ -446,26 +445,28 @@
     return 0;
 }


-static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_s32 flush)
+static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
 {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-    sljit_uw *ptr = (sljit_uw*)addr;
-    sljit_uw *inst = (sljit_uw*)ptr[0];
+    sljit_uw *ptr = (sljit_uw *)jump_ptr;
+    sljit_uw *inst = (sljit_uw *)ptr[0];
     sljit_uw mov_pc = ptr[1];
     sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
-    sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2);
+    sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);


     if (diff <= 0x7fffff && diff >= -0x800000) {
         /* Turn to branch. */
         if (!bl) {
             inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
-            if (flush) {
+            if (flush_cache) {
+                inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                 SLJIT_CACHE_FLUSH(inst, inst + 1);
             }
         } else {
             inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
             inst[1] = NOP;
-            if (flush) {
+            if (flush_cache) {
+                inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                 SLJIT_CACHE_FLUSH(inst, inst + 2);
             }
         }
@@ -479,12 +480,14 @@
         if (*inst != mov_pc) {
             inst[0] = mov_pc;
             if (!bl) {
-                if (flush) {
+                if (flush_cache) {
+                    inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                     SLJIT_CACHE_FLUSH(inst, inst + 1);
                 }
             } else {
                 inst[1] = BLX | RM(TMP_REG1);
-                if (flush) {
+                if (flush_cache) {
+                    inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
                     SLJIT_CACHE_FLUSH(inst, inst + 2);
                 }
             }
@@ -492,11 +495,12 @@
         *ptr = new_addr;
     }
 #else
-    sljit_uw *inst = (sljit_uw*)addr;
+    sljit_uw *inst = (sljit_uw*)jump_ptr;
     SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
     inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
     inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
-    if (flush) {
+    if (flush_cache) {
+        inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
         SLJIT_CACHE_FLUSH(inst, inst + 2);
     }
 #endif
@@ -504,7 +508,7 @@


static sljit_uw get_imm(sljit_uw imm);

-static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_s32 flush)
+static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache)
 {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
     sljit_uw *ptr = (sljit_uw*)addr;
@@ -515,7 +519,8 @@
     src2 = get_imm(new_constant);
     if (src2) {
         *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
-        if (flush) {
+        if (flush_cache) {
+            inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
             SLJIT_CACHE_FLUSH(inst, inst + 1);
         }
         return;
@@ -524,7 +529,8 @@
     src2 = get_imm(~new_constant);
     if (src2) {
         *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
-        if (flush) {
+        if (flush_cache) {
+            inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
             SLJIT_CACHE_FLUSH(inst, inst + 1);
         }
         return;
@@ -537,7 +543,8 @@


     if (*inst != ldr_literal) {
         *inst = ldr_literal;
-        if (flush) {
+        if (flush_cache) {
+            inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
             SLJIT_CACHE_FLUSH(inst, inst + 1);
         }
     }
@@ -547,7 +554,8 @@
     SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
     inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
     inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
-    if (flush) {
+    if (flush_cache) {
+        inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
         SLJIT_CACHE_FLUSH(inst, inst + 2);
     }
 #endif
@@ -562,6 +570,8 @@
     sljit_uw *buf_end;
     sljit_uw size;
     sljit_uw word_count;
+    sljit_sw executable_offset;
+    sljit_sw jump_addr;
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
     sljit_uw cpool_size;
     sljit_uw cpool_skip_alignment;
@@ -602,6 +612,7 @@


     code_ptr = code;
     word_count = 0;
+    executable_offset = SLJIT_EXEC_OFFSET(code);


     label = compiler->labels;
     jump = compiler->jumps;
@@ -608,8 +619,7 @@
     const_ = compiler->consts;


     if (label && label->size == 0) {
-        label->addr = (sljit_uw)code;
-        label->size = 0;
+        label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
         label = label->next;
     }


@@ -636,7 +646,7 @@
                         cpool_size = 0;
                         if (label && label->size == word_count) {
                             /* Points after the current instruction. */
-                            label->addr = (sljit_uw)code_ptr;
+                            label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
                             label->size = code_ptr - code;
                             label = label->next;
                         }
@@ -652,12 +662,12 @@
                 SLJIT_ASSERT(!const_ || const_->addr >= word_count);
                 if (jump && jump->addr == word_count) {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-                    if (detect_jump_type(jump, code_ptr, code))
+                    if (detect_jump_type(jump, code_ptr, code, executable_offset))
                         code_ptr--;
                     jump->addr = (sljit_uw)code_ptr;
 #else
                     jump->addr = (sljit_uw)(code_ptr - 2);
-                    if (detect_jump_type(jump, code_ptr, code))
+                    if (detect_jump_type(jump, code_ptr, code, executable_offset))
                         code_ptr -= 2;
 #endif
                     jump = jump->next;
@@ -664,7 +674,7 @@
                 }
                 if (label && label->size == word_count) {
                     /* code_ptr can be affected above. */
-                    label->addr = (sljit_uw)(code_ptr + 1);
+                    label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
                     label->size = (code_ptr + 1) - code;
                     label = label->next;
                 }
@@ -729,17 +739,18 @@


     jump = compiler->jumps;
     while (jump) {
-        buf_ptr = (sljit_uw*)jump->addr;
+        buf_ptr = (sljit_uw *)jump->addr;


         if (jump->flags & PATCH_B) {
+            jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
             if (!(jump->flags & JUMP_ADDR)) {
                 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-                SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
-                *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+                SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000);
+                *buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff;
             }
             else {
-                SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
-                *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+                SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000);
+                *buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff;
             }
         }
         else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
@@ -747,10 +758,10 @@
             jump->addr = (sljit_uw)code_ptr;
             code_ptr[0] = (sljit_uw)buf_ptr;
             code_ptr[1] = *buf_ptr;
-            inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+            inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
             code_ptr += 2;
 #else
-            inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+            inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
 #endif
         }
         else {
@@ -763,7 +774,7 @@
                 buf_ptr += 1;
             *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
 #else
-            inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+            inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
 #endif
         }
         jump = jump->next;
@@ -782,7 +793,7 @@
         else
             buf_ptr += 1;
         /* Set the value again (can be a simple constant). */
-        inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0);
+        inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
         code_ptr += 2;


         const_ = const_->next;
@@ -792,8 +803,12 @@
     SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);


     compiler->error = SLJIT_ERR_COMPILED;
+    compiler->executable_offset = executable_offset;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
+
+    code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+    code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }
@@ -802,21 +817,55 @@
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */


+/* Creates an index in data_transfer_insts array. */
+#define WORD_DATA    0x00
+#define BYTE_DATA    0x01
+#define HALF_DATA    0x02
+#define SIGNED_DATA    0x04
+#define LOAD_DATA    0x08
+
 /* emit_op inp_flags.
    WRITE_BACK must be the first, since it is a flag. */
-#define WRITE_BACK    0x01
-#define ALLOW_IMM    0x02
-#define ALLOW_INV_IMM    0x04
+#define WRITE_BACK    0x10
+#define ALLOW_IMM    0x20
+#define ALLOW_INV_IMM    0x40
 #define ALLOW_ANY_IMM    (ALLOW_IMM | ALLOW_INV_IMM)
-#define ARG_TEST    0x08


-/* Creates an index in data_transfer_insts array. */
-#define WORD_DATA    0x00
-#define BYTE_DATA    0x10
-#define HALF_DATA    0x20
-#define SIGNED_DATA    0x40
-#define LOAD_DATA    0x80
+/* s/l - store/load (1 bit)
+   u/s - signed/unsigned (1 bit)
+   w/b/h/N - word/byte/half/NOT allowed (2 bit)
+   Storing signed and unsigned values is the same operation. */


+static const sljit_uw data_transfer_insts[16] = {
+/* s u w */ 0xe5000000 /* str */,
+/* s u b */ 0xe5400000 /* strb */,
+/* s u h */ 0xe10000b0 /* strh */,
+/* s u N */ 0x00000000 /* not allowed */,
+/* s s w */ 0xe5000000 /* str */,
+/* s s b */ 0xe5400000 /* strb */,
+/* s s h */ 0xe10000b0 /* strh */,
+/* s s N */ 0x00000000 /* not allowed */,
+
+/* l u w */ 0xe5100000 /* ldr */,
+/* l u b */ 0xe5500000 /* ldrb */,
+/* l u h */ 0xe11000b0 /* ldrh */,
+/* l u N */ 0x00000000 /* not allowed */,
+/* l s w */ 0xe5100000 /* ldr */,
+/* l s b */ 0xe11000d0 /* ldrsb */,
+/* l s h */ 0xe11000f0 /* ldrsh */,
+/* l s N */ 0x00000000 /* not allowed */,
+};
+
+#define EMIT_DATA_TRANSFER(type, add, wb, target_reg, base_reg, arg) \
+    (data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | (reg_map[target_reg] << 12) | (reg_map[base_reg] << 16) | (arg))
+
+/* Normal ldr/str instruction.
+   Type2: ldrsb, ldrh, ldrsh */
+#define IS_TYPE1_TRANSFER(type) \
+    (data_transfer_insts[(type) & 0xf] & 0x04000000)
+#define TYPE2_TRANSFER_IMM(imm) \
+    (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
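
For orientation, the 4-bit index passed to EMIT_DATA_TRANSFER is simply the OR of the flags above; a hedged, backend-internal sketch (push_inst and FAIL_IF are the helpers used throughout this file):

    /* Load an unsigned byte from [SLJIT_R1] into SLJIT_R0:
       (LOAD_DATA | BYTE_DATA) == 0x09 selects 0xe5500000 (ldrb),
       add == 1, no write back, immediate offset 0. */
    FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(LOAD_DATA | BYTE_DATA, 1, 0, SLJIT_R0, SLJIT_R1, 0)));
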
+
 /* Condition: AL. */
 #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
     (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
@@ -913,40 +962,6 @@
 /*  Operators                                                            */
 /* --------------------------------------------------------------------- */


-/* s/l - store/load (1 bit)
-   u/s - signed/unsigned (1 bit)
-   w/b/h/N - word/byte/half/NOT allowed (2 bit)
-   It contans 16 items, but not all are different. */
-
-static sljit_sw data_transfer_insts[16] = {
-/* s u w */ 0xe5000000 /* str */,
-/* s u b */ 0xe5400000 /* strb */,
-/* s u h */ 0xe10000b0 /* strh */,
-/* s u N */ 0x00000000 /* not allowed */,
-/* s s w */ 0xe5000000 /* str */,
-/* s s b */ 0xe5400000 /* strb */,
-/* s s h */ 0xe10000b0 /* strh */,
-/* s s N */ 0x00000000 /* not allowed */,
-
-/* l u w */ 0xe5100000 /* ldr */,
-/* l u b */ 0xe5500000 /* ldrb */,
-/* l u h */ 0xe11000b0 /* ldrh */,
-/* l u N */ 0x00000000 /* not allowed */,
-/* l s w */ 0xe5100000 /* ldr */,
-/* l s b */ 0xe11000d0 /* ldrsb */,
-/* l s h */ 0xe11000f0 /* ldrsh */,
-/* l s N */ 0x00000000 /* not allowed */,
-};
-
-#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
-    (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
-/* Normal ldr/str instruction.
-   Type2: ldrsb, ldrh, ldrsh */
-#define IS_TYPE1_TRANSFER(type) \
-    (data_transfer_insts[(type) >> 4] & 0x04000000)
-#define TYPE2_TRANSFER_IMM(imm) \
-    (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
-
 /* flags: */
   /* Arguments are swapped. */
 #define ARGS_SWAPPED    0x01
@@ -953,12 +968,7 @@
   /* Inverted immediate. */
 #define INV_IMM        0x02
   /* Source and destination is register. */
-#define REG_DEST    0x04
-#define REG_SOURCE    0x08
-  /* One instruction is enough. */
-#define FAST_DEST    0x10
-  /* Multiple instructions are required. */
-#define SLOW_DEST    0x20
+#define MOVE_REG_CONV    0x04
 /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
 #define SET_FLAGS    (1 << 20)
 /* dst: reg
@@ -967,38 +977,32 @@
    SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
 #define SRC2_IMM    (1 << 25)


-#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
-    return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)))
-
-#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
-    return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))
-
 #define EMIT_SHIFT_INS_AND_RETURN(opcode) \
     SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
     if (compiler->shift_imm != 0x20) { \
         SLJIT_ASSERT(src1 == TMP_REG1); \
         SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
+        \
         if (compiler->shift_imm != 0) \
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
-        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
+                dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | RM(src2))); \
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); \
     } \
-    return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1])));
+    return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
+        dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)));


 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
     sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
 {
-    sljit_sw mul_inst;
-
     switch (GET_OPCODE(op)) {
     case SLJIT_MOV:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
         if (dst != src2) {
             if (src2 & SRC2_IMM) {
-                if (flags & INV_IMM)
-                    EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+                return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
+                    dst, SLJIT_UNUSED, src2));
             }
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(src2)));
         }
         return SLJIT_SUCCESS;


@@ -1005,12 +1009,12 @@
     case SLJIT_MOV_U8:
     case SLJIT_MOV_S8:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
-        if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+        if (flags & MOVE_REG_CONV) {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
             if (op == SLJIT_MOV_U8)
                 return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | reg_map[dst]));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | RM(src2))));
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)));
 #else
             return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
 #endif
@@ -1017,9 +1021,8 @@
         }
         else if (dst != src2) {
             SLJIT_ASSERT(src2 & SRC2_IMM);
-            if (flags & INV_IMM)
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
+                dst, SLJIT_UNUSED, src2));
         }
         return SLJIT_SUCCESS;


@@ -1026,10 +1029,10 @@
     case SLJIT_MOV_U16:
     case SLJIT_MOV_S16:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
-        if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+        if (flags & MOVE_REG_CONV) {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | reg_map[dst]));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | RM(src2))));
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)));
 #else
             return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
 #endif
@@ -1036,19 +1039,17 @@
         }
         else if (dst != src2) {
             SLJIT_ASSERT(src2 & SRC2_IMM);
-            if (flags & INV_IMM)
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
+                dst, SLJIT_UNUSED, src2));
         }
         return SLJIT_SUCCESS;


     case SLJIT_NOT:
         if (src2 & SRC2_IMM) {
-            if (flags & INV_IMM)
-                EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MOV_DP : MVN_DP, flags & SET_FLAGS,
+                dst, SLJIT_UNUSED, src2));
         }
-        EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2)));


     case SLJIT_CLZ:
         SLJIT_ASSERT(!(flags & INV_IMM));
@@ -1055,69 +1056,52 @@
         SLJIT_ASSERT(!(src2 & SRC2_IMM));
         FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
         if (flags & SET_FLAGS)
-            EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
+            return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, flags & SET_FLAGS, SLJIT_UNUSED, dst, SRC2_IMM));
         return SLJIT_SUCCESS;


     case SLJIT_ADD:
         SLJIT_ASSERT(!(flags & INV_IMM));
-        EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS,
+            dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


     case SLJIT_ADDC:
         SLJIT_ASSERT(!(flags & INV_IMM));
-        EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADC_DP, flags & SET_FLAGS,
+            dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


     case SLJIT_SUB:
         SLJIT_ASSERT(!(flags & INV_IMM));
-        if (!(flags & ARGS_SWAPPED))
-            EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
-        EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS,
+            dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


     case SLJIT_SUBC:
         SLJIT_ASSERT(!(flags & INV_IMM));
-        if (!(flags & ARGS_SWAPPED))
-            EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
-        EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SBC_DP : RSC_DP, flags & SET_FLAGS,
+            dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


     case SLJIT_MUL:
         SLJIT_ASSERT(!(flags & INV_IMM));
         SLJIT_ASSERT(!(src2 & SRC2_IMM));
-        if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
-            mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
-        else
-            mul_inst = MUL | (reg_map[dst] << 16);


-        if (dst != src2)
-            FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
-        else if (dst != src1)
-            FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
-        else {
-            /* Rm and Rd must not be the same register. */
-            SLJIT_ASSERT(dst != TMP_REG1);
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
-            FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
-        }
+        if (!HAS_FLAGS(op))
+            return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]);


-        if (!(op & SLJIT_SET_O))
-            return SLJIT_SUCCESS;
+        FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1]));


-        /* We need to use TMP_REG3. */
-        compiler->cache_arg = 0;
-        compiler->cache_argw = 0;
-        /* cmp TMP_REG2, dst asr #31. */
-        return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0));
+        /* cmp TMP_REG1, dst asr #31. */
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG1, RM(dst) | 0xfc0));


     case SLJIT_AND:
-        if (!(flags & INV_IMM))
-            EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP);
-        EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP);
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & INV_IMM) ? AND_DP : BIC_DP, flags & SET_FLAGS,
+            dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


     case SLJIT_OR:
         SLJIT_ASSERT(!(flags & INV_IMM));
-        EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP);
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(ORR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


     case SLJIT_XOR:
         SLJIT_ASSERT(!(flags & INV_IMM));
-        EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP);
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(EOR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));


     case SLJIT_SHL:
         EMIT_SHIFT_INS_AND_RETURN(0);
@@ -1128,12 +1112,11 @@
     case SLJIT_ASHR:
         EMIT_SHIFT_INS_AND_RETURN(2);
     }
-    SLJIT_ASSERT_STOP();
+
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


-#undef EMIT_DATA_PROCESS_INS_AND_RETURN
-#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN
#undef EMIT_SHIFT_INS_AND_RETURN

/* Tests whether the immediate can be stored in the 12 bit imm field.
@@ -1317,348 +1300,162 @@
#endif
}

-/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
-static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
+static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
+    sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
 {
-    if (value >= 0) {
-        value = get_imm(value);
-        if (value)
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value));
-    }
-    else {
-        value = get_imm(-value);
-        if (value)
-            return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value));
-    }
-    return SLJIT_ERR_UNSUPPORTED;
-}
+    sljit_uw offset_reg, imm;
+    sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags);


-/* Can perform an operation using at most 1 instruction. */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
-    sljit_uw imm;
+    SLJIT_ASSERT (arg & SLJIT_MEM);
+    SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);


-    if (arg & SLJIT_IMM) {
-        imm = get_imm(argw);
-        if (imm) {
-            if (inp_flags & ARG_TEST)
-                return 1;
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
-            return -1;
-        }
-        imm = get_imm(~argw);
-        if (imm) {
-            if (inp_flags & ARG_TEST)
-                return 1;
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
-            return -1;
-        }
-        return 0;
+    SLJIT_COMPILE_ASSERT(WRITE_BACK == 0x10, optimized_for_emit_data_transfer);
+
+    if ((arg & REG_MASK) == SLJIT_UNUSED) {
+        /* Write back is not used. */
+        FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+        return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? 0 : TYPE2_TRANSFER_IMM(0)));
     }


-    SLJIT_ASSERT(arg & SLJIT_MEM);
+    if (arg & OFFS_REG_MASK) {
+        offset_reg = OFFS_REG(arg);
+        arg &= REG_MASK;
+        argw &= 0x3;


-    /* Fast loads/stores. */
-    if (!(arg & REG_MASK))
-        return 0;
+        if (argw != 0 && !is_type1_transfer) {
+            SLJIT_ASSERT(!(flags & WRITE_BACK));


-    if (arg & OFFS_REG_MASK) {
-        if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags))
-            return 0;
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_reg, arg, RM(offset_reg) | (argw << 7))));
+            return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
+        }


-        if (inp_flags & ARG_TEST)
-            return 1;
-        FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
-            RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
-        return -1;
+        /* Bit 25: RM is offset. */
+        return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
+            RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7)));
     }


-    if (IS_TYPE1_TRANSFER(inp_flags)) {
+    arg &= REG_MASK;
+
+    if (is_type1_transfer) {
+        if (argw > 0xfff) {
+            imm = get_imm(argw & ~0xfff);
+            if (imm) {
+                offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+                FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
+                argw = argw & 0xfff;
+                arg = offset_reg;
+            }
+        }
+        else if (argw < -0xfff) {
+            imm = get_imm(-argw & ~0xfff);
+            if (imm) {
+                offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+                FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
+                argw = -(-argw & 0xfff);
+                arg = offset_reg;
+            }
+        }
+
         if (argw >= 0 && argw <= 0xfff) {
-            if (inp_flags & ARG_TEST)
-                return 1;
-            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
-            return -1;
+            return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg & REG_MASK, argw));
         }
         if (argw < 0 && argw >= -0xfff) {
-            if (inp_flags & ARG_TEST)
-                return 1;
-            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
-            return -1;
+            return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
         }
     }
     else {
+        if (argw > 0xff) {
+            imm = get_imm(argw & ~0xff);
+            if (imm) {
+                offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+                FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
+                argw = argw & 0xff;
+                arg = offset_reg;
+            }
+        }
+        else if (argw < -0xff) {
+            imm = get_imm(-argw & ~0xff);
+            if (imm) {
+                offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+                FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
+                argw = -(-argw & 0xff);
+                arg = offset_reg;
+            }
+        }
+
         if (argw >= 0 && argw <= 0xff) {
-            if (inp_flags & ARG_TEST)
-                return 1;
-            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
-            return -1;
+            return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
         }
         if (argw < 0 && argw >= -0xff) {
-            if (inp_flags & ARG_TEST)
-                return 1;
             argw = -argw;
-            FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
-            return -1;
+            return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
         }
     }


-    return 0;
+    FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+    return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
+        RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0)));
 }


-/* See getput_arg below.
-   Note: can_cache is called only for binary operators. Those
-   operators always uses word arguments without write back. */
-static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
-    /* Immediate caching is not supported as it would be an operation on constant arguments. */
-    if (arg & SLJIT_IMM)
-        return 0;
-
-    /* Always a simple operation. */
-    if (arg & OFFS_REG_MASK)
-        return 0;
-
-    if (!(arg & REG_MASK)) {
-        /* Immediate access. */
-        if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
-            return 1;
-        return 0;
-    }
-
-    if (argw <= 0xfffff && argw >= -0xfffff)
-        return 0;
-
-    if (argw == next_argw && (next_arg & SLJIT_MEM))
-        return 1;
-
-    if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
-        return 1;
-
-    return 0;
-}
-
-#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \
-    if (max_delta & 0xf00) \
-        FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \
-    else \
-        FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm))));
-
-#define TEST_WRITE_BACK() \
-    if (inp_flags & WRITE_BACK) { \
-        tmp_r = arg & REG_MASK; \
-        if (reg == tmp_r) { \
-            /* This can only happen for stores */ \
-            /* since ldr reg, [reg, ...]! has no meaning */ \
-            SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
-            reg = TMP_REG3; \
-        } \
-    }
-
-/* Emit the necessary instructions. See can_cache above. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
-    sljit_s32 tmp_r;
-    sljit_sw max_delta;
-    sljit_sw sign;
-    sljit_uw imm;
-
-    if (arg & SLJIT_IMM) {
-        SLJIT_ASSERT(inp_flags & LOAD_DATA);
-        return load_immediate(compiler, reg, argw);
-    }
-
-    SLJIT_ASSERT(arg & SLJIT_MEM);
-
-    tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
-    max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff;
-
-    if ((arg & REG_MASK) == SLJIT_UNUSED) {
-        /* Write back is not used. */
-        imm = (sljit_uw)(argw - compiler->cache_argw);
-        if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
-            if (imm <= (sljit_uw)max_delta) {
-                sign = 1;
-                argw = argw - compiler->cache_argw;
-            }
-            else {
-                sign = 0;
-                argw = compiler->cache_argw - argw;
-            }
-
-            GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw);
-            return SLJIT_SUCCESS;
-        }
-
-        /* With write back, we can create some sophisticated loads, but
-           it is hard to decide whether we should convert downward (0s) or upward (1s). */
-        imm = (sljit_uw)(argw - next_argw);
-        if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
-            SLJIT_ASSERT(inp_flags & LOAD_DATA);
-
-            compiler->cache_arg = SLJIT_IMM;
-            compiler->cache_argw = argw;
-            tmp_r = TMP_REG3;
-        }
-
-        FAIL_IF(load_immediate(compiler, tmp_r, argw));
-        GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0);
-        return SLJIT_SUCCESS;
-    }
-
-    if (arg & OFFS_REG_MASK) {
-        SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
-        if (inp_flags & WRITE_BACK)
-            tmp_r = arg & REG_MASK;
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
-        return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
-    }
-
-    imm = (sljit_uw)(argw - compiler->cache_argw);
-    if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) {
-        SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
-        GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm);
-        return SLJIT_SUCCESS;
-    }
-    if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) {
-        SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
-        imm = (sljit_uw)-(sljit_sw)imm;
-        GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm);
-        return SLJIT_SUCCESS;
-    }
-
-    imm = get_imm(argw & ~max_delta);
-    if (imm) {
-        TEST_WRITE_BACK();
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
-        GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
-        return SLJIT_SUCCESS;
-    }
-
-    imm = get_imm(-argw & ~max_delta);
-    if (imm) {
-        argw = -argw;
-        TEST_WRITE_BACK();
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
-        GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
-        return SLJIT_SUCCESS;
-    }
-
-    if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
-        TEST_WRITE_BACK();
-        return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-    }
-
-    if (argw == next_argw && (next_arg & SLJIT_MEM)) {
-        SLJIT_ASSERT(inp_flags & LOAD_DATA);
-        FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-
-        compiler->cache_arg = SLJIT_IMM;
-        compiler->cache_argw = argw;
-
-        TEST_WRITE_BACK();
-        return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-    }
-
-    imm = (sljit_uw)(argw - next_argw);
-    if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
-        SLJIT_ASSERT(inp_flags & LOAD_DATA);
-        FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));
-
-        compiler->cache_arg = arg;
-        compiler->cache_argw = argw;
-
-        GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0);
-        return SLJIT_SUCCESS;
-    }
-
-    if ((arg & REG_MASK) == tmp_r) {
-        compiler->cache_arg = SLJIT_IMM;
-        compiler->cache_argw = argw;
-        tmp_r = TMP_REG3;
-    }
-
-    FAIL_IF(load_immediate(compiler, tmp_r, argw));
-    return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
-    if (getput_arg_fast(compiler, flags, reg, arg, argw))
-        return compiler->error;
-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-    return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
-{
-    if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
-        return compiler->error;
-    return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
-}
-
 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
-    /* arg1 goes to TMP_REG1 or src reg
-       arg2 goes to TMP_REG2, imm or src reg
-       TMP_REG3 can be used for caching
-       result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+    /* src1 is reg or TMP_REG1
+       src2 is reg, TMP_REG2, or imm
+       result goes to TMP_REG2, so put result can use TMP_REG1. */


     /* We prefer registers and simple constants. */
-    sljit_s32 dst_r;
-    sljit_s32 src1_r;
-    sljit_s32 src2_r = 0;
-    sljit_s32 sugg_src2_r = TMP_REG2;
-    sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+    sljit_s32 dst_reg;
+    sljit_s32 src1_reg;
+    sljit_s32 src2_reg;
+    sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-
     /* Destination check. */
     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
-        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
+        if (op <= SLJIT_MOVU_P && !(src2 & SLJIT_MEM))
             return SLJIT_SUCCESS;
-        dst_r = TMP_REG2;
     }
-    else if (FAST_IS_REG(dst)) {
-        dst_r = dst;
-        flags |= REG_DEST;
-        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
-            sugg_src2_r = dst_r;
-    }
-    else {
-        SLJIT_ASSERT(dst & SLJIT_MEM);
-        if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
-            flags |= FAST_DEST;
-            dst_r = TMP_REG2;
+
+    SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));
+
+    src2_reg = 0;
+
+    do {
+        if (!(inp_flags & ALLOW_IMM))
+            break;
+
+        if (src2 & SLJIT_IMM) {
+            src2_reg = get_imm(src2w);
+            if (src2_reg)
+                break;
+            if (inp_flags & ALLOW_INV_IMM) {
+                src2_reg = get_imm(~src2w);
+                if (src2_reg) {
+                    flags |= INV_IMM;
+                    break;
+                }
+            }
+            if (GET_OPCODE(op) == SLJIT_ADD) {
+                src2_reg = get_imm(-src2w);
+                if (src2_reg) {
+                    op = SLJIT_SUB | GET_ALL_FLAGS(op);
+                    break;
+                }
+            }
+            if (GET_OPCODE(op) == SLJIT_SUB) {
+                src2_reg = get_imm(-src2w);
+                if (src2_reg) {
+                    op = SLJIT_ADD | GET_ALL_FLAGS(op);
+                    break;
+                }
+            }
         }
-        else {
-            flags |= SLOW_DEST;
-            dst_r = 0;
-        }
-    }


-    /* Source 1. */
-    if (FAST_IS_REG(src1))
-        src1_r = src1;
-    else if (FAST_IS_REG(src2)) {
-        flags |= ARGS_SWAPPED;
-        src1_r = src2;
-        src2 = src1;
-        src2w = src1w;
-    }
-    else do { /* do { } while(0) is used because of breaks. */
-        src1_r = 0;
-        if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
-            /* The second check will generate a hit. */
-            src2_r = get_imm(src1w);
-            if (src2_r) {
+        if (src1 & SLJIT_IMM) {
+            src2_reg = get_imm(src1w);
+            if (src2_reg) {
                 flags |= ARGS_SWAPPED;
                 src1 = src2;
                 src1w = src2w;
@@ -1665,8 +1462,8 @@
                 break;
             }
             if (inp_flags & ALLOW_INV_IMM) {
-                src2_r = get_imm(~src1w);
-                if (src2_r) {
+                src2_reg = get_imm(~src1w);
+                if (src2_reg) {
                     flags |= ARGS_SWAPPED | INV_IMM;
                     src1 = src2;
                     src1w = src2w;
@@ -1674,9 +1471,9 @@
                 }
             }
             if (GET_OPCODE(op) == SLJIT_ADD) {
-                src2_r = get_imm(-src1w);
-                if (src2_r) {
-                    /* Note: ARGS_SWAPPED is intentionally not applied! */
+                src2_reg = get_imm(-src1w);
+                if (src2_reg) {
+                    /* Note: add is a commutative operation. */
                     src1 = src2;
                     src1w = src2w;
                     op = SLJIT_SUB | GET_ALL_FLAGS(op);
@@ -1684,110 +1481,54 @@
                 }
             }
         }
+    } while(0);


-        if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
-            FAIL_IF(compiler->error);
-            src1_r = TMP_REG1;
-        }
-    } while (0);
+    /* Source 1. */
+    if (FAST_IS_REG(src1))
+        src1_reg = src1;
+    else if (src1 & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
+        src1_reg = TMP_REG1;
+    }
+    else {
+        FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+        src1_reg = TMP_REG1;
+    }


-    /* Source 2. */
-    if (src2_r == 0) {
-        if (FAST_IS_REG(src2)) {
-            src2_r = src2;
-            flags |= REG_SOURCE;
-            if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
-                dst_r = src2_r;
-        }
-        else do { /* do { } while(0) is used because of breaks. */
-            if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) {
-                src2_r = get_imm(src2w);
-                if (src2_r)
-                    break;
-                if (inp_flags & ALLOW_INV_IMM) {
-                    src2_r = get_imm(~src2w);
-                    if (src2_r) {
-                        flags |= INV_IMM;
-                        break;
-                    }
-                }
-                if (GET_OPCODE(op) == SLJIT_ADD) {
-                    src2_r = get_imm(-src2w);
-                    if (src2_r) {
-                        op = SLJIT_SUB | GET_ALL_FLAGS(op);
-                        flags &= ~ARGS_SWAPPED;
-                        break;
-                    }
-                }
-                if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) {
-                    src2_r = get_imm(-src2w);
-                    if (src2_r) {
-                        op = SLJIT_ADD | GET_ALL_FLAGS(op);
-                        flags &= ~ARGS_SWAPPED;
-                        break;
-                    }
-                }
-            }
+    /* Destination. */
+    dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;


-            /* src2_r is 0. */
-            if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
-                FAIL_IF(compiler->error);
-                src2_r = sugg_src2_r;
-            }
-        } while (0);
-    }
+    if (op <= SLJIT_MOVU_P) {
+        if (dst & SLJIT_MEM) {
+            if (inp_flags & BYTE_DATA)
+                inp_flags &= ~SIGNED_DATA;


-    /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero.
-       If they are zero, they must not be registers. */
-    if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
-        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
-            SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
-            flags |= ARGS_SWAPPED;
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
+            if (FAST_IS_REG(src2))
+                return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
         }
-        else {
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
-            FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
-        }
-        src1_r = TMP_REG1;
-        src2_r = TMP_REG2;
+
+        if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
+            flags |= MOVE_REG_CONV;
     }
-    else if (src1_r == 0 && src2_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
-        src1_r = TMP_REG1;
-    }
-    else if (src1_r == 0 && dst_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
-        src1_r = TMP_REG1;
-    }
-    else if (src2_r == 0 && dst_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
-        src2_r = sugg_src2_r;
-    }


-    if (dst_r == 0)
-        dst_r = TMP_REG2;
+    /* Source 2. */
+    if (src2_reg == 0) {
+        src2_reg = (op <= SLJIT_MOVU_P) ? dst_reg : TMP_REG2;


-    if (src1_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
-        src1_r = TMP_REG1;
+        if (FAST_IS_REG(src2))
+            src2_reg = src2;
+        else if (src2 & SLJIT_MEM)
+            FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
+        else
+            FAIL_IF(load_immediate(compiler, src2_reg, src2w));
     }


-    if (src2_r == 0) {
-        FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
-        src2_r = sugg_src2_r;
-    }
+    FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg));


-    FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+    if (!(dst & SLJIT_MEM))
+        return SLJIT_SUCCESS;


-    if (flags & (FAST_DEST | SLOW_DEST)) {
-        if (flags & FAST_DEST)
-            FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
-        else
-            FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
-    }
-    return SLJIT_SUCCESS;
+    return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
 }
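For context on the new immediate fast path in emit_op above: A32 data-processing immediates are an 8-bit value rotated right by an even amount, which is why the code first tries get_imm(src2w), then the bitwise inverse (the ALLOW_INV_IMM / INV_IMM path), and finally the arithmetic negation while swapping ADD and SUB. The standalone sketch below is illustrative only, not sljit code, and its helper names are made up; it shows the same encodability test and why the negation fallback helps.

/* a32_imm.c -- illustrative only, not sljit code. */
#include <stdint.h>
#include <stdio.h>

static uint32_t rotl32(uint32_t v, unsigned r)
{
    return r ? ((v << r) | (v >> (32 - r))) : v;
}

/* Nonzero if v fits an A32 data-processing immediate:
   an 8-bit constant rotated right by an even amount. */
static int a32_encodable(uint32_t v)
{
    unsigned rot;
    for (rot = 0; rot < 32; rot += 2)
        if (rotl32(v, rot) <= 0xff)
            return 1;
    return 0;
}

int main(void)
{
    uint32_t imm = 0xffffff00u;   /* e.g. ADD rX, rY, #0xffffff00 */

    printf("imm  0x%08x encodable: %d\n", (unsigned)imm, a32_encodable(imm));
    printf("~imm 0x%08x encodable: %d  (INV_IMM path for logical ops)\n",
           (unsigned)~imm, a32_encodable(~imm));
    printf("-imm 0x%08x encodable: %d  (so the ADD is emitted as SUB)\n",
           (unsigned)(0u - imm), a32_encodable(0u - imm));
    return 0;
}

The same reasoning applies to the src1 immediate a few lines earlier, with ARGS_SWAPPED recording that the operands were exchanged.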


#ifdef __cplusplus
@@ -1807,6 +1548,9 @@

 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
 {
+    sljit_sw saved_reg_list[3];
+    sljit_sw saved_reg_count;
+
     CHECK_ERROR();
     CHECK(check_sljit_emit_op0(compiler, op));


@@ -1820,33 +1564,38 @@
         break;
     case SLJIT_LMUL_UW:
     case SLJIT_LMUL_SW:
-#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
         return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
             | (reg_map[SLJIT_R1] << 16)
             | (reg_map[SLJIT_R0] << 12)
             | (reg_map[SLJIT_R0] << 8)
             | reg_map[SLJIT_R1]);
-#else
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1))));
-        return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
-            | (reg_map[SLJIT_R1] << 16)
-            | (reg_map[SLJIT_R0] << 12)
-            | (reg_map[SLJIT_R0] << 8)
-            | reg_map[TMP_REG1]);
-#endif
     case SLJIT_DIVMOD_UW:
     case SLJIT_DIVMOD_SW:
     case SLJIT_DIV_UW:
     case SLJIT_DIV_SW:
         SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
-        SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping);
+        SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);


-        if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) {
-            FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
-            FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */));
+        saved_reg_count = 0;
+        if (compiler->scratches >= 4)
+            saved_reg_list[saved_reg_count++] = 3;
+        if (compiler->scratches >= 3)
+            saved_reg_list[saved_reg_count++] = 2;
+        if (op >= SLJIT_DIV_UW)
+            saved_reg_list[saved_reg_count++] = 1;
+
+        if (saved_reg_count > 0) {
+            FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8)
+                        | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
+            if (saved_reg_count >= 2) {
+                SLJIT_ASSERT(saved_reg_list[1] < 8);
+                FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
+            }
+            if (saved_reg_count >= 3) {
+                SLJIT_ASSERT(saved_reg_list[2] < 8);
+                FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
+            }
         }
-        else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3))
-            FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */));


 #if defined(__GNUC__)
         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
@@ -1855,12 +1604,18 @@
 #error "Software divmod functions are needed"
 #endif


-        if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) {
-            FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */));
-            FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */));
+        if (saved_reg_count > 0) {
+            if (saved_reg_count >= 3) {
+                SLJIT_ASSERT(saved_reg_list[2] < 8);
+                FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
+            }
+            if (saved_reg_count >= 2) {
+                SLJIT_ASSERT(saved_reg_list[1] < 8);
+                FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
+            }
+            return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8)
+                        | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
         }
-        else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3))
-            return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */);
         return SLJIT_SUCCESS;
     }
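The register save/restore around the software divmod call is now driven by saved_reg_list[], but the instruction words are still composed from fixed A32 encodings: the base opcodes select str/ldr with pre- or post-indexing on sp, the saved register number is shifted into bits 12-15, and the byte offset (8 or 16, depending on how many registers are spilled) sits in bits 0-11. The following decomposition of those constants is for illustration only; the helper names are invented and this is not sljit code.

#include <stdint.h>
#include <stdio.h>

/* A32 single-register transfers with SP as the base: the transferred
   register goes in bits 12-15, the unsigned byte offset in bits 0-11. */
#define STR_PRE_WB_SP  0xe52d0000u   /* str rd, [sp, #-off]!  (pre-indexed, writeback) */
#define STR_SP_OFF     0xe58d0000u   /* str rd, [sp, #off]                             */
#define LDR_SP_OFF     0xe59d0000u   /* ldr rd, [sp, #off]                             */
#define LDR_POST_SP    0xe49d0000u   /* ldr rd, [sp], #off    (post-indexed)           */

static uint32_t encode(uint32_t base, unsigned rd, unsigned off)
{
    return base | (rd << 12) | off;
}

int main(void)
{
    /* Two spilled registers (r2 and r1) need 8 bytes of stack: */
    printf("str r2, [sp, #-8]! -> 0x%08x\n", (unsigned)encode(STR_PRE_WB_SP, 2, 8));
    printf("str r1, [sp, #4]   -> 0x%08x\n", (unsigned)encode(STR_SP_OFF, 1, 4));
    /* ...software divmod helper is called here... */
    printf("ldr r1, [sp, #4]   -> 0x%08x\n", (unsigned)encode(LDR_SP_OFF, 1, 4));
    printf("ldr r2, [sp], #8   -> 0x%08x\n", (unsigned)encode(LDR_POST_SP, 2, 8));
    return 0;
}

Running it reproduces the words from the old hard-coded comments, e.g. 0xe52d2008 for str r2, [sp, #-8]!.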


@@ -2043,66 +1798,46 @@

 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 {
-    sljit_sw tmp;
     sljit_uw imm;
     sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));
+
     SLJIT_ASSERT(arg & SLJIT_MEM);
+    arg &= ~SLJIT_MEM;


     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
-        arg = SLJIT_MEM | TMP_REG1;
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+        arg = TMP_REG2;
         argw = 0;
     }


     /* Fast loads and stores. */
-    if ((arg & REG_MASK)) {
+    if (arg) {
         if (!(argw & ~0x3fc))
             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
         if (!(-argw & ~0x3fc))
             return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
-    }


-    if (compiler->cache_arg == arg) {
-        tmp = argw - compiler->cache_argw;
-        if (!(tmp & ~0x3fc))
-            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2));
-        if (!(-tmp & ~0x3fc))
-            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2));
-        if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
-            FAIL_IF(compiler->error);
-            compiler->cache_argw = argw;
-            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
-        }
-    }
-
-    if (arg & REG_MASK) {
-        if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
-            FAIL_IF(compiler->error);
-            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
-        }
         imm = get_imm(argw & ~0x3fc);
         if (imm) {
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
-            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
+            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
         }
         imm = get_imm(-argw & ~0x3fc);
         if (imm) {
             argw = -argw;
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
-            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
+            return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
         }
     }


-    compiler->cache_arg = arg;
-    compiler->cache_argw = argw;
-    if (arg & REG_MASK) {
-        FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
+    if (arg) {
+        FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(TMP_REG2))));
     }
     else
-        FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+        FAIL_IF(load_immediate(compiler, TMP_REG2, argw));


-    return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
+    return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
 }
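emit_fop_mem can fold a displacement directly into VSTR/VLDR only when it is a multiple of four with magnitude at most 1020 (an 8-bit word offset; the sign selects the add/subtract form), which is exactly what the !(argw & ~0x3fc) tests express. Larger or misaligned displacements are now materialised through TMP_REG2 instead of the removed TMP_REG3 cache. A small standalone check of that predicate, purely illustrative:

#include <stdint.h>
#include <stdio.h>

/* Nonzero if off can be folded into VLDR/VSTR: a multiple of four whose
   magnitude is at most 1020 (8-bit word offset; the sign only selects the
   add/subtract form).  Same test as !(argw & ~0x3fc) above. */
static int vfp_offset_ok(int32_t off)
{
    if (off < 0)
        off = -off;
    return (off & ~0x3fc) == 0;
}

int main(void)
{
    int32_t probes[] = { 0, 4, 2, 1020, 1024, -1020, -1024, 4096 };
    unsigned i;
    for (i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
        printf("%6d -> %s\n", (int)probes[i],
               vfp_offset_ok(probes[i]) ? "fold into VLDR/VSTR" : "go through TMP_REG2");
    return 0;
}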


 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2109,6 +1844,8 @@
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src, sljit_sw srcw)
 {
+    op ^= SLJIT_F32_OP;
+
     if (src & SLJIT_MEM) {
         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
         src = TMP_FREG1;
@@ -2132,6 +1869,8 @@
 {
     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;


+    op ^= SLJIT_F32_OP;
+
     if (FAST_IS_REG(src))
         FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
     else if (src & SLJIT_MEM) {
@@ -2154,6 +1893,8 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
+    op ^= SLJIT_F32_OP;
+
     if (src1 & SLJIT_MEM) {
         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
         src1 = TMP_FREG1;
@@ -2175,10 +1916,6 @@
     sljit_s32 dst_r;


     CHECK_ERROR();
-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-    if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
-        op ^= SLJIT_F32_OP;


     SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
     SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
@@ -2185,6 +1922,9 @@


     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;


+    if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
+        op ^= SLJIT_F32_OP;
+
     if (src & SLJIT_MEM) {
         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw));
         src = dst_r;
@@ -2229,8 +1969,6 @@
     ADJUST_LOCAL_OFFSET(src1, src1w);
     ADJUST_LOCAL_OFFSET(src2, src2w);


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
     op ^= SLJIT_F32_OP;


     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
@@ -2283,21 +2021,17 @@
     CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
     ADJUST_LOCAL_OFFSET(dst, dstw);


+    SLJIT_ASSERT(reg_map[TMP_REG1] == 14);
+
     /* For UNUSED dst. Uncommon, but possible. */
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


     if (FAST_IS_REG(dst))
-        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));
+        return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1)));


     /* Memory. */
-    if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
-        return compiler->error;
-    /* TMP_REG3 is used for caching. */
-    FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-    return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
+    return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
@@ -2306,21 +2040,16 @@
     CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
     ADJUST_LOCAL_OFFSET(src, srcw);


+    SLJIT_ASSERT(reg_map[TMP_REG1] == 14);
+
     if (FAST_IS_REG(src))
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
-    else if (src & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
-            FAIL_IF(compiler->error);
-        else {
-            compiler->cache_arg = 0;
-            compiler->cache_argw = 0;
-            FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
-            FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
-        }
-    }
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, 0, RM(src))));
+    else if (src & SLJIT_MEM)
+        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG2));
     else if (src & SLJIT_IMM)
-        FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
-    return push_inst(compiler, BLX | RM(TMP_REG3));
+        FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+
+    return push_inst(compiler, BX | RM(TMP_REG1));
 }


 /* --------------------------------------------------------------------- */
@@ -2403,7 +2132,6 @@
     struct sljit_jump *jump;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
@@ -2416,7 +2144,7 @@
     if (type >= SLJIT_FAST_CALL)
         PTR_FAIL_IF(prepare_blx(compiler));
     PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
-        type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
+        type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));


     if (jump->flags & SLJIT_REWRITABLE_JUMP) {
         jump->addr = compiler->size;
@@ -2433,8 +2161,8 @@
 #else
     if (type >= SLJIT_FAST_CALL)
         jump->flags |= IS_BL;
-    PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
-    PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
+    PTR_FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
+    PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)) & ~COND_MASK) | get_cc(type)));
     jump->addr = compiler->size;
 #endif
     return jump;
@@ -2454,7 +2182,7 @@
             return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));


         SLJIT_ASSERT(src & SLJIT_MEM);
-        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
+        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2));
         return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
     }


@@ -2466,12 +2194,12 @@
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
     if (type >= SLJIT_FAST_CALL)
         FAIL_IF(prepare_blx(compiler));
-    FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
+    FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0), 0));
     if (type >= SLJIT_FAST_CALL)
         FAIL_IF(emit_blx(compiler));
 #else
-    FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
-    FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
+    FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
+    FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)));
 #endif
     jump->addr = compiler->size;
     return SLJIT_SUCCESS;
@@ -2482,7 +2210,7 @@
     sljit_s32 src, sljit_sw srcw,
     sljit_s32 type)
 {
-    sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
+    sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
     sljit_uw cc, ins;


     CHECK_ERROR();
@@ -2495,12 +2223,12 @@


     op = GET_OPCODE(op);
     cc = get_cc(type & 0xff);
-    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+    dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;


     if (op < SLJIT_ADD) {
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
-        FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
-        return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 0)));
+        FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
+        return (dst_reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1) : SLJIT_SUCCESS;
     }


     ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
@@ -2507,27 +2235,23 @@
     if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
         FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
         /* The condition must always be set, even if the ORR/EOR is not executed above. */
-        return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
+        return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
     }


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
     if (src & SLJIT_MEM) {
-        FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
+        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
         src = TMP_REG1;
-        srcw = 0;
     } else if (src & SLJIT_IMM) {
         FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
         src = TMP_REG1;
-        srcw = 0;
     }


-    FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
-    FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
-    if (dst_r == TMP_REG2)
-        FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));
+    FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
+    FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
+    if (dst_reg == TMP_REG2)
+        FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1));


-    return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS;
+    return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_reg))) : SLJIT_SUCCESS;
 }
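The two predicated data-processing instructions above use cc and cc ^ 0x10000000: the A32 condition code lives in bits 31-28, and flipping its lowest bit maps every condition to its complement, so a single XOR is enough to emit the "opposite" instruction. A tiny table to illustrate (not sljit code):

#include <stdint.h>
#include <stdio.h>

static const char *cond_name[16] = {
    "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
    "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV"
};

int main(void)
{
    /* cc, as used in the code, is the condition already shifted into bits 31-28. */
    uint32_t cond;
    for (cond = 0; cond < 14; cond += 2) {
        uint32_t cc = cond << 28;
        uint32_t inv = cc ^ 0x10000000u;
        printf("%s (cc=0x%08x)  <->  %s (cc=0x%08x)\n",
               cond_name[cond], (unsigned)cc, cond_name[inv >> 28], (unsigned)inv);
    }
    return 0;
}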


 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
@@ -2536,7 +2260,6 @@
     sljit_s32 reg;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);


@@ -2554,16 +2277,16 @@
     set_const(const_, compiler);


     if (dst & SLJIT_MEM)
-        PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+        PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1));
     return const_;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
-    inline_set_jump_addr(addr, new_addr, 1);
+    inline_set_jump_addr(addr, executable_offset, new_target, 1);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-    inline_set_const(addr, new_constant, 1);
+    inline_set_const(addr, executable_offset, new_constant, 1);
 }


Modified: code/trunk/sljit/sljitNativeARM_64.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_64.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeARM_64.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -151,7 +151,7 @@
     inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
 }


-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
 {
     sljit_sw diff;
     sljit_uw target_addr;
@@ -165,10 +165,11 @@
         target_addr = jump->u.target;
     else {
         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-        target_addr = (sljit_uw)(code + jump->u.label->size);
+        target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
     }
-    diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4);


+    diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset;
+
     if (jump->flags & IS_COND) {
         diff += sizeof(sljit_ins);
         if (diff <= 0xfffff && diff >= -0x100000) {
@@ -211,6 +212,7 @@
     sljit_ins *buf_ptr;
     sljit_ins *buf_end;
     sljit_uw word_count;
+    sljit_sw executable_offset;
     sljit_uw addr;
     sljit_s32 dst;


@@ -228,6 +230,8 @@

     code_ptr = code;
     word_count = 0;
+    executable_offset = SLJIT_EXEC_OFFSET(code);
+
     label = compiler->labels;
     jump = compiler->jumps;
     const_ = compiler->consts;
@@ -242,13 +246,13 @@
             SLJIT_ASSERT(!jump || jump->addr >= word_count);
             SLJIT_ASSERT(!const_ || const_->addr >= word_count);
             if (label && label->size == word_count) {
-                label->addr = (sljit_uw)code_ptr;
+                label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
                 label->size = code_ptr - code;
                 label = label->next;
             }
             if (jump && jump->addr == word_count) {
                     jump->addr = (sljit_uw)(code_ptr - 4);
-                    code_ptr -= detect_jump_type(jump, code_ptr, code);
+                    code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
                     jump = jump->next;
             }
             if (const_ && const_->addr == word_count) {
@@ -263,7 +267,7 @@
     } while (buf);


     if (label && label->size == word_count) {
-        label->addr = (sljit_uw)code_ptr;
+        label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
         label->size = code_ptr - code;
         label = label->next;
     }
@@ -277,9 +281,10 @@
     while (jump) {
         do {
             addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-            buf_ptr = (sljit_ins*)jump->addr;
+            buf_ptr = (sljit_ins *)jump->addr;
+
             if (jump->flags & PATCH_B) {
-                addr = (sljit_sw)(addr - jump->addr) >> 2;
+                addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
                 SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
                 buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
                 if (jump->flags & IS_COND)
@@ -287,7 +292,7 @@
                 break;
             }
             if (jump->flags & PATCH_COND) {
-                addr = (sljit_sw)(addr - jump->addr) >> 2;
+                addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
                 SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
                 buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
                 break;
@@ -308,8 +313,12 @@
     }


     compiler->error = SLJIT_ERR_COMPILED;
+    compiler->executable_offset = executable_offset;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
+
+    code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+    code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }
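A large part of this revision threads an executable_offset through code generation, presumably so that code can be written through one mapping and executed through another (a W^X-style setup); that motivation is my assumption. Labels are reported in the executable view, while PC-relative displacements computed from the still-writable code_ptr subtract the offset again, so the two adjustments cancel. A toy model of that bookkeeping with invented addresses, not sljit code:

#include <stdint.h>
#include <stdio.h>

/* Toy model only: instructions are stored at rw_base but will run at
   rx_base = rw_base + executable_offset. */
int main(void)
{
    uintptr_t rw_base = 0x10000000u;     /* writable view   */
    uintptr_t rx_base = 0x70000000u;     /* executable view */
    intptr_t executable_offset = (intptr_t)(rx_base - rw_base);

    uintptr_t branch_rw = rw_base + 0x10;   /* branch instruction, rw view */
    uintptr_t label_rw  = rw_base + 0x40;   /* branch target, rw view      */

    /* Label addresses handed back to callers are in the executable view. */
    uintptr_t label_addr = label_rw + (uintptr_t)executable_offset;

    /* A PC-relative displacement is identical in both views, so mixing an
       exec-view target with a rw-view source must subtract the offset again. */
    intptr_t diff = (intptr_t)label_addr - (intptr_t)branch_rw - executable_offset;

    printf("executable_offset:         0x%lx\n", (unsigned long)executable_offset);
    printf("label address (exec view): 0x%lx\n", (unsigned long)label_addr);
    printf("relative displacement:     %ld\n", (long)diff);
    return 0;
}

The same cancellation is why detect_jump_type adds the offset to target_addr and subtracts it from diff.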
@@ -366,7 +375,7 @@
     uimm = (sljit_uw)imm;
     while (1) {
         if (len <= 0) {
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
             return 0;
         }
         mask = ((sljit_uw)1 << len) - 1;
@@ -636,7 +645,7 @@
             }
             goto set_flags;
         default:
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
             break;
         }


@@ -756,7 +765,7 @@
         goto set_flags;
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;


 set_flags:
@@ -927,7 +936,9 @@
         other_r = OFFS_REG(arg);
         if (!other_r) {
             other_r = arg & REG_MASK;
-            if (other_r != reg && argw >= 0 && argw <= 0xffffff) {
+            SLJIT_ASSERT(other_r != reg);
+
+            if (argw >= 0 && argw <= 0xffffff) {
                 if ((argw & 0xfff) != 0)
                     FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
                 if (argw >> 12)
@@ -934,7 +945,7 @@
                     FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
                 return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
             }
-            else if (other_r != reg && argw < 0 && argw >= -0xffffff) {
+            else if (argw < 0 && argw >= -0xffffff) {
                 argw = -argw;
                 if ((argw & 0xfff) != 0)
                     FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
@@ -967,18 +978,8 @@


         /* No caching here. */
         arg &= REG_MASK;
-        argw &= 0x3;
-        if (!argw || argw == shift) {
-            FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0)));
-            return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10));
-        }
-        if (arg != reg) {
-            FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)));
-            return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
-        }
-        FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(arg) | RM(other_r) | (argw << 10)));
-        FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_LR)));
-        return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_LR));
+        FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r)));
+        return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r));
     }


     if (arg & OFFS_REG_MASK) {
@@ -1365,7 +1366,7 @@
                 srcw = (sljit_s32)srcw;
             break;
         default:
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
             flags = 0;
             break;
         }
@@ -1392,7 +1393,7 @@
         return SLJIT_SUCCESS;
     }


-    flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0;
+    flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
     mem_flags = WORD_SIZE;
     if (op_flags & SLJIT_I32_OP) {
         flags |= INT_OP;
@@ -1445,7 +1446,7 @@
     compiler->cache_argw = 0;


     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
-    flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+    flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
     mem_flags = WORD_SIZE;
     if (op & SLJIT_I32_OP) {
         flags |= INT_OP;
@@ -1857,7 +1858,7 @@
         return 0x6;


     default:
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
         return 0xe;
     }
 }
@@ -1883,7 +1884,6 @@
     struct sljit_jump *jump;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
@@ -1994,7 +1994,7 @@


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
-    flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+    flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
     mem_flags = WORD_SIZE;
     if (op & SLJIT_I32_OP) {
         flags |= INT_OP;
@@ -2022,7 +2022,6 @@
     sljit_s32 dst_r;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);


@@ -2038,16 +2037,18 @@
     return const_;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
     sljit_ins* inst = (sljit_ins*)addr;
-    modify_imm64_const(inst, new_addr);
+    modify_imm64_const(inst, new_target);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 4);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
     sljit_ins* inst = (sljit_ins*)addr;
     modify_imm64_const(inst, new_constant);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 4);
 }


Modified: code/trunk/sljit/sljitNativeARM_T2_32.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_T2_32.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeARM_T2_32.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -35,15 +35,14 @@
 /* Last register + 1. */
 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
 #define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_PC        (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_PC        (SLJIT_NUMBER_OF_REGISTERS + 4)


 #define TMP_FREG1    (0)
 #define TMP_FREG2    (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)


 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
-    0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+    0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 14, 15
 };


 #define COPY_BITS(src, from, to, bits) \
@@ -221,7 +220,7 @@
     inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
 }


-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
 {
     sljit_sw diff;


@@ -232,7 +231,7 @@
         /* Branch to ARM code is not optimized yet. */
         if (!(jump->u.target & 0x1))
             return 0;
-        diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1;
+        diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1;
     }
     else {
         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
@@ -276,7 +275,7 @@
     return 0;
 }


-static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
+static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset)
 {
     sljit_s32 type = (jump->flags >> 4) & 0xf;
     sljit_sw diff;
@@ -290,10 +289,12 @@


     if (jump->flags & JUMP_ADDR) {
         SLJIT_ASSERT(jump->u.target & 0x1);
-        diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1;
+        diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
     }
-    else
-        diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1;
+    else {
+        SLJIT_ASSERT(jump->u.label->addr & 0x1);
+        diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
+    }
     jump_inst = (sljit_u16*)jump->addr;


     switch (type) {
@@ -336,7 +337,7 @@
     else if (type == 6) /* Encoding T1 of 'BL' instruction */
         jump_inst[1] |= 0xd000;
     else
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
 }
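set_jump_instruction likewise subtracts executable_offset before halving the byte distance: Thumb branch offsets are counted in halfwords, relative to the branch address plus 4. The ranges below, which determine which direct branch forms can reach a target, are my reading of the encodings and are given for orientation only; this is not the sljit encoder.

#include <stdio.h>

/* diff is in halfwords, relative to the Thumb PC (branch address + 4), i.e.
   the value produced by the ">> 1" above.  Ranges are assumptions based on
   the T1/T2/T3/T4 branch encodings. */
static void reachable_forms(long diff)
{
    printf("%9ld halfwords:", diff);
    if (diff <= 127 && diff >= -128)
        printf(" B<cond> (16-bit, T1)");
    if (diff <= 1023 && diff >= -1024)
        printf(" B (16-bit, T2)");
    if (diff <= 524287 && diff >= -524288)
        printf(" B<cond>.W (32-bit, T3)");
    if (diff <= 8388607 && diff >= -8388608)
        printf(" B.W / BL (32-bit, T4)");
    else
        printf(" out of direct range");
    printf("\n");
}

int main(void)
{
    reachable_forms(100);
    reachable_forms(60000);
    reachable_forms(600000);
    reachable_forms(20000000);
    return 0;
}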


 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
@@ -347,6 +348,7 @@
     sljit_u16 *buf_ptr;
     sljit_u16 *buf_end;
     sljit_uw half_count;
+    sljit_sw executable_offset;


     struct sljit_label *label;
     struct sljit_jump *jump;
@@ -362,6 +364,8 @@


     code_ptr = code;
     half_count = 0;
+    executable_offset = SLJIT_EXEC_OFFSET(code);
+
     label = compiler->labels;
     jump = compiler->jumps;
     const_ = compiler->consts;
@@ -376,13 +380,13 @@
             SLJIT_ASSERT(!jump || jump->addr >= half_count);
             SLJIT_ASSERT(!const_ || const_->addr >= half_count);
             if (label && label->size == half_count) {
-                label->addr = ((sljit_uw)code_ptr) | 0x1;
+                label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
                 label->size = code_ptr - code;
                 label = label->next;
             }
             if (jump && jump->addr == half_count) {
                     jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
-                    code_ptr -= detect_jump_type(jump, code_ptr, code);
+                    code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
                     jump = jump->next;
             }
             if (const_ && const_->addr == half_count) {
@@ -397,7 +401,7 @@
     } while (buf);


     if (label && label->size == half_count) {
-        label->addr = ((sljit_uw)code_ptr) | 0x1;
+        label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
         label->size = code_ptr - code;
         label = label->next;
     }
@@ -409,13 +413,17 @@


     jump = compiler->jumps;
     while (jump) {
-        set_jump_instruction(jump);
+        set_jump_instruction(jump, executable_offset);
         jump = jump->next;
     }


     compiler->error = SLJIT_ERR_COMPILED;
+    compiler->executable_offset = executable_offset;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
+
+    code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+    code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
     SLJIT_CACHE_FLUSH(code, code_ptr);
     /* Set thumb mode flag. */
     return (void*)((sljit_uw)code | 0x1);
@@ -501,24 +509,20 @@


 #define ARG1_IMM    0x0010000
 #define ARG2_IMM    0x0020000
-#define KEEP_FLAGS    0x0040000
 /* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
 #define SET_FLAGS    0x0100000
 #define UNUSED_RETURN    0x0200000
-#define SLOW_DEST    0x0400000
-#define SLOW_SRC1    0x0800000
-#define SLOW_SRC2    0x1000000


 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
 {
     /* dst must be register, TMP_REG1
-       arg1 must be register, TMP_REG1, imm
-       arg2 must be register, TMP_REG2, imm */
+       arg1 must be register, imm
+       arg2 must be register, imm */
     sljit_s32 reg;
     sljit_uw imm, nimm;


     if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
-        /* Both are immediates. */
+        /* Both are immediates, no temporaries are used. */
         flags &= ~ARG1_IMM;
         FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
         arg1 = TMP_REG1;
@@ -534,7 +538,7 @@
             /* No form with immediate operand. */
             break;
         case SLJIT_MOV:
-            SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
+            SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
             return load_immediate(compiler, dst, imm);
         case SLJIT_NOT:
             if (!(flags & SET_FLAGS))
@@ -544,7 +548,7 @@
             break;
         case SLJIT_ADD:
             nimm = -imm;
-            if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+            if (IS_2_LO_REGS(reg, dst)) {
                 if (imm <= 0x7)
                     return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
                 if (nimm <= 0x7)
@@ -573,7 +577,7 @@
             break;
         case SLJIT_SUB:
             if (flags & ARG1_IMM) {
-                if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst))
+                if (imm == 0 && IS_2_LO_REGS(reg, dst))
                     return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
                 imm = get_imm(imm);
                 if (imm != INVALID_IMM)
@@ -581,7 +585,7 @@
                 break;
             }
             nimm = -imm;
-            if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+            if (IS_2_LO_REGS(reg, dst)) {
                 if (imm <= 0x7)
                     return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
                 if (nimm <= 0x7)
@@ -648,31 +652,35 @@
             }
             switch (flags & 0xffff) {
             case SLJIT_SHL:
-                if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                if (IS_2_LO_REGS(dst, reg))
                     return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
                 return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
             case SLJIT_LSHR:
-                if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                if (IS_2_LO_REGS(dst, reg))
                     return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
                 return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
             default: /* SLJIT_ASHR */
-                if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+                if (IS_2_LO_REGS(dst, reg))
                     return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
                 return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
             }
         default:
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
             break;
         }


         if (flags & ARG2_IMM) {
-            FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
-            arg2 = TMP_REG2;
+            imm = arg2;
+            arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
+            FAIL_IF(load_immediate(compiler, arg2, imm));
         }
         else {
-            FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
-            arg1 = TMP_REG1;
+            imm = arg1;
+            arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
+            FAIL_IF(load_immediate(compiler, arg1, imm));
         }
+
+        SLJIT_ASSERT(arg1 != arg2);
     }


     /* Both arguments are registers. */
@@ -685,41 +693,41 @@
     case SLJIT_MOVU_U32:
     case SLJIT_MOVU_S32:
     case SLJIT_MOVU_P:
-        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
         if (dst == arg2)
             return SLJIT_SUCCESS;
         return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
     case SLJIT_MOV_U8:
     case SLJIT_MOVU_U8:
-        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
         if (IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
     case SLJIT_MOV_S8:
     case SLJIT_MOVU_S8:
-        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
         if (IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
     case SLJIT_MOV_U16:
     case SLJIT_MOVU_U16:
-        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
         if (IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
     case SLJIT_MOV_S16:
     case SLJIT_MOVU_S16:
-        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+        SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
         if (IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
     case SLJIT_NOT:
-        SLJIT_ASSERT(arg1 == TMP_REG1);
-        if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        SLJIT_ASSERT(arg1 == TMP_REG2);
+        if (IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
     case SLJIT_CLZ:
-        SLJIT_ASSERT(arg1 == TMP_REG1);
+        SLJIT_ASSERT(arg1 == TMP_REG2);
         FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
         if (flags & SET_FLAGS) {
             if (reg_map[dst] <= 7)
@@ -728,61 +736,59 @@
         }
         return SLJIT_SUCCESS;
     case SLJIT_ADD:
-        if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+        if (IS_3_LO_REGS(dst, arg1, arg2))
             return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
         if (dst == arg1 && !(flags & SET_FLAGS))
             return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
         return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_ADDC:
-        if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_SUB:
-        if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+        if (IS_3_LO_REGS(dst, arg1, arg2))
             return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
         return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_SUBC:
-        if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_MUL:
         if (!(flags & SET_FLAGS))
             return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
-        SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2);
+        SLJIT_ASSERT(dst != TMP_REG2);
         FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
         /* cmp TMP_REG2, dst asr #31. */
         return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
     case SLJIT_AND:
-        if (!(flags & KEEP_FLAGS)) {
-            if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
-                return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
-            if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
-                return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
-        }
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
+            return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
+        if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
+            return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
         return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_OR:
-        if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_XOR:
-        if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_SHL:
-        if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_LSHR:
-        if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     case SLJIT_ASHR:
-        if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+        if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
             return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
         return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


@@ -794,7 +800,6 @@
 #define HALF_SIZE    0x08


 #define UPDATE        0x10
-#define ARG_TEST    0x20


 #define IS_WORD_SIZE(flags)        (!(flags & (BYTE_SIZE | HALF_SIZE)))
 #define OFFSET_CHECK(imm, shift)    (!(argw & ~(imm << shift)))
@@ -888,20 +893,61 @@
     return SLJIT_ERR_UNSUPPORTED;
 }


-/* Can perform an operation using at most 1 instruction. */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
+    sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
 {
-    sljit_s32 other_r, shift;
+    sljit_s32 other_r;
+    sljit_s32 update = flags & UPDATE;
+    sljit_uw tmp;


     SLJIT_ASSERT(arg & SLJIT_MEM);
+    SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
+    flags &= ~UPDATE;
+    arg &= ~SLJIT_MEM;


-    if (SLJIT_UNLIKELY(flags & UPDATE)) {
-        if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) {
-            if (SLJIT_UNLIKELY(flags & ARG_TEST))
-                return 1;
+    if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
+        FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+        if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
+            return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
+        return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
+    }


-            flags &= ~UPDATE;
+    if (SLJIT_UNLIKELY(update)) {
+        SLJIT_ASSERT(reg != arg);
+
+        if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+            other_r = OFFS_REG(arg);
             arg &= 0xf;
+
+            if (IS_3_LO_REGS(reg, arg, other_r))
+                FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
+            else
+                FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r)));
+            return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
+        }
+
+        if (argw > 0xff) {
+            tmp = get_imm(argw & ~0xff);
+            if (tmp != INVALID_IMM) {
+                push_inst32(compiler, ADD_WI | RD4(arg) | RN4(arg) | tmp);
+                argw = argw & 0xff;
+            }
+        }
+        else if (argw < -0xff) {
+            tmp = get_imm(-argw & ~0xff);
+            if (tmp != INVALID_IMM) {
+                push_inst32(compiler, SUB_WI | RD4(arg) | RN4(arg) | tmp);
+                argw = -(-argw & 0xff);
+            }
+        }
+
+        if (argw == 0) {
+            if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags])
+                return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg));
+            return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg));
+        }
+
+        if (argw <= 0xff && argw >= -0xff) {
             if (argw >= 0)
                 argw |= 0x200;
             else {
@@ -909,221 +955,85 @@
             }


             SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
-            FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
-            return -1;
+            return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw);
         }
-        return 0;
+
+        FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+
+        SLJIT_ASSERT(reg != tmp_reg);
+
+        if (IS_3_LO_REGS(reg, arg, tmp_reg))
+            FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)));
+        else
+            FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)));
+        return push_inst16(compiler, ADD | SET_REGS44(arg, tmp_reg));
     }


     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-        if (SLJIT_UNLIKELY(flags & ARG_TEST))
-            return 1;
-
         argw &= 0x3;
         other_r = OFFS_REG(arg);
         arg &= 0xf;


         if (!argw && IS_3_LO_REGS(reg, arg, other_r))
-            FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
-        else
-            FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
-        return -1;
+            return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
+        return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4));
     }


-    if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff)
-        return 0;
+    if (argw > 0xfff) {
+        tmp = get_imm(argw & ~0xfff);
+        if (tmp != INVALID_IMM) {
+            push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | tmp);
+            arg = tmp_reg;
+            argw = argw & 0xfff;
+        }
+    }
+    else if (argw < -0xff) {
+        tmp = get_imm(-argw & ~0xff);
+        if (tmp != INVALID_IMM) {
+            push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | tmp);
+            arg = tmp_reg;
+            argw = -(-argw & 0xff);
+        }
+    }


-    if (SLJIT_UNLIKELY(flags & ARG_TEST))
-        return 1;
-
-    arg &= 0xf;
     if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
-        shift = 3;
+        tmp = 3;
         if (IS_WORD_SIZE(flags)) {
             if (OFFSET_CHECK(0x1f, 2))
-                shift = 2;
+                tmp = 2;
         }
         else if (flags & BYTE_SIZE)
         {
             if (OFFSET_CHECK(0x1f, 0))
-                shift = 0;
+                tmp = 0;
         }
         else {
             SLJIT_ASSERT(flags & HALF_SIZE);
             if (OFFSET_CHECK(0x1f, 1))
-                shift = 1;
+                tmp = 1;
         }


-        if (shift != 3) {
-            FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift))));
-            return -1;
-        }
+        if (tmp < 3)
+            return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp)));
     }
-
-    /* SP based immediate. */
-    if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
-        FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
-        return -1;
+    else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && OFFSET_CHECK(0xff, 2) && reg_map[reg] <= 7) {
+        /* SP based immediate. */
+        return push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2));
     }


-    if (argw >= 0)
-        FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
-    else
-        FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
-    return -1;
-}
+    if (argw >= 0 && argw <= 0xfff)
+        return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw);
+    else if (argw < 0 && argw >= -0xff)
+        return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw);


-/* see getput_arg below.
-   Note: can_cache is called only for binary operators. Those
-   operators always use word arguments without write-back. */
-static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
-    sljit_sw diff;
-    if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
-        return 0;
+    SLJIT_ASSERT(arg != tmp_reg);


-    if (!(arg & REG_MASK)) {
-        diff = argw - next_argw;
-        if (diff <= 0xfff && diff >= -0xfff)
-            return 1;
-        return 0;
-    }
-
-    if (argw == next_argw)
-        return 1;
-
-    diff = argw - next_argw;
-    if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
-        return 1;
-
-    return 0;
+    FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+    if (IS_3_LO_REGS(reg, arg, tmp_reg))
+        return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
+    return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
 }


-/* Emit the necessary instructions. See can_cache above. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
-    sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
-    sljit_s32 tmp_r, other_r;
-    sljit_sw diff;
-
-    SLJIT_ASSERT(arg & SLJIT_MEM);
-    if (!(next_arg & SLJIT_MEM)) {
-        next_arg = 0;
-        next_argw = 0;
-    }
-
-    tmp_r = (flags & STORE) ? TMP_REG3 : reg;
-
-    if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
-        /* Update only applies if a base register exists. */
-        /* There is no caching here. */
-        other_r = OFFS_REG(arg);
-        arg &= 0xf;
-        flags &= ~UPDATE;
-
-        if (!other_r) {
-            if (!(argw & ~0xfff)) {
-                FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
-                return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
-            }
-
-            if (compiler->cache_arg == SLJIT_MEM) {
-                if (argw == compiler->cache_argw) {
-                    other_r = TMP_REG3;
-                    argw = 0;
-                }
-                else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
-                    FAIL_IF(compiler->error);
-                    compiler->cache_argw = argw;
-                    other_r = TMP_REG3;
-                    argw = 0;
-                }
-            }
-
-            if (argw) {
-                FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-                compiler->cache_arg = SLJIT_MEM;
-                compiler->cache_argw = argw;
-                other_r = TMP_REG3;
-                argw = 0;
-            }
-        }
-
-        argw &= 0x3;
-        if (!argw && IS_3_LO_REGS(reg, arg, other_r)) {
-            FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
-            return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
-        }
-        FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
-        return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6));
-    }
-    flags &= ~UPDATE;
-
-    SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
-
-    if (compiler->cache_arg == arg) {
-        diff = argw - compiler->cache_argw;
-        if (!(diff & ~0xfff))
-            return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff);
-        if (!((compiler->cache_argw - argw) & ~0xff))
-            return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
-        if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
-            FAIL_IF(compiler->error);
-            return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
-        }
-    }
-
-    next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw);
-    arg &= 0xf;
-    if (arg && compiler->cache_arg == SLJIT_MEM) {
-        if (compiler->cache_argw == argw)
-            return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
-        if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
-            FAIL_IF(compiler->error);
-            compiler->cache_argw = argw;
-            return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
-        }
-    }
-
-    compiler->cache_argw = argw;
-    if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
-        FAIL_IF(compiler->error);
-        compiler->cache_arg = SLJIT_MEM | arg;
-        arg = 0;
-    }
-    else {
-        FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-        compiler->cache_arg = SLJIT_MEM;
-
-        diff = argw - next_argw;
-        if (next_arg && diff <= 0xfff && diff >= -0xfff) {
-            FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
-            compiler->cache_arg = SLJIT_MEM | arg;
-            arg = 0;
-        }
-    }
-
-    if (arg)
-        return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
-    return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
-    if (getput_arg_fast(compiler, flags, reg, arg, argw))
-        return compiler->error;
-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-    return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
-{
-    if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
-        return compiler->error;
-    return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
-}
-
 /* --------------------------------------------------------------------- */
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */
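
The hunk above folds getput_arg_fast/getput_arg/can_cache into a single emit_op_mem that takes an explicit temporary register, so no cache_arg/cache_argw state survives in the compiler. A minimal standalone sketch of the Thumb-2 offset classification it performs; the ranges come from the diff, everything else here (classify_offset, the enum names) is invented for illustration:

#include <stdint.h>
#include <stdio.h>

/* Sketch only: positive offsets up to 0xfff fit the 12-bit immediate form,
   negatives down to -0xff fit the 8-bit form, and anything else goes through
   a caller-supplied temporary register (the load_immediate path above). */
enum addr_form { IMM12_FORM, IMM8_NEG_FORM, TMP_REG_FORM };

static enum addr_form classify_offset(int32_t argw)
{
    if (argw >= 0 && argw <= 0xfff)
        return IMM12_FORM;
    if (argw < 0 && argw >= -0xff)
        return IMM8_NEG_FORM;
    return TMP_REG_FORM;
}

int main(void)
{
    printf("%d %d %d\n", classify_offset(0x7ff),
        classify_offset(-0x40), classify_offset(0x12345));
    return 0;
}

Before falling back to the temporary register, the real helper also tries get_imm() on the high bits (the ADD_WI/SUB_WI blocks above), so many large offsets still avoid a full load_immediate.
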
@@ -1133,14 +1043,12 @@
     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 {
     sljit_s32 size, i, tmp;
-    sljit_ins push;
+    sljit_ins push = 0;


     CHECK_ERROR();
     CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
     set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);


-    push = (1 << 4);
-
     tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
     for (i = SLJIT_S0; i >= tmp; i--)
         push |= 1 << reg_map[i];
@@ -1153,7 +1061,7 @@
         : push_inst16(compiler, PUSH | (1 << 8) | push));


     /* Stack must be aligned to 8 bytes: (LR, R4) */
-    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
     local_size = ((size + local_size + 7) & ~7) - size;
     compiler->local_size = local_size;
     if (local_size > 0) {
@@ -1183,7 +1091,7 @@
     CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
     set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);


-    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
     compiler->local_size = ((size + local_size + 7) & ~7) - size;
     return SLJIT_SUCCESS;
 }
@@ -1191,7 +1099,7 @@
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
 {
     sljit_s32 i, tmp;
-    sljit_ins pop;
+    sljit_ins pop = 0;


     CHECK_ERROR();
     CHECK(check_sljit_emit_return(compiler, op, src, srcw));
@@ -1205,8 +1113,6 @@
             FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size));
     }


-    pop = (1 << 4);
-
     tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
     for (i = SLJIT_S0; i >= tmp; i--)
         pop |= 1 << reg_map[i];
@@ -1264,7 +1170,7 @@
     case SLJIT_DIV_UW:
     case SLJIT_DIV_SW:
         SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
-        SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping);
+        SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12);


         saved_reg_count = 0;
         if (compiler->scratches >= 4)
@@ -1324,9 +1230,6 @@
     ADJUST_LOCAL_OFFSET(dst, dstw);
     ADJUST_LOCAL_OFFSET(src, srcw);


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-
     dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;


     op = GET_OPCODE(op);
@@ -1385,31 +1288,25 @@
                 srcw = (sljit_s16)srcw;
             break;
         default:
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
             flags = 0;
             break;
         }


         if (src & SLJIT_IMM)
-            FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
+            FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw));
         else if (src & SLJIT_MEM) {
-            if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
-                FAIL_IF(compiler->error);
-            else
-                FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
+            FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, ((flags & UPDATE) && dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1));
         } else {
             if (dst_r != TMP_REG1)
-                return emit_op_imm(compiler, op, dst_r, TMP_REG1, src);
+                return emit_op_imm(compiler, op, dst_r, TMP_REG2, src);
             dst_r = src;
         }


-        if (dst & SLJIT_MEM) {
-            if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
-                return compiler->error;
-            else
-                return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
-        }
-        return SLJIT_SUCCESS;
+        if (!(dst & SLJIT_MEM))
+            return SLJIT_SUCCESS;
+
+        return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, (dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1);
     }


     if (op == SLJIT_NEG) {
@@ -1420,29 +1317,22 @@
         return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
     }


-    flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
-    if (src & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
-            FAIL_IF(compiler->error);
-        else
-            FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
-        src = TMP_REG2;
-    }
+    flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;


     if (src & SLJIT_IMM)
         flags |= ARG2_IMM;
+    else if (src & SLJIT_MEM) {
+        FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
+        srcw = TMP_REG1;
+    }
     else
         srcw = src;


-    emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
+    emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, srcw);


-    if (dst & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
-            return compiler->error;
-        else
-            return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
-    }
-    return SLJIT_SUCCESS;
+    if (!(dst & SLJIT_MEM))
+        return SLJIT_SUCCESS;
+    return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1450,7 +1340,7 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
-    sljit_s32 dst_r, flags;
+    sljit_s32 dst_reg, flags, src2_reg;


     CHECK_ERROR();
     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
@@ -1458,54 +1348,25 @@
     ADJUST_LOCAL_OFFSET(src1, src1w);
     ADJUST_LOCAL_OFFSET(src2, src2w);


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
+    dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+    flags = HAS_FLAGS(op) ? SET_FLAGS : 0;


-    dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
-    flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
-
-    if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
-        flags |= SLOW_DEST;
-
-    if (src1 & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
-            FAIL_IF(compiler->error);
-        else
-            flags |= SLOW_SRC1;
-    }
-    if (src2 & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
-            FAIL_IF(compiler->error);
-        else
-            flags |= SLOW_SRC2;
-    }
-
-    if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
-        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
-            FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
-            FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
-        }
-        else {
-            FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
-            FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
-        }
-    }
-    else if (flags & SLOW_SRC1)
-        FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
-    else if (flags & SLOW_SRC2)
-        FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
-
-    if (src1 & SLJIT_MEM)
-        src1 = TMP_REG1;
-    if (src2 & SLJIT_MEM)
-        src2 = TMP_REG2;
-
     if (src1 & SLJIT_IMM)
         flags |= ARG1_IMM;
+    else if (src1 & SLJIT_MEM) {
+        emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
+        src1w = TMP_REG1;
+    }
     else
         src1w = src1;
+
     if (src2 & SLJIT_IMM)
         flags |= ARG2_IMM;
+    else if (src2 & SLJIT_MEM) {
+        src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
+        emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg);
+        src2w = src2_reg;
+    }
     else
         src2w = src2;


@@ -1512,16 +1373,11 @@
     if (dst == SLJIT_UNUSED)
         flags |= UNUSED_RETURN;


-    emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
+    emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, src1w, src2w);


-    if (dst & SLJIT_MEM) {
-        if (!(flags & SLOW_DEST)) {
-            getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
-            return compiler->error;
-        }
-        return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
-    }
-    return SLJIT_SUCCESS;
+    if (!(dst & SLJIT_MEM))
+        return SLJIT_SUCCESS;
+    return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2);
 }


SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
@@ -1565,7 +1421,6 @@

 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 {
-    sljit_sw tmp;
     sljit_uw imm;
     sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));


@@ -1573,8 +1428,8 @@

     /* Fast loads and stores. */
     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-        FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
-        arg = SLJIT_MEM | TMP_REG2;
+        FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
+        arg = SLJIT_MEM | TMP_REG1;
         argw = 0;
     }


@@ -1585,21 +1440,6 @@
             return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
     }


-    /* Slow cases */
-    SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
-    if (compiler->cache_arg == arg) {
-        tmp = argw - compiler->cache_argw;
-        if (!(tmp & ~0x3fc))
-            return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
-        if (!(-tmp & ~0x3fc))
-            return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
-        if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
-            FAIL_IF(compiler->error);
-            compiler->cache_argw = argw;
-            return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
-        }
-    }
-
     if (arg & REG_MASK) {
         if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
             FAIL_IF(compiler->error);
@@ -1618,13 +1458,10 @@
         }
     }


-    compiler->cache_arg = arg;
-    compiler->cache_argw = argw;
-
-    FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+    FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
     if (arg & REG_MASK)
-        FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK))));
-    return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
+        FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
+    return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
 }


 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1631,6 +1468,8 @@
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src, sljit_sw srcw)
 {
+    op ^= SLJIT_F32_OP;
+
     if (src & SLJIT_MEM) {
         FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
         src = TMP_FREG1;
@@ -1654,6 +1493,8 @@
 {
     sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;


+    op ^= SLJIT_F32_OP;
+
     if (FAST_IS_REG(src))
         FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
     else if (src & SLJIT_MEM) {
@@ -1676,6 +1517,8 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
+    op ^= SLJIT_F32_OP;
+
     if (src1 & SLJIT_MEM) {
         emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
         src1 = TMP_FREG1;
@@ -1697,10 +1540,6 @@
     sljit_s32 dst_r;


     CHECK_ERROR();
-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-    if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
-        op ^= SLJIT_F32_OP;


     SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
     SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
@@ -1707,6 +1546,9 @@


     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;


+    if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
+        op ^= SLJIT_F32_OP;
+
     if (src & SLJIT_MEM) {
         emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw);
         src = dst_r;
@@ -1751,8 +1593,6 @@
     ADJUST_LOCAL_OFFSET(src1, src1w);
     ADJUST_LOCAL_OFFSET(src2, src2w);


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
     op ^= SLJIT_F32_OP;


     dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
@@ -1797,21 +1637,17 @@
     CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
     ADJUST_LOCAL_OFFSET(dst, dstw);


+    SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+
     /* For UNUSED dst. Uncommon, but possible. */
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


     if (FAST_IS_REG(dst))
-        return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));
+        return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));


     /* Memory. */
-    if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
-        return compiler->error;
-    /* TMP_REG3 is used for caching. */
-    FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-    return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
+    return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
@@ -1820,21 +1656,16 @@
     CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
     ADJUST_LOCAL_OFFSET(src, srcw);


+    SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+
     if (FAST_IS_REG(src))
-        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
+        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
     else if (src & SLJIT_MEM) {
-        if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
-            FAIL_IF(compiler->error);
-        else {
-            compiler->cache_arg = 0;
-            compiler->cache_argw = 0;
-            FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
-            FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
-        }
+        FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
     }
     else if (src & SLJIT_IMM)
-        FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
-    return push_inst16(compiler, BLX | RN3(TMP_REG3));
+        FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+    return push_inst16(compiler, BX | RN3(TMP_REG2));
 }


 /* --------------------------------------------------------------------- */
@@ -1891,7 +1722,7 @@
         return 0x7;


     default: /* SLJIT_JUMP */
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
         return 0xe;
     }
 }
@@ -1918,7 +1749,6 @@
     sljit_ins cc;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
@@ -1959,7 +1789,7 @@
         if (FAST_IS_REG(src))
             return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));


-        FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw));
+        FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
         if (type >= SLJIT_FAST_CALL)
             return push_inst16(compiler, BLX | RN3(TMP_REG1));
     }
@@ -1992,7 +1822,7 @@


     op = GET_OPCODE(op);
     cc = get_cc(type & 0xff);
-    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;


     if (op < SLJIT_ADD) {
         FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
@@ -2003,17 +1833,18 @@
             FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
             FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
         }
-        if (dst_r != TMP_REG2)
+        if (dst_r != TMP_REG1)
             return SLJIT_SUCCESS;
-        return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
+        return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
     }


     ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI));
+
     if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
         /* Does not change the other bits. */
         FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
         FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1));
-        if (flags & SLJIT_SET_E) {
+        if (flags & SLJIT_SET_Z) {
             /* The condition must always be set, even if the ORRI/EORI is not executed above. */
             if (reg_map[dst] <= 7)
                 return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
@@ -2022,10 +1853,8 @@
         return SLJIT_SUCCESS;
     }


-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
     if (src & SLJIT_MEM) {
-        FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
+        FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
         src = TMP_REG2;
         srcw = 0;
     } else if (src & SLJIT_IMM) {
@@ -2044,10 +1873,10 @@
         FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
     }


-    if (dst_r == TMP_REG2)
-        FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0));
+    if (dst_r == TMP_REG1)
+        FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));


-    if (flags & SLJIT_SET_E) {
+    if (flags & SLJIT_SET_Z) {
         /* The condition must always be set, even if the ORRI/EORI is not executed above. */
         if (reg_map[dst_r] <= 7)
             return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
@@ -2062,7 +1891,6 @@
     sljit_s32 dst_r;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);


@@ -2074,20 +1902,22 @@
     PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));


     if (dst & SLJIT_MEM)
-        PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
+        PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
     return const_;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
     sljit_u16 *inst = (sljit_u16*)addr;
-    modify_imm32_const(inst, new_addr);
+    modify_imm32_const(inst, new_target);
+    inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 4);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
     sljit_u16 *inst = (sljit_u16*)addr;
     modify_imm32_const(inst, new_constant);
+    inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 4);
 }
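
sljit_set_jump_addr and sljit_set_const now take an executable_offset: the constant is patched through the address they are given, and the instruction cache is flushed on the alias computed with SLJIT_ADD_EXEC_OFFSET. A toy model of that write-here/flush-there split, assuming a dual-mapped code buffer; ADD_EXEC_OFFSET, flush_icache and set_const below are invented stand-ins, not the sljit API:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for SLJIT_ADD_EXEC_OFFSET: the executable view of the buffer is
   assumed to be the writable address plus a fixed offset. */
#define ADD_EXEC_OFFSET(ptr, off) ((uint8_t *)(ptr) + (off))

static void flush_icache(void *from, void *to)
{
    /* placeholder for SLJIT_CACHE_FLUSH; just report the flushed range */
    printf("flush %p..%p\n", from, to);
}

static void set_const(uint8_t *writable_addr, uint32_t new_constant, ptrdiff_t exec_offset)
{
    /* patch through the writable mapping ... */
    memcpy(writable_addr, &new_constant, sizeof(new_constant));
    /* ... but flush the executable alias of the same bytes */
    uint8_t *exec = ADD_EXEC_OFFSET(writable_addr, exec_offset);
    flush_icache(exec, exec + sizeof(new_constant));
}

int main(void)
{
    static uint8_t code[16];
    set_const(code, 0xdeadbeefu, 0); /* offset is 0 when both views coincide */
    return 0;
}
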


Modified: code/trunk/sljit/sljitNativeMIPS_32.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_32.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeMIPS_32.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -40,29 +40,29 @@


 #define EMIT_LOGICAL(op_imm, op_norm) \
     if (flags & SRC2_IMM) { \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
     } \
     else { \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
     }


 #define EMIT_SHIFT(op_imm, op_v) \
     if (flags & SRC2_IMM) { \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
     } \
     else { \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
     }
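
The EMIT_LOGICAL/EMIT_SHIFT rewrite above replaces the SLJIT_SET_E/CHECK_FLAGS pair with two independent gates: the zero flag is computed into EQUAL_FLAG only when SLJIT_SET_Z is requested, and the destination is written only when the result is actually used. A toy C model of that control flow; every name below (SET_Z, UNUSED_DST, emit_and) is an invented stand-in, not sljit API:

#include <stdio.h>

#define SET_Z      0x1   /* stands in for SLJIT_SET_Z */
#define UNUSED_DST 0x2   /* stands in for the UNUSED_DEST flag */

/* Emit the "AND" of a and b the way the macros above do: a flag-only
   computation when Z is requested, a destination write unless the result
   is unused. Both may happen; either may be skipped. */
static void emit_and(int op_flags, int a, int b, int *dst, int *zero_flag)
{
    if (op_flags & SET_Z)
        *zero_flag = ((a & b) == 0);   /* the EQUAL_FLAG instruction */
    if (!(op_flags & UNUSED_DST))
        *dst = a & b;                  /* the DR(dst) instruction */
}

int main(void)
{
    int dst = -1, z = -1;
    emit_and(SET_Z, 0xf0, 0x0f, &dst, &z);
    printf("dst=%d z=%d\n", dst, z);
    return 0;
}
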


@@ -69,6 +69,8 @@
 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
     sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
 {
+    sljit_s32 is_overflow, is_carry, is_handled;
+
     switch (GET_OPCODE(op)) {
     case SLJIT_MOV:
     case SLJIT_MOV_U32:
@@ -93,8 +95,9 @@
             }
             return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
         }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
         return SLJIT_SUCCESS;


     case SLJIT_MOV_U16:
@@ -111,15 +114,16 @@
             }
             return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
         }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
         return SLJIT_SUCCESS;


     case SLJIT_NOT:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-        if (op & SLJIT_SET_E)
+        if (op & SLJIT_SET_Z)
             FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-        if (CHECK_FLAGS(SLJIT_SET_E))
+        if (!(flags & UNUSED_DEST))
             FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
         return SLJIT_SUCCESS;


@@ -126,9 +130,9 @@
     case SLJIT_CLZ:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
-        if (op & SLJIT_SET_E)
+        if (op & SLJIT_SET_Z)
             FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
-        if (CHECK_FLAGS(SLJIT_SET_E))
+        if (!(flags & UNUSED_DEST))
             FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
 #else
         if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
@@ -145,130 +149,194 @@
         FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
         FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
         FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
-        if (op & SLJIT_SET_E)
+        if (op & SLJIT_SET_Z)
             return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
 #endif
         return SLJIT_SUCCESS;


     case SLJIT_ADD:
+        is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_O) {
+            if (is_overflow) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
             }
-            if (op & SLJIT_SET_E)
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
-            if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+
+            if (is_overflow || is_carry) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
                 else {
-                    FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+                    FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
                 }
             }
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
         }
         else {
-            if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-            if (op & SLJIT_SET_E)
+            if (is_overflow)
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-            if (op & (SLJIT_SET_C | SLJIT_SET_O))
-                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+
+            if (is_overflow || is_carry)
+                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
         }


         /* a + b >= a | b (otherwise, the carry should be set to 1). */
-        if (op & (SLJIT_SET_C | SLJIT_SET_O))
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
-        if (!(op & SLJIT_SET_O))
+        if (is_overflow || is_carry)
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+        if (!is_overflow)
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
-        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+        if (op & SLJIT_SET_Z)
+            FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);


     case SLJIT_ADDC:
+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_C) {
+            if (is_carry) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
                 else {
-                    FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
                 }
             }
             FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
         } else {
-            if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+            if (is_carry)
+                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
         }
-        if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+        if (is_carry)
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));


-        FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
-        if (!(op & SLJIT_SET_C))
+        FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+        if (!is_carry)
             return SLJIT_SUCCESS;


-        /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
-        FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+        /* Set OTHER_FLAG to 1 if (dst == 0) && (OTHER_FLAG == 1). */
+        FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
         /* Set carry flag. */
-        return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
+        return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);


     case SLJIT_SUB:
-        if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+        if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
             FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
             src2 = TMP_REG2;
             flags &= ~SRC2_IMM;
         }


+        is_handled = 0;
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_O) {
+            if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+                is_handled = 1;
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+                is_handled = 1;
+            }
+        }
+
+        if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
+            is_handled = 1;
+
+            if (flags & SRC2_IMM) {
+                FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                src2 = TMP_REG2;
+                flags &= ~SRC2_IMM;
+            }
+
+            if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
+            {
+                FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
+            {
+                FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+        }
+
+        if (is_handled) {
+            if (flags & SRC2_IMM) {
+                if (op & SLJIT_SET_Z)
+                    FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+                if (!(flags & UNUSED_DEST))
+                    return push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst));
+            }
+            else {
+                if (op & SLJIT_SET_Z)
+                    FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                if (!(flags & UNUSED_DEST))
+                    return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst));
+            }
+            return SLJIT_SUCCESS;
+        }
+
+        is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+        if (flags & SRC2_IMM) {
+            if (is_overflow) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
             }
-            if (op & SLJIT_SET_E)
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
-            if (op & (SLJIT_SET_C | SLJIT_SET_O))
-                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+
+            if (is_overflow || is_carry)
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
         }
         else {
-            if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-            if (op & SLJIT_SET_E)
+            if (is_overflow)
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-            if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
-                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
-            if (op & SLJIT_SET_U)
-                FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
-            if (op & SLJIT_SET_S) {
-                FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
-                FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
-            }
+
+            if (is_overflow || is_carry)
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
         }


-        if (!(op & SLJIT_SET_O))
+        if (!is_overflow)
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
-        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+        if (op & SLJIT_SET_Z)
+            FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);


     case SLJIT_SUBC:
         if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
@@ -277,28 +345,31 @@
             flags &= ~SRC2_IMM;
         }


+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+            if (is_carry)
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
         }
         else {
-            if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+            if (is_carry)
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
         }


-        if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
+        if (is_carry)
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));


-        FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
-        return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+        FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+        return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;


     case SLJIT_MUL:
         SLJIT_ASSERT(!(flags & SRC2_IMM));
-        if (!(op & SLJIT_SET_O)) {
+
+        if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW && GET_FLAG_TYPE(op) != SLJIT_MUL_NOT_OVERFLOW) {
 #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
             return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
 #else
@@ -307,10 +378,10 @@
 #endif
         }
         FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
-        FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
+        FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
         FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-        FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
-        return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
+        return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);


     case SLJIT_AND:
         EMIT_LOGICAL(ANDI, AND);
@@ -337,7 +408,7 @@
         return SLJIT_SUCCESS;
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


@@ -347,20 +418,22 @@
     return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


-    inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
-    inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff);
+    inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+    inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 2);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


     inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
     inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 2);
 }
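
The SLJIT_ADD path above derives the carry without a hardware flags register, using the identity kept in the comment: for unsigned operands, a + b is at least a | b unless the addition wrapped, so an SLTU of the result against the OR of the operands yields the carry. A small self-contained check of that identity (plain C, no sljit types):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the MIPS sequence above: OTHER_FLAG = a | b, then
   SLTU dst, OTHER_FLAG sets the flag exactly when the ADDU wrapped. */
static uint32_t add_carry(uint32_t a, uint32_t b)
{
    uint32_t sum = a + b;             /* ADDU */
    return sum < (a | b);             /* SLTU against the OR of the operands */
}

int main(void)
{
    printf("%u %u %u\n",
        add_carry(0xffffffffu, 1u),              /* wraps   -> 1 */
        add_carry(0x80000000u, 0x80000000u),     /* wraps   -> 1 */
        add_carry(1u, 2u));                      /* no wrap -> 0 */
    return 0;
}
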


Modified: code/trunk/sljit/sljitNativeMIPS_64.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_64.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeMIPS_64.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -123,15 +123,15 @@


 #define EMIT_LOGICAL(op_imm, op_norm) \
     if (flags & SRC2_IMM) { \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
     } \
     else { \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
     }


@@ -144,16 +144,16 @@
         } \
         else \
             ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
     } \
     else { \
         ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \
-        if (op & SLJIT_SET_E) \
+        if (op & SLJIT_SET_Z) \
             FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
-        if (CHECK_FLAGS(SLJIT_SET_E)) \
+        if (!(flags & UNUSED_DEST)) \
             FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
     }


@@ -161,6 +161,7 @@
     sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
 {
     sljit_ins ins;
+    sljit_s32 is_overflow, is_carry, is_handled;


     switch (GET_OPCODE(op)) {
     case SLJIT_MOV:
@@ -180,8 +181,9 @@
             }
             return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
         }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
         return SLJIT_SUCCESS;


     case SLJIT_MOV_U16:
@@ -194,8 +196,9 @@
             }
             return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
         }
-        else if (dst != src2)
-            SLJIT_ASSERT_STOP();
+        else {
+            SLJIT_ASSERT(dst == src2);
+        }
         return SLJIT_SUCCESS;


     case SLJIT_MOV_U32:
@@ -209,9 +212,9 @@


     case SLJIT_NOT:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-        if (op & SLJIT_SET_E)
+        if (op & SLJIT_SET_Z)
             FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-        if (CHECK_FLAGS(SLJIT_SET_E))
+        if (!(flags & UNUSED_DEST))
             FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
         return SLJIT_SUCCESS;


@@ -218,9 +221,9 @@
     case SLJIT_CLZ:
         SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
-        if (op & SLJIT_SET_E)
+        if (op & SLJIT_SET_Z)
             FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
-        if (CHECK_FLAGS(SLJIT_SET_E))
+        if (!(flags & UNUSED_DEST))
             FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
 #else
         if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
@@ -237,130 +240,194 @@
         FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
         FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
         FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
-        if (op & SLJIT_SET_E)
+        if (op & SLJIT_SET_Z)
             return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
 #endif
         return SLJIT_SUCCESS;


     case SLJIT_ADD:
+        is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_O) {
+            if (is_overflow) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
             }
-            if (op & SLJIT_SET_E)
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
-            if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+
+            if (is_overflow || is_carry) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
                 else {
-                    FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+                    FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
                 }
             }
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
         }
         else {
-            if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-            if (op & SLJIT_SET_E)
+            if (is_overflow)
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-            if (op & (SLJIT_SET_C | SLJIT_SET_O))
-                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+
+            if (is_overflow || is_carry)
+                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
         }


         /* a + b >= a | b (otherwise, the carry should be set to 1). */
-        if (op & (SLJIT_SET_C | SLJIT_SET_O))
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
-        if (!(op & SLJIT_SET_O))
+        if (is_overflow || is_carry)
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+        if (!is_overflow)
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
-        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+        if (op & SLJIT_SET_Z)
+            FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
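
(Illustration, not part of the patch.)  The OR + SLTU pair above relies on
the identity a + b == (a | b) + (a & b): the truncated ADDU result drops
below (a | b) exactly when the addition carried out of the register.  A
minimal host-side sketch, helper name illustrative only:

    #include <assert.h>
    #include <stdint.h>

    /* Carry out of a 32-bit ADDU, derived the same way as OTHER_FLAG above. */
    static int addu_carry(uint32_t a, uint32_t b)
    {
        uint32_t sum = a + b;       /* ADDU: truncated sum                */
        uint32_t or_ab = a | b;     /* OR   -> OTHER_FLAG                 */
        return sum < or_ab;         /* SLTU -> 1 iff the addition wrapped */
    }

    int main(void)
    {
        assert(addu_carry(0xffffffffu, 1u) == 1);   /* wraps    */
        assert(addu_carry(0x7fffffffu, 1u) == 0);   /* no carry */
        return 0;
    }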


     case SLJIT_ADDC:
+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_C) {
+            if (is_carry) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
                 else {
-                    FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
                 }
             }
             FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
         } else {
-            if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+            if (is_carry)
+                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
         }
-        if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+        if (is_carry)
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));


-        FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
-        if (!(op & SLJIT_SET_C))
+        FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+        if (!is_carry)
             return SLJIT_SUCCESS;


-        /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
-        FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+        /* Set OTHER_FLAG if (dst == 0) && (OTHER_FLAG == 1). */
+        FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
         /* Set carry flag. */
-        return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
+        return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
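
(Illustration, not part of the patch.)  SLJIT_ADDC applies the same
identity twice: EQUAL_FLAG receives the carry of src1 + src2, the incoming
carry held in OTHER_FLAG is then added, and the two partial carries are
OR-ed together.  Sketch, assuming cin is 0 or 1:

    #include <stdint.h>

    static int addc_carry(uint32_t a, uint32_t b, uint32_t cin)
    {
        uint32_t mid = a + b;
        int c1 = mid < (a | b);     /* SLTU dst, EQUAL_FLAG               */
        uint32_t res = mid + cin;   /* ADDU dst, OTHER_FLAG               */
        int c2 = res < cin;         /* SLTU dst, OTHER_FLAG (res wrapped) */
        return c1 | c2;             /* OR -> final carry in OTHER_FLAG    */
    }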


     case SLJIT_SUB:
-        if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+        if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
             FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
             src2 = TMP_REG2;
             flags &= ~SRC2_IMM;
         }


+        is_handled = 0;
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_O) {
+            if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+                is_handled = 1;
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+                is_handled = 1;
+            }
+        }
+
+        if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
+            is_handled = 1;
+
+            if (flags & SRC2_IMM) {
+                FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+                src2 = TMP_REG2;
+                flags &= ~SRC2_IMM;
+            }
+
+            if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
+            {
+                FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+                FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+            else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
+            {
+                FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+            }
+        }
+
+        if (is_handled) {
+            if (flags & SRC2_IMM) {
+                if (op & SLJIT_SET_Z)
+                    FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+                if (!(flags & UNUSED_DEST))
+                    return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst));
+            }
+            else {
+                if (op & SLJIT_SET_Z)
+                    FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+                if (!(flags & UNUSED_DEST))
+                    return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst));
+            }
+            return SLJIT_SUCCESS;
+        }
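
(Illustration, not part of the patch.)  The new fast path turns pure
comparisons into a single SLTU/SLT (or SLTIU/SLTI for immediates) instead
of deriving several flags from a subtraction: OTHER_FLAG receives the
strict relation, with the operands swapped for the "greater" family, and
each *_EQUAL counterpart is simply the case where that flag is zero.
Sketch with an illustrative helper:

    #include <stdint.h>

    static uint32_t unsigned_strict_flag(uint32_t a, uint32_t b, int greater_family)
    {
        /* SLJIT_LESS / SLJIT_GREATER_EQUAL share SLTU src1, src2;
           SLJIT_GREATER / SLJIT_LESS_EQUAL share SLTU src2, src1. */
        return greater_family ? (uint32_t)(b < a) : (uint32_t)(a < b);
    }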
+
+        is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+        if (flags & SRC2_IMM) {
+            if (is_overflow) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
             }
-            if (op & SLJIT_SET_E)
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
-            if (op & (SLJIT_SET_C | SLJIT_SET_O))
-                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+
+            if (is_overflow || is_carry)
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
         }
         else {
-            if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-            if (op & SLJIT_SET_E)
+            if (is_overflow)
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+            else if (op & SLJIT_SET_Z)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-            if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
-                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
-            if (op & SLJIT_SET_U)
-                FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
-            if (op & SLJIT_SET_S) {
-                FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
-                FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
-            }
+
+            if (is_overflow || is_carry)
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
             /* dst may be the same as src1 or src2. */
-            if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+            if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
                 FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
         }


-        if (!(op & SLJIT_SET_O))
+        if (!is_overflow)
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
-        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
-        return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+        if (op & SLJIT_SET_Z)
+            FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+        return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
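
(Illustration, not part of the patch.)  The trailing shift/XOR sequence
folds the borrow and the sign bits of the operands and the result into
OTHER_FLAG; the value it produces is the usual signed-overflow condition
for r = a - b, which a host-side check can state more directly:

    #include <assert.h>
    #include <stdint.h>

    /* Non-zero iff a - b overflows as a signed 32-bit subtraction. */
    static int sub_overflows(uint32_t a, uint32_t b)
    {
        uint32_t r = a - b;                        /* SUBU result */
        return (int)(((a ^ b) & (a ^ r)) >> 31);
    }

    int main(void)
    {
        assert(sub_overflows(0x80000000u, 1u) == 1);   /* INT_MIN - 1 overflows */
        assert(sub_overflows(5u, 3u) == 0);
        return 0;
    }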


     case SLJIT_SUBC:
         if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
@@ -369,28 +436,31 @@
             flags &= ~SRC2_IMM;
         }


+        is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
         if (flags & SRC2_IMM) {
-            if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+            if (is_carry)
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
         }
         else {
-            if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+            if (is_carry)
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
         }


-        if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
+        if (is_carry)
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));


-        FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
-        return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+        FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+        return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;


     case SLJIT_MUL:
         SLJIT_ASSERT(!(flags & SRC2_IMM));
-        if (!(op & SLJIT_SET_O)) {
+
+        if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW && GET_FLAG_TYPE(op) != SLJIT_MUL_NOT_OVERFLOW) {
 #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
             if (op & SLJIT_I32_OP)
                 return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
@@ -402,10 +472,10 @@
 #endif
         }
         FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
-        FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
+        FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
         FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-        FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
-        return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
+        return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
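
(Illustration, not part of the patch.)  For SLJIT_MUL_OVERFLOW the full
product is formed with (D)MULT; it fits the register width exactly when
the HI word equals the sign-extension of the LO word, which is what the
MFHI / SRA 31 / SUBU sequence leaves in OTHER_FLAG (non-zero on overflow).
Host-side sketch of the 32-bit case:

    #include <assert.h>
    #include <stdint.h>

    static int mul_overflows(int32_t a, int32_t b)
    {
        int64_t prod = (int64_t)a * (int64_t)b;          /* MULT               */
        uint32_t hi = (uint32_t)((uint64_t)prod >> 32);  /* MFHI -> EQUAL_FLAG */
        uint32_t lo = (uint32_t)((uint64_t)prod);        /* MFLO -> dst        */
        uint32_t sign_ext = -(lo >> 31);                 /* SRA dst, 31        */
        return hi != sign_ext;                           /* SUBU != 0          */
    }

    int main(void)
    {
        assert(mul_overflows(0x10000, 0x10000) != 0);    /* 2^32 overflows */
        assert(mul_overflows(-4, 5) == 0);
        return 0;
    }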


     case SLJIT_AND:
         EMIT_LOGICAL(ANDI, AND);
@@ -432,7 +502,7 @@
         return SLJIT_SUCCESS;
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


@@ -446,24 +516,26 @@
     return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


-    inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff);
-    inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff);
-    inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
-    inst[5] = (inst[5] & 0xffff0000) | (new_addr & 0xffff);
+    inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff);
+    inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff);
+    inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+    inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 6);
 }
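
(Illustration, not part of the patch.)  sljit_set_jump_addr on 64-bit MIPS
rewrites the 16-bit immediate fields of the six-instruction load-immediate
sequence in place (the untouched slots, inst[2] and inst[4], presumably
hold the shifts), and now adds executable_offset before flushing,
presumably so the flush is issued on the executable mapping when code is
written through a separate writable one.  Sketch of the halfword split,
names illustrative:

    #include <stdint.h>

    static void split_target(uint64_t target, uint16_t imm[4])
    {
        imm[0] = (uint16_t)(target >> 48);   /* patched into inst[0] */
        imm[1] = (uint16_t)(target >> 32);   /* patched into inst[1] */
        imm[2] = (uint16_t)(target >> 16);   /* patched into inst[3] */
        imm[3] = (uint16_t)target;           /* patched into inst[5] */
    }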


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


     inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff);
     inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff);
     inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
     inst[5] = (inst[5] & 0xffff0000) | (new_constant & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 6);
 }


Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -57,19 +57,14 @@
 #define RETURN_ADDR_REG    31


 /* Flags are kept in volatile registers. */
-#define EQUAL_FLAG    12
-/* And carry flag as well. */
-#define ULESS_FLAG    13
-#define UGREATER_FLAG    14
-#define LESS_FLAG    15
-#define GREATER_FLAG    31
-#define OVERFLOW_FLAG    1
+#define EQUAL_FLAG    31
+#define OTHER_FLAG    1


 #define TMP_FREG1    (0)
 #define TMP_FREG2    ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)


 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
-    0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
+    0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
 };


 /* --------------------------------------------------------------------- */
@@ -218,7 +213,7 @@
     return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16);
 }


-static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
 {
     sljit_sw diff;
     sljit_uw target_addr;
@@ -237,9 +232,10 @@
         target_addr = jump->u.target;
     else {
         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-        target_addr = (sljit_uw)(code + jump->u.label->size);
+        target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
     }
-    inst = (sljit_ins*)jump->addr;
+
+    inst = (sljit_ins *)jump->addr;
     if (jump->flags & IS_COND)
         inst--;


@@ -250,7 +246,7 @@

     /* B instructions. */
     if (jump->flags & IS_MOVABLE) {
-        diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+        diff = ((sljit_sw)target_addr - (sljit_sw)inst - executable_offset) >> 2;
         if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
             jump->flags |= PATCH_B;


@@ -268,7 +264,7 @@
         }
     }
     else {
-        diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2;
+        diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2;
         if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
             jump->flags |= PATCH_B;


@@ -364,6 +360,7 @@
     sljit_ins *buf_ptr;
     sljit_ins *buf_end;
     sljit_uw word_count;
+    sljit_sw executable_offset;
     sljit_uw addr;


     struct sljit_label *label;
@@ -380,9 +377,12 @@


     code_ptr = code;
     word_count = 0;
+    executable_offset = SLJIT_EXEC_OFFSET(code);
+
     label = compiler->labels;
     jump = compiler->jumps;
     const_ = compiler->consts;
+
     do {
         buf_ptr = (sljit_ins*)buf->memory;
         buf_end = buf_ptr + (buf->used_size >> 2);
@@ -393,8 +393,7 @@
             SLJIT_ASSERT(!const_ || const_->addr >= word_count);
             /* These structures are ordered by their address. */
             if (label && label->size == word_count) {
-                /* Just recording the address. */
-                label->addr = (sljit_uw)code_ptr;
+                label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
                 label->size = code_ptr - code;
                 label = label->next;
             }
@@ -404,7 +403,7 @@
 #else
                 jump->addr = (sljit_uw)(code_ptr - 7);
 #endif
-                code_ptr = detect_jump_type(jump, code_ptr, code);
+                code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
                 jump = jump->next;
             }
             if (const_ && const_->addr == word_count) {
@@ -434,16 +433,16 @@
     while (jump) {
         do {
             addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-            buf_ptr = (sljit_ins*)jump->addr;
+            buf_ptr = (sljit_ins *)jump->addr;


             if (jump->flags & PATCH_B) {
-                addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2;
+                addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2;
                 SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN);
                 buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff);
                 break;
             }
             if (jump->flags & PATCH_J) {
-                SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff));
+                SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff));
                 buf_ptr[0] |= (addr >> 2) & 0x03ffffff;
                 break;
             }
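
(Illustration, not part of the patch.)  The two patch forms above mirror
the MIPS branch encodings: PATCH_B stores a 16-bit signed word offset,
measured from the executable address just past the branch, in the low half
of the instruction, while PATCH_J stores a 26-bit word index and requires
the target to lie in the same 256 MB region (the preceding SLJIT_ASSERT).
Sketch with illustrative helpers:

    #include <stdint.h>

    static uint32_t patch_b(uint32_t inst, uintptr_t target, uintptr_t branch_exec_addr)
    {
        intptr_t off = ((intptr_t)target - (intptr_t)(branch_exec_addr + 4)) >> 2;
        return (inst & 0xffff0000) | ((uint32_t)off & 0xffff);   /* 16-bit field */
    }

    static uint32_t patch_j(uint32_t inst, uintptr_t target)
    {
        return inst | (((uint32_t)(target >> 2)) & 0x03ffffff);  /* 26-bit field */
    }
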
@@ -476,8 +475,12 @@
     }


     compiler->error = SLJIT_ERR_COMPILED;
+    compiler->executable_offset = executable_offset;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
+
+    code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+    code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
 #ifndef __GNUC__
     SLJIT_CACHE_FLUSH(code, code_ptr);
 #else
@@ -521,10 +524,6 @@
 #define SLOW_SRC2    0x20000
 #define SLOW_DEST    0x40000


-/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */
-#define CHECK_FLAGS(list) \
-    (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
-
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
 #define STACK_STORE    SW
 #define STACK_LOAD    LW
@@ -760,35 +759,29 @@
     base = arg & REG_MASK;


     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-        argw &= 0x3;
-        if ((flags & WRITE_BACK) && reg_ar == DR(base)) {
-            SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
-            FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
-            reg_ar = DR(TMP_REG1);
+        if (SLJIT_UNLIKELY(flags & WRITE_BACK)) {
+            SLJIT_ASSERT(argw == 0);
+            FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | D(base), DR(base)));
+            return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
         }


+        argw &= 0x3;
+
         /* Using the cache. */
         if (argw == compiler->cache_argw) {
-            if (!(flags & WRITE_BACK)) {
-                if (arg == compiler->cache_arg)
+            if (arg == compiler->cache_arg)
+                return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+
+            if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+                if (arg == next_arg && argw == (next_argw & 0x3)) {
+                    compiler->cache_arg = arg;
+                    compiler->cache_argw = argw;
+                    FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
                     return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
-                if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
-                    if (arg == next_arg && argw == (next_argw & 0x3)) {
-                        compiler->cache_arg = arg;
-                        compiler->cache_argw = argw;
-                        FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
-                        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
-                    }
-                    FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
-                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
                 }
+                FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
+                return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
             }
-            else {
-                if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
-                    FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
-                    return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
-                }
-            }
         }


         if (SLJIT_UNLIKELY(argw)) {
@@ -797,35 +790,18 @@
             FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3)));
         }


-        if (!(flags & WRITE_BACK)) {
-            if (arg == next_arg && argw == (next_argw & 0x3)) {
-                compiler->cache_arg = arg;
-                compiler->cache_argw = argw;
-                FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
-                tmp_ar = DR(TMP_REG3);
-            }
-            else
-                FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar));
-            return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+        if (arg == next_arg && argw == (next_argw & 0x3)) {
+            compiler->cache_arg = arg;
+            compiler->cache_argw = argw;
+            FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
+            tmp_ar = DR(TMP_REG3);
         }
-        FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(base), DR(base)));
-        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+        else
+            FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar));
+        return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
     }


     if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
-        /* Update only applies if a base register exists. */
-        if (reg_ar == DR(base)) {
-            SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
-            if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
-                FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS));
-                if (argw)
-                    return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base));
-                return SLJIT_SUCCESS;
-            }
-            FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
-            reg_ar = DR(TMP_REG1);
-        }
-
         if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
             if (argw)
                 FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)));
@@ -917,7 +893,7 @@
     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
         if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
             return SLJIT_SUCCESS;
-        if (GET_FLAGS(op))
+        if (HAS_FLAGS(op))
             flags |= UNUSED_DEST;
     }
     else if (FAST_IS_REG(dst)) {
@@ -1365,6 +1341,8 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
+    sljit_ins inst;
+
     if (src1 & SLJIT_MEM) {
         FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
         src1 = TMP_FREG1;
@@ -1379,25 +1357,26 @@
     else
         src2 <<= 1;


-    /* src2 and src1 are swapped. */
-    if (op & SLJIT_SET_E) {
-        FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
-        FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
-        FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
-        FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
+    switch (GET_FLAG_TYPE(op)) {
+    case SLJIT_EQUAL_F64:
+    case SLJIT_NOT_EQUAL_F64:
+        inst = C_UEQ_S;
+        break;
+    case SLJIT_LESS_F64:
+    case SLJIT_GREATER_EQUAL_F64:
+        inst = C_ULT_S;
+        break;
+    case SLJIT_GREATER_F64:
+    case SLJIT_LESS_EQUAL_F64:
+        inst = C_ULE_S;
+        break;
+    default:
+        SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED_F64 || GET_FLAG_TYPE(op) == SLJIT_ORDERED_F64);
+        inst = C_UN_S;
+        break;
     }
-    if (op & SLJIT_SET_S) {
-        /* Mixing the instructions for the two checks. */
-        FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
-        FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
-        FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS));
-        FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
-        FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
-        FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
-        FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
-        FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
-    }
-    return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC);
+
+    return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS);
 }
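
(Illustration, not part of the patch.)  The floating point compare now
emits a single compare instruction and leaves its result in the FCSR
condition bit; the second member of each comparison pair is resolved later
by testing the opposite state of that bit (see the SLJIT_*_F64 cases added
to the jump emitter below).  Sketch of the mapping, assuming the C_*
constants encode the like-named MIPS c.cond.s compares; the enum is a
placeholder, not the real sljit constants:

    enum fcmp_kind { FEQ, FNEQ, FLT, FGE, FGT, FLE, FUNORD, FORD };

    static const char *fcmp_insn(enum fcmp_kind type)
    {
        switch (type) {
        case FEQ: case FNEQ:   return "c.ueq.s";   /* EQUAL / NOT_EQUAL    */
        case FLT: case FGE:    return "c.ult.s";   /* LESS / GREATER_EQUAL */
        case FGT: case FLE:    return "c.ule.s";   /* GREATER / LESS_EQUAL */
        default:               return "c.un.s";    /* UNORDERED / ORDERED  */
        }
    }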


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1626,7 +1605,6 @@
     sljit_s32 delay_check = UNMOVABLE_INS;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
@@ -1636,55 +1614,39 @@


     switch (type) {
     case SLJIT_EQUAL:
-    case SLJIT_NOT_EQUAL_F64:
         BR_NZ(EQUAL_FLAG);
         break;
     case SLJIT_NOT_EQUAL:
-    case SLJIT_EQUAL_F64:
         BR_Z(EQUAL_FLAG);
         break;
     case SLJIT_LESS:
-    case SLJIT_LESS_F64:
-        BR_Z(ULESS_FLAG);
-        break;
-    case SLJIT_GREATER_EQUAL:
-    case SLJIT_GREATER_EQUAL_F64:
-        BR_NZ(ULESS_FLAG);
-        break;
     case SLJIT_GREATER:
-    case SLJIT_GREATER_F64:
-        BR_Z(UGREATER_FLAG);
-        break;
-    case SLJIT_LESS_EQUAL:
-    case SLJIT_LESS_EQUAL_F64:
-        BR_NZ(UGREATER_FLAG);
-        break;
     case SLJIT_SIG_LESS:
-        BR_Z(LESS_FLAG);
-        break;
-    case SLJIT_SIG_GREATER_EQUAL:
-        BR_NZ(LESS_FLAG);
-        break;
     case SLJIT_SIG_GREATER:
-        BR_Z(GREATER_FLAG);
-        break;
-    case SLJIT_SIG_LESS_EQUAL:
-        BR_NZ(GREATER_FLAG);
-        break;
     case SLJIT_OVERFLOW:
     case SLJIT_MUL_OVERFLOW:
-        BR_Z(OVERFLOW_FLAG);
+        BR_Z(OTHER_FLAG);
         break;
+    case SLJIT_GREATER_EQUAL:
+    case SLJIT_LESS_EQUAL:
+    case SLJIT_SIG_GREATER_EQUAL:
+    case SLJIT_SIG_LESS_EQUAL:
     case SLJIT_NOT_OVERFLOW:
     case SLJIT_MUL_NOT_OVERFLOW:
-        BR_NZ(OVERFLOW_FLAG);
+        BR_NZ(OTHER_FLAG);
         break;
+    case SLJIT_NOT_EQUAL_F64:
+    case SLJIT_GREATER_EQUAL_F64:
+    case SLJIT_GREATER_F64:
+    case SLJIT_ORDERED_F64:
+        BR_T();
+        break;
+    case SLJIT_EQUAL_F64:
+    case SLJIT_LESS_F64:
+    case SLJIT_LESS_EQUAL_F64:
     case SLJIT_UNORDERED_F64:
         BR_F();
         break;
-    case SLJIT_ORDERED_F64:
-        BR_T();
-        break;
     default:
         /* Not conditional branch. */
         inst = 0;
@@ -1744,7 +1706,6 @@
     sljit_ins inst;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
     ADJUST_LOCAL_OFFSET(src1, src1w);
     ADJUST_LOCAL_OFFSET(src2, src2w);
@@ -1857,87 +1818,6 @@
 #undef RESOLVE_IMM1
 #undef RESOLVE_IMM2


-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type,
-    sljit_s32 src1, sljit_sw src1w,
-    sljit_s32 src2, sljit_sw src2w)
-{
-    struct sljit_jump *jump;
-    sljit_ins inst;
-    sljit_s32 if_true;
-
-    CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
-    CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w));
-
-    compiler->cache_arg = 0;
-    compiler->cache_argw = 0;
-
-    if (src1 & SLJIT_MEM) {
-        PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
-        src1 = TMP_FREG1;
-    }
-    else
-        src1 <<= 1;
-
-    if (src2 & SLJIT_MEM) {
-        PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
-        src2 = TMP_FREG2;
-    }
-    else
-        src2 <<= 1;
-
-    jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
-    PTR_FAIL_IF(!jump);
-    set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
-    jump->flags |= IS_BIT16_COND;
-
-    switch (type & 0xff) {
-    case SLJIT_EQUAL_F64:
-        inst = C_UEQ_S;
-        if_true = 1;
-        break;
-    case SLJIT_NOT_EQUAL_F64:
-        inst = C_UEQ_S;
-        if_true = 0;
-        break;
-    case SLJIT_LESS_F64:
-        inst = C_ULT_S;
-        if_true = 1;
-        break;
-    case SLJIT_GREATER_EQUAL_F64:
-        inst = C_ULT_S;
-        if_true = 0;
-        break;
-    case SLJIT_GREATER_F64:
-        inst = C_ULE_S;
-        if_true = 0;
-        break;
-    case SLJIT_LESS_EQUAL_F64:
-        inst = C_ULE_S;
-        if_true = 1;
-        break;
-    case SLJIT_UNORDERED_F64:
-        inst = C_UN_S;
-        if_true = 1;
-        break;
-    default: /* Make compilers happy. */
-        SLJIT_ASSERT_STOP();
-    case SLJIT_ORDERED_F64:
-        inst = C_UN_S;
-        if_true = 0;
-        break;
-    }
-
-    PTR_FAIL_IF(push_inst(compiler, inst | FMT(type) | FT(src2) | FS(src1), UNMOVABLE_INS));
-    /* Intentionally the other opcode. */
-    PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS));
-    PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
-    PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
-    jump->addr = compiler->size;
-    PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-    return jump;
-}
-
 #undef JUMP_LENGTH
 #undef BR_Z
 #undef BR_NZ
@@ -2047,41 +1927,19 @@
         FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
         dst_ar = sugg_dst_ar;
         break;
-    case SLJIT_LESS:
-    case SLJIT_GREATER_EQUAL:
-    case SLJIT_LESS_F64:
-    case SLJIT_GREATER_EQUAL_F64:
-        dst_ar = ULESS_FLAG;
-        break;
-    case SLJIT_GREATER:
-    case SLJIT_LESS_EQUAL:
-    case SLJIT_GREATER_F64:
-    case SLJIT_LESS_EQUAL_F64:
-        dst_ar = UGREATER_FLAG;
-        break;
-    case SLJIT_SIG_LESS:
-    case SLJIT_SIG_GREATER_EQUAL:
-        dst_ar = LESS_FLAG;
-        break;
-    case SLJIT_SIG_GREATER:
-    case SLJIT_SIG_LESS_EQUAL:
-        dst_ar = GREATER_FLAG;
-        break;
-    case SLJIT_OVERFLOW:
-    case SLJIT_NOT_OVERFLOW:
-        dst_ar = OVERFLOW_FLAG;
-        break;
     case SLJIT_MUL_OVERFLOW:
     case SLJIT_MUL_NOT_OVERFLOW:
-        FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
+        FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
         dst_ar = sugg_dst_ar;
         type ^= 0x1; /* Flip type bit for the XORI below. */
         break;
+    case SLJIT_GREATER_F64:
+    case SLJIT_LESS_EQUAL_F64:
+        type ^= 0x1; /* Flip type bit for the XORI below. */
     case SLJIT_EQUAL_F64:
     case SLJIT_NOT_EQUAL_F64:
-        dst_ar = EQUAL_FLAG;
-        break;
-
+    case SLJIT_LESS_F64:
+    case SLJIT_GREATER_EQUAL_F64:
     case SLJIT_UNORDERED_F64:
     case SLJIT_ORDERED_F64:
         FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
@@ -2091,8 +1949,7 @@
         break;


     default:
-        SLJIT_ASSERT_STOP();
-        dst_ar = sugg_dst_ar;
+        dst_ar = OTHER_FLAG;
         break;
     }
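
(Illustration, not part of the patch.)  The "type ^= 0x1" lines rely on
SLJIT conditions coming in pairs whose codes differ only in the lowest
bit, so a single XORI (not visible in this hunk) can presumably turn the
computed 0/1 flag into its complement when the odd member of a pair was
requested.  Sketch, names illustrative:

    #include <stdint.h>

    static uint32_t finish_flag(uint32_t computed_bit, uint32_t type)
    {
        return computed_bit ^ (type & 0x1);   /* the XORI step */
    }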


@@ -2125,7 +1982,6 @@
     sljit_s32 reg;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);



Modified: code/trunk/sljit/sljitNativePPC_32.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_32.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativePPC_32.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -119,46 +119,34 @@
         return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));


     case SLJIT_ADDC:
-        if (flags & ALT_FORM1) {
-            FAIL_IF(push_inst(compiler, MFXER | D(0)));
-            FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
-            return push_inst(compiler, MTXER | S(0));
-        }
         return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));


     case SLJIT_SUB:
         if (flags & ALT_FORM1) {
-            /* Flags does not set: BIN_IMM_EXTS unnecessary. */
             SLJIT_ASSERT(src2 == TMP_REG2);
             return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
         }
         if (flags & (ALT_FORM2 | ALT_FORM3)) {
             SLJIT_ASSERT(src2 == TMP_REG2);
-            if (flags & ALT_FORM2)
-                FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm));
-            if (flags & ALT_FORM3)
-                return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm);
-            return SLJIT_SUCCESS;
+            return push_inst(compiler, ((flags & ALT_FORM2) ? CMPI : CMPLI) | CRD(0) | A(src1) | compiler->imm);
         }
         if (flags & (ALT_FORM4 | ALT_FORM5)) {
-            if (flags & ALT_FORM4)
-                FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
-            if (flags & ALT_FORM5)
-                FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)));
-            return SLJIT_SUCCESS;
+            return push_inst(compiler, ((flags & ALT_FORM4) ? CMP : CMPL) | CRD(0) | A(src1) | B(src2));
         }
+        if (flags & ALT_FORM6) {
+            SLJIT_ASSERT(src2 == TMP_REG2);
+            FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm));
+            return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
+        }
+        if (flags & ALT_FORM7) {
+            FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2)));
+            return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+        }
         if (!(flags & ALT_SET_FLAGS))
             return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
-        if (flags & ALT_FORM6)
-            FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
         return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));


     case SLJIT_SUBC:
-        if (flags & ALT_FORM1) {
-            FAIL_IF(push_inst(compiler, MFXER | D(0)));
-            FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
-            return push_inst(compiler, MTXER | S(0));
-        }
         return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));


     case SLJIT_MUL:
@@ -228,19 +216,15 @@
         return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));


     case SLJIT_ASHR:
-        if (flags & ALT_FORM3)
-            FAIL_IF(push_inst(compiler, MFXER | D(0)));
         if (flags & ALT_FORM1) {
             SLJIT_ASSERT(src2 == TMP_REG2);
             compiler->imm &= 0x1f;
-            FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
+            return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
         }
-        else
-            FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)));
-        return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+        return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


@@ -250,20 +234,22 @@
     return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


-    inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
-    inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff);
+    inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+    inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 2);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


     inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
     inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 2);
 }


Modified: code/trunk/sljit/sljitNativePPC_64.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_64.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativePPC_64.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -240,11 +240,6 @@
         return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));


     case SLJIT_ADDC:
-        if (flags & ALT_FORM1) {
-            FAIL_IF(push_inst(compiler, MFXER | D(0)));
-            FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
-            return push_inst(compiler, MTXER | S(0));
-        }
         BIN_EXTS();
         return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));


@@ -256,32 +251,26 @@
         }
         if (flags & (ALT_FORM2 | ALT_FORM3)) {
             SLJIT_ASSERT(src2 == TMP_REG2);
-            if (flags & ALT_FORM2)
-                FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
-            if (flags & ALT_FORM3)
-                return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
-            return SLJIT_SUCCESS;
+            return push_inst(compiler, ((flags & ALT_FORM2) ? CMPI : CMPLI) | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
         }
         if (flags & (ALT_FORM4 | ALT_FORM5)) {
-            if (flags & ALT_FORM4)
-                FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
-            if (flags & ALT_FORM5)
-                return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
-            return SLJIT_SUCCESS;
+            return push_inst(compiler, ((flags & ALT_FORM4) ? CMP : CMPL) | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
         }
+        if (flags & ALT_FORM6) {
+            SLJIT_ASSERT(src2 == TMP_REG2);
+            FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
+            return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
+        }
+        if (flags & ALT_FORM7) {
+            FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
+            return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+        }
         if (!(flags & ALT_SET_FLAGS))
             return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
         BIN_EXTS();
-        if (flags & ALT_FORM6)
-            FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
         return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));


     case SLJIT_SUBC:
-        if (flags & ALT_FORM1) {
-            FAIL_IF(push_inst(compiler, MFXER | D(0)));
-            FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
-            return push_inst(compiler, MTXER | S(0));
-        }
         BIN_EXTS();
         return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));


@@ -345,10 +334,8 @@
                 compiler->imm &= 0x1f;
                 return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
             }
-            else {
-                compiler->imm &= 0x3f;
-                return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
-            }
+            compiler->imm &= 0x3f;
+            return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
         }
         return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));


@@ -359,33 +346,25 @@
                 compiler->imm &= 0x1f;
                 return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
             }
-            else {
-                compiler->imm &= 0x3f;
-                return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
-            }
+            compiler->imm &= 0x3f;
+            return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
         }
         return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));


     case SLJIT_ASHR:
-        if (flags & ALT_FORM3)
-            FAIL_IF(push_inst(compiler, MFXER | D(0)));
         if (flags & ALT_FORM1) {
             SLJIT_ASSERT(src2 == TMP_REG2);
             if (flags & ALT_FORM2) {
                 compiler->imm &= 0x1f;
-                FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
+                return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
             }
-            else {
-                compiler->imm &= 0x3f;
-                FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)));
-            }
+            compiler->imm &= 0x3f;
+            return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4));
         }
-        else
-            FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)));
-        return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+        return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2));
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


@@ -398,18 +377,19 @@
     return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
     sljit_ins *inst = (sljit_ins*)addr;


-    inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff);
-    inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff);
-    inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
-    inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff);
+    inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff);
+    inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff);
+    inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+    inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 5);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
     sljit_ins *inst = (sljit_ins*)addr;


@@ -417,5 +397,6 @@
     inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff);
     inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
     inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 5);
 }


Modified: code/trunk/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_common.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativePPC_common.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -249,7 +249,7 @@
     return SLJIT_SUCCESS;
 }


-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
 {
     sljit_sw diff;
     sljit_uw target_addr;
@@ -267,7 +267,7 @@
         target_addr = jump->u.target;
     else {
         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-        target_addr = (sljit_uw)(code + jump->u.label->size);
+        target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
     }


 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -275,7 +275,7 @@
         goto keep_address;
 #endif


-    diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
+    diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l;


     extra_jump_flags = 0;
     if (jump->flags & IS_COND) {
@@ -296,6 +296,7 @@
         jump->flags |= PATCH_B | extra_jump_flags;
         return 1;
     }
+
     if (target_addr <= 0x03ffffff) {
         jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
         return 1;
@@ -309,6 +310,7 @@
         jump->flags |= PATCH_ABS32;
         return 1;
     }
+
     if (target_addr <= 0x7fffffffffffl) {
         jump->flags |= PATCH_ABS48;
         return 1;
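
(Illustration, not part of the patch.)  detect_jump_type picks the
cheapest encoding that can reach the target: a relative branch when the
offset-adjusted displacement fits, otherwise absolute forms of increasing
width (26-bit PATCH_ABS_B, PATCH_ABS32, PATCH_ABS48, finally a full 64-bit
load).  Condensed sketch of the tiers; the relative limit depends on the
branch form and the exact 32-bit bound is not visible in this hunk, so
both are assumptions here:

    #include <stdint.h>

    enum patch_kind { PATCH_REL, PATCH_ABS_B_K, PATCH_ABS32_K, PATCH_ABS48_K, PATCH_ABS64_K };

    static enum patch_kind pick_patch(int64_t diff, uint64_t target, int64_t rel_limit)
    {
        if (diff <= rel_limit && diff >= -rel_limit)
            return PATCH_REL;                       /* PATCH_B              */
        if (target <= 0x03ffffff)
            return PATCH_ABS_B_K;                   /* 26-bit absolute      */
        if (target <= 0x7fffffff)                   /* assumed 32-bit bound */
            return PATCH_ABS32_K;
        if (target <= 0x7fffffffffffll)
            return PATCH_ABS48_K;
        return PATCH_ABS64_K;
    }
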
@@ -326,6 +328,7 @@
     sljit_ins *buf_ptr;
     sljit_ins *buf_end;
     sljit_uw word_count;
+    sljit_sw executable_offset;
     sljit_uw addr;


     struct sljit_label *label;
@@ -349,9 +352,12 @@


     code_ptr = code;
     word_count = 0;
+    executable_offset = SLJIT_EXEC_OFFSET(code);
+
     label = compiler->labels;
     jump = compiler->jumps;
     const_ = compiler->consts;
+
     do {
         buf_ptr = (sljit_ins*)buf->memory;
         buf_end = buf_ptr + (buf->used_size >> 2);
@@ -363,7 +369,7 @@
             /* These structures are ordered by their address. */
             if (label && label->size == word_count) {
                 /* Just recording the address. */
-                label->addr = (sljit_uw)code_ptr;
+                label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
                 label->size = code_ptr - code;
                 label = label->next;
             }
@@ -373,7 +379,7 @@
 #else
                 jump->addr = (sljit_uw)(code_ptr - 6);
 #endif
-                if (detect_jump_type(jump, code_ptr, code)) {
+                if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
                     code_ptr[-3] = code_ptr[0];
                     code_ptr -= 3;
@@ -420,7 +426,7 @@
     } while (buf);


     if (label && label->size == word_count) {
-        label->addr = (sljit_uw)code_ptr;
+        label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
         label->size = code_ptr - code;
         label = label->next;
     }
@@ -438,11 +444,12 @@
     while (jump) {
         do {
             addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-            buf_ptr = (sljit_ins*)jump->addr;
+            buf_ptr = (sljit_ins *)jump->addr;
+
             if (jump->flags & PATCH_B) {
                 if (jump->flags & IS_COND) {
                     if (!(jump->flags & PATCH_ABS_B)) {
-                        addr = addr - jump->addr;
+                        addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
                         SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
                         *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
                     }
@@ -453,7 +460,7 @@
                 }
                 else {
                     if (!(jump->flags & PATCH_ABS_B)) {
-                        addr = addr - jump->addr;
+                        addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
                         SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
                         *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
                     }
@@ -464,6 +471,7 @@
                 }
                 break;
             }
+
             /* Set the fields of immediate loads. */
 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
             buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
@@ -492,21 +500,26 @@
     }


     compiler->error = SLJIT_ERR_COMPILED;
+    compiler->executable_offset = executable_offset;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
-    SLJIT_CACHE_FLUSH(code, code_ptr);


+    code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+
 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
     if (((sljit_sw)code_ptr) & 0x4)
         code_ptr++;
+#endif
     sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
+#endif
+
+    code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
+    SLJIT_CACHE_FLUSH(code, code_ptr);
+
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
     return code_ptr;
 #else
-    sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
-    return code_ptr;
-#endif
-#else
     return code;
 #endif
 }
@@ -546,6 +559,7 @@
 #define ALT_FORM4    0x080000
 #define ALT_FORM5    0x100000
 #define ALT_FORM6    0x200000
+#define ALT_FORM7    0x400000


 /* Source and destination is register. */
 #define REG_DEST    0x000001
@@ -560,7 +574,7 @@
 ALT_SET_FLAGS        0x000400
 ALT_FORM1        0x010000
 ...
-ALT_FORM6        0x200000 */
+ALT_FORM7        0x400000 */


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
 #include "sljitNativePPC_32.c"
@@ -851,7 +865,7 @@
     sljit_ins inst;


     /* Should work when (arg & REG_MASK) == 0. */
-    SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
+    SLJIT_ASSERT(A(0) == 0);
     SLJIT_ASSERT(arg & SLJIT_MEM);


     if (arg & OFFS_REG_MASK) {
@@ -1006,10 +1020,6 @@
 #endif


         if (inp_flags & WRITE_BACK) {
-            if (arg == reg) {
-                FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
-                reg = tmp_r;
-            }
             tmp_r = arg;
             FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
         }
@@ -1132,7 +1142,7 @@
     sljit_s32 src1_r;
     sljit_s32 src2_r;
     sljit_s32 sugg_src2_r = TMP_REG2;
-    sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
+    sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_FORM7 | ALT_SIGN_EXT | ALT_SET_FLAGS);


     if (!(input_flags & ALT_KEEP_CACHE)) {
         compiler->cache_arg = 0;
@@ -1302,7 +1312,7 @@
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src, sljit_sw srcw)
 {
-    sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+    sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
     sljit_s32 op_flags = GET_ALL_FLAGS(op);


     CHECK_ERROR();
@@ -1314,7 +1324,7 @@
     if ((src & SLJIT_IMM) && srcw == 0)
         src = TMP_ZERO;


-    if (op_flags & SLJIT_SET_O)
+    if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op_flags) == SLJIT_NOT_OVERFLOW)
         FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));


     if (op_flags & SLJIT_I32_OP) {
@@ -1458,7 +1468,7 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
-    sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+    sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;


     CHECK_ERROR();
     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
@@ -1479,11 +1489,11 @@
             src1w = (sljit_s32)(src1w);
         if (src2 & SLJIT_IMM)
             src2w = (sljit_s32)(src2w);
-        if (GET_FLAGS(op))
+        if (HAS_FLAGS(op))
             flags |= ALT_SIGN_EXT;
     }
 #endif
-    if (op & SLJIT_SET_O)
+    if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW)
         FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
     if (src2 == TMP_REG2)
         flags |= ALT_KEEP_CACHE;
@@ -1490,7 +1500,7 @@


     switch (GET_OPCODE(op)) {
     case SLJIT_ADD:
-        if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+        if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1517,7 +1527,7 @@
                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
-        if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
+        if (HAS_FLAGS(op)) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1530,10 +1540,29 @@
         return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_ADDC:
-        return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SUB:
-        if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+        if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL)
+        {
+            if (dst == SLJIT_UNUSED)
+            {
+                if (TEST_UL_IMM(src2, src2w)) {
+                    compiler->imm = src2w & 0xffff;
+                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+                }
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM5, dst, dstw, src1, src1w, src2, src2w);
+            }
+
+            if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1))
+            {
+                compiler->imm = src2w;
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM6, dst, dstw, src1, src1w, TMP_REG2, 0);
+            }
+            return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM7, dst, dstw, src1, src1w, src2, src2w);
+        }
+
+        if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
             if (TEST_SL_IMM(src2, -src2w)) {
                 compiler->imm = (-src2w) & 0xffff;
                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1552,43 +1581,25 @@
                 return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
         }
-        if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
-            if (!(op & SLJIT_SET_U)) {
-                /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
-                if (TEST_SL_IMM(src2, src2w)) {
-                    compiler->imm = src2w & 0xffff;
-                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
-                }
-                if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
-                    compiler->imm = src1w & 0xffff;
-                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
-                }
+
+        if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)
+                && GET_FLAG_TYPE(op) != SLJIT_OVERFLOW && GET_FLAG_TYPE(op) != SLJIT_NOT_OVERFLOW) {
+            if (TEST_SL_IMM(src2, src2w)) {
+                compiler->imm = src2w & 0xffff;
+                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
             }
-            if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
-                /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
-                if (TEST_UL_IMM(src2, src2w)) {
-                    compiler->imm = src2w & 0xffff;
-                    return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
-                }
-                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
-            }
-            if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
-                compiler->imm = src2w;
-                return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
-            }
-            return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
+            return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
         }
-        if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
-            if (TEST_SL_IMM(src2, -src2w)) {
-                compiler->imm = (-src2w) & 0xffff;
-                return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
-            }
+
+        if (TEST_SL_IMM(src2, -src2w)) {
+            compiler->imm = (-src2w) & 0xffff;
+            return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
         }
         /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
-        return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_SUB, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_SUBC:
-        return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+        return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_MUL:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1595,7 +1606,7 @@
         if (op & SLJIT_I32_OP)
             flags |= ALT_FORM2;
 #endif
-        if (!GET_FLAGS(op)) {
+        if (!HAS_FLAGS(op)) {
             if (TEST_SL_IMM(src2, src2w)) {
                 compiler->imm = src2w & 0xffff;
                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1605,6 +1616,8 @@
                 return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
+        else
+            FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
         return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);


     case SLJIT_AND:
@@ -1611,7 +1624,7 @@
     case SLJIT_OR:
     case SLJIT_XOR:
         /* Commutative unsigned operations. */
-        if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
+        if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
             if (TEST_UL_IMM(src2, src2w)) {
                 compiler->imm = src2w;
                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1629,7 +1642,8 @@
                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
             }
         }
-        if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
+        if (GET_OPCODE(op) != SLJIT_AND) {
+            /* Unlike OR and XOR, AND resets unwanted bits as well. */
             if (TEST_UI_IMM(src2, src2w)) {
                 compiler->imm = src2w;
                 return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1641,12 +1655,9 @@
         }
         return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);


-    case SLJIT_ASHR:
-        if (op & SLJIT_KEEP_FLAGS)
-            flags |= ALT_FORM3;
-        /* Fall through. */
     case SLJIT_SHL:
     case SLJIT_LSHR:
+    case SLJIT_ASHR:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
         if (op & SLJIT_I32_OP)
             flags |= ALT_FORM2;
@@ -2080,33 +2091,33 @@
         return (4 << 21) | (2 << 16);


     case SLJIT_LESS:
+    case SLJIT_SIG_LESS:
+        return (12 << 21) | (0 << 16);
+
+    case SLJIT_GREATER_EQUAL:
+    case SLJIT_SIG_GREATER_EQUAL:
+        return (4 << 21) | (0 << 16);
+
+    case SLJIT_GREATER:
+    case SLJIT_SIG_GREATER:
+        return (12 << 21) | (1 << 16);
+
+    case SLJIT_LESS_EQUAL:
+    case SLJIT_SIG_LESS_EQUAL:
+        return (4 << 21) | (1 << 16);
+
     case SLJIT_LESS_F64:
         return (12 << 21) | ((4 + 0) << 16);


-    case SLJIT_GREATER_EQUAL:
     case SLJIT_GREATER_EQUAL_F64:
         return (4 << 21) | ((4 + 0) << 16);


-    case SLJIT_GREATER:
     case SLJIT_GREATER_F64:
         return (12 << 21) | ((4 + 1) << 16);


-    case SLJIT_LESS_EQUAL:
     case SLJIT_LESS_EQUAL_F64:
         return (4 << 21) | ((4 + 1) << 16);


-    case SLJIT_SIG_LESS:
-        return (12 << 21) | (0 << 16);
-
-    case SLJIT_SIG_GREATER_EQUAL:
-        return (4 << 21) | (0 << 16);
-
-    case SLJIT_SIG_GREATER:
-        return (12 << 21) | (1 << 16);
-
-    case SLJIT_SIG_LESS_EQUAL:
-        return (4 << 21) | (1 << 16);
-
     case SLJIT_OVERFLOW:
     case SLJIT_MUL_OVERFLOW:
         return (12 << 21) | (3 << 16);
@@ -2139,7 +2150,6 @@
     sljit_ins bo_bi_flags;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


     bo_bi_flags = get_bo_bi_flags(type & 0xff);
@@ -2211,7 +2221,6 @@


 /* Get a bit from CR, all other bits are zeroed. */
 #define GET_CR_BIT(bit, dst) \
-    FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
     FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));


 #define INVERT_BIT(dst) \
@@ -2250,6 +2259,8 @@
         srcw = 0;
     }


+    FAIL_IF(push_inst(compiler, MFCR | D(reg)));
+
     switch (type & 0xff) {
     case SLJIT_EQUAL:
         GET_CR_BIT(2, reg);
@@ -2261,42 +2272,42 @@
         break;


     case SLJIT_LESS:
-    case SLJIT_LESS_F64:
-        GET_CR_BIT(4 + 0, reg);
+    case SLJIT_SIG_LESS:
+        GET_CR_BIT(0, reg);
         break;


     case SLJIT_GREATER_EQUAL:
-    case SLJIT_GREATER_EQUAL_F64:
-        GET_CR_BIT(4 + 0, reg);
+    case SLJIT_SIG_GREATER_EQUAL:
+        GET_CR_BIT(0, reg);
         INVERT_BIT(reg);
         break;


     case SLJIT_GREATER:
-    case SLJIT_GREATER_F64:
-        GET_CR_BIT(4 + 1, reg);
+    case SLJIT_SIG_GREATER:
+        GET_CR_BIT(1, reg);
         break;


     case SLJIT_LESS_EQUAL:
-    case SLJIT_LESS_EQUAL_F64:
-        GET_CR_BIT(4 + 1, reg);
+    case SLJIT_SIG_LESS_EQUAL:
+        GET_CR_BIT(1, reg);
         INVERT_BIT(reg);
         break;


-    case SLJIT_SIG_LESS:
-        GET_CR_BIT(0, reg);
+    case SLJIT_LESS_F64:
+        GET_CR_BIT(4 + 0, reg);
         break;


-    case SLJIT_SIG_GREATER_EQUAL:
-        GET_CR_BIT(0, reg);
+    case SLJIT_GREATER_EQUAL_F64:
+        GET_CR_BIT(4 + 0, reg);
         INVERT_BIT(reg);
         break;


-    case SLJIT_SIG_GREATER:
-        GET_CR_BIT(1, reg);
+    case SLJIT_GREATER_F64:
+        GET_CR_BIT(4 + 1, reg);
         break;


-    case SLJIT_SIG_LESS_EQUAL:
-        GET_CR_BIT(1, reg);
+    case SLJIT_LESS_EQUAL_F64:
+        GET_CR_BIT(4 + 1, reg);
         INVERT_BIT(reg);
         break;


@@ -2330,7 +2341,7 @@
         break;


     default:
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
         break;
     }


@@ -2364,7 +2375,6 @@
     sljit_s32 reg;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);
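
One more note on the emit_op_flags rework above: the MFCR is now emitted once
before the switch, so GET_CR_BIT keeps only the RLWINM rotate that isolates a
single CR bit.  In plain C that rotate computes roughly the following (a sketch
for illustration, assuming the usual PowerPC convention that CR bit 0 is the
most significant bit of the 32-bit CR image; the helper is not sljit code):

    /* Sketch: the value GET_CR_BIT(bit, reg) leaves in reg after the single
       MFCR above, i.e. RLWINM with SH = bit + 1 and MB = ME = 31. */
    static sljit_u32 cr_bit_value(sljit_u32 cr_image, unsigned int bit)
    {
        return (cr_image >> (31 - bit)) & 0x1;
    }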



Modified: code/trunk/sljit/sljitNativeSPARC_32.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_32.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeSPARC_32.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -60,7 +60,7 @@
             return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
         }
         else if (dst != src2)
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
         return SLJIT_SUCCESS;


     case SLJIT_MOV_U16:
@@ -71,7 +71,7 @@
             return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
         }
         else if (dst != src2)
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
         return SLJIT_SUCCESS;


     case SLJIT_NOT:
@@ -135,7 +135,7 @@
         return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


@@ -145,20 +145,22 @@
     return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst));
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


-    inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff);
-    inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff);
+    inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff);
+    inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 2);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-    sljit_ins *inst = (sljit_ins*)addr;
+    sljit_ins *inst = (sljit_ins *)addr;


     inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff);
     inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff);
+    inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
     SLJIT_CACHE_FLUSH(inst, inst + 2);
 }
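
As on the other back ends, the SPARC_32 patchers above now flush the cache at
the executable alias, while the constant itself still lives in a sethi/or pair:
the upper 22 bits sit in the first instruction, the low 10 bits in the second.
A small illustrative inverse (not sljit API) that reads such a constant back:

    /* Sketch: inverse of the 32-bit SPARC sljit_set_const() patch above. */
    static sljit_sw read_back_const_sparc(const sljit_ins *inst)
    {
        return (sljit_sw)(((inst[0] & 0x3fffff) << 10) | (inst[1] & 0x3ff));
    }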


Modified: code/trunk/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_common.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeSPARC_common.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -199,7 +199,7 @@
     return SLJIT_SUCCESS;
 }


-static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
 {
     sljit_sw diff;
     sljit_uw target_addr;
@@ -213,7 +213,7 @@
         target_addr = jump->u.target;
     else {
         SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-        target_addr = (sljit_uw)(code + jump->u.label->size);
+        target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
     }
     inst = (sljit_ins*)jump->addr;


@@ -239,8 +239,9 @@
     if (jump->flags & IS_COND)
         inst--;


+    diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2;
+
     if (jump->flags & IS_MOVABLE) {
-        diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2;
         if (diff <= MAX_DISP && diff >= MIN_DISP) {
             jump->flags |= PATCH_B;
             inst--;
@@ -257,7 +258,8 @@
         }
     }


-    diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+    diff += sizeof(sljit_ins);
+
     if (diff <= MAX_DISP && diff >= MIN_DISP) {
         jump->flags |= PATCH_B;
         if (jump->flags & IS_COND)
@@ -280,6 +282,7 @@
     sljit_ins *buf_ptr;
     sljit_ins *buf_end;
     sljit_uw word_count;
+    sljit_sw executable_offset;
     sljit_uw addr;


     struct sljit_label *label;
@@ -296,9 +299,12 @@


     code_ptr = code;
     word_count = 0;
+    executable_offset = SLJIT_EXEC_OFFSET(code);
+
     label = compiler->labels;
     jump = compiler->jumps;
     const_ = compiler->consts;
+
     do {
         buf_ptr = (sljit_ins*)buf->memory;
         buf_end = buf_ptr + (buf->used_size >> 2);
@@ -310,7 +316,7 @@
             /* These structures are ordered by their address. */
             if (label && label->size == word_count) {
                 /* Just recording the address. */
-                label->addr = (sljit_uw)code_ptr;
+                label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
                 label->size = code_ptr - code;
                 label = label->next;
             }
@@ -320,7 +326,7 @@
 #else
                 jump->addr = (sljit_uw)(code_ptr - 6);
 #endif
-                code_ptr = detect_jump_type(jump, code_ptr, code);
+                code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
                 jump = jump->next;
             }
             if (const_ && const_->addr == word_count) {
@@ -336,7 +342,7 @@
     } while (buf);


     if (label && label->size == word_count) {
-        label->addr = (sljit_uw)code_ptr;
+        label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
         label->size = code_ptr - code;
         label = label->next;
     }
@@ -350,16 +356,16 @@
     while (jump) {
         do {
             addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-            buf_ptr = (sljit_ins*)jump->addr;
+            buf_ptr = (sljit_ins *)jump->addr;


             if (jump->flags & PATCH_CALL) {
-                addr = (sljit_sw)(addr - jump->addr) >> 2;
+                addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
                 SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000);
                 buf_ptr[0] = CALL | (addr & 0x3fffffff);
                 break;
             }
             if (jump->flags & PATCH_B) {
-                addr = (sljit_sw)(addr - jump->addr) >> 2;
+                addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
                 SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP);
                 buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
                 break;
@@ -378,8 +384,12 @@



     compiler->error = SLJIT_ERR_COMPILED;
+    compiler->executable_offset = executable_offset;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
+
+    code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+    code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }
@@ -568,7 +578,6 @@
     base = arg & REG_MASK;
     if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
         argw &= 0x3;
-        SLJIT_ASSERT(argw != 0);


         /* Using the cache. */
         if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw))
@@ -813,7 +822,7 @@
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src, sljit_sw srcw)
 {
-    sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+    sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;


     CHECK_ERROR();
     CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
@@ -882,7 +891,7 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
-    sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+    sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;


     CHECK_ERROR();
     CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
@@ -911,7 +920,7 @@
         if (src2 & SLJIT_IMM)
             src2w &= 0x1f;
 #else
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
 #endif
         return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
     }
@@ -1286,7 +1295,7 @@
         return DA(0xf);


     default:
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
         return DA(0x8);
     }
 }
@@ -1296,7 +1305,6 @@
     struct sljit_jump *jump;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
@@ -1378,7 +1386,7 @@
     sljit_s32 src, sljit_sw srcw,
     sljit_s32 type)
 {
-    sljit_s32 reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0);
+    sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0;


     CHECK_ERROR();
     CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
@@ -1424,7 +1432,6 @@
     struct sljit_const *const_;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);
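
In the jump fix-up loop above, the PATCH_CALL and PATCH_B cases now subtract the
executable alias of the patched instruction instead of jump->addr, because label
addresses already include executable_offset while buf_ptr still points into the
writable copy.  The displacement they range-check against MAX_DISP/MIN_DISP is,
in isolation, just the following (illustrative helper, not sljit code):

    /* Sketch: word displacement between target and branch site, both taken as
       executable addresses, exactly as the patch loop above computes it. */
    static sljit_sw branch_disp(sljit_uw target_exec, sljit_ins *buf_ptr, sljit_sw executable_offset)
    {
        return (sljit_sw)(target_exec - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
    }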



Modified: code/trunk/sljit/sljitNativeTILEGX-encoder.c
===================================================================
--- code/trunk/sljit/sljitNativeTILEGX-encoder.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeTILEGX-encoder.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -2,7 +2,7 @@
  *    Stack-less Just-In-Time compiler
  *
  *    Copyright 2013-2013 Tilera Corporation(jiwang@???). All rights reserved.
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:


Modified: code/trunk/sljit/sljitNativeTILEGX_64.c
===================================================================
--- code/trunk/sljit/sljitNativeTILEGX_64.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeTILEGX_64.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -2,7 +2,7 @@
  *    Stack-less Just-In-Time compiler
  *
  *    Copyright 2013-2013 Tilera Corporation(jiwang@???). All rights reserved.
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -687,7 +687,7 @@
             inst_buf[0] = inst1;
             inst_buf_index = 1;
         } else
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();


 #ifdef TILEGX_JIT_DEBUG
         return push_inst_nodebug(compiler, bits);
@@ -727,10 +727,10 @@
             return push_inst(compiler, bits);
 #endif
         } else
-            SLJIT_ASSERT_STOP();
+            SLJIT_UNREACHABLE();
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }


 static sljit_s32 flush_buffer(struct sljit_compiler *compiler)
@@ -814,7 +814,7 @@
         break;
     default:
         printf("unrecoginzed opc: %s\n", opcode->name);
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
     }


     inst_buf_index++;
@@ -859,7 +859,7 @@
         break;
     default:
         printf("unrecoginzed opc: %s\n", opcode->name);
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
     }


     inst_buf_index++;
@@ -1113,7 +1113,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }
@@ -1953,7 +1952,7 @@
         return SLJIT_SUCCESS;
     }


-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
     return SLJIT_SUCCESS;
 }


@@ -2144,7 +2143,7 @@
         break;


     default:
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
         dst_ar = sugg_dst_ar;
         break;
     }
@@ -2187,7 +2186,7 @@
     case SLJIT_DIVMOD_SW:
     case SLJIT_DIV_UW:
     case SLJIT_DIV_SW:
-        SLJIT_ASSERT_STOP();
+        SLJIT_UNREACHABLE();
     }


     return SLJIT_SUCCESS;
@@ -2413,7 +2412,6 @@
     flush_buffer(compiler);


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


     jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
@@ -2496,12 +2494,12 @@


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
 {
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
 {
-    SLJIT_ASSERT_STOP();
+    SLJIT_UNREACHABLE();
 }


 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
@@ -2512,7 +2510,6 @@
     flush_buffer(compiler);


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);


@@ -2529,13 +2526,13 @@
     return const_;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target)
 {
     sljit_ins *inst = (sljit_ins *)addr;


-    inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43);
-    inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43);
-    inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43);
+    inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_target >> 32) & 0xffff) << 43);
+    inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_target >> 16) & 0xffff) << 43);
+    inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_target & 0xffff) << 43);
     SLJIT_CACHE_FLUSH(inst, inst + 3);
 }
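
Unlike the other back ends touched here, the TILE-Gx sljit_set_jump_addr above
keeps its two-argument form; it stores the (up to 48-bit) target as three 16-bit
chunks, each placed at bit 43 of one instruction bundle.  An illustrative
inverse, not part of sljit:

    /* Sketch: reassemble the target written by the three stores above. */
    static sljit_uw read_back_target_tilegx(const sljit_ins *inst)
    {
        return (((inst[0] >> 43) & 0xffff) << 32)
             | (((inst[1] >> 43) & 0xffff) << 16)
             | ((inst[2] >> 43) & 0xffff);
    }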



Modified: code/trunk/sljit/sljitNativeX86_32.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_32.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeX86_32.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -38,7 +38,7 @@
     return SLJIT_SUCCESS;
 }


-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
 {
     if (type == SLJIT_JUMP) {
         *code_ptr++ = JMP_i32;
@@ -57,7 +57,7 @@
     if (jump->flags & JUMP_LABEL)
         jump->flags |= PATCH_MW;
     else
-        sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4));
+        sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
     code_ptr += 4;


     return code_ptr;
@@ -75,9 +75,8 @@
     set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->args = args;
-    compiler->flags_saved = 0;


-    size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3);
+    size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
     size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
 #else
@@ -94,11 +93,11 @@
         *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
     }
 #endif
-    if (saveds > 2 || scratches > 7)
+    if (saveds > 2 || scratches > 9)
         PUSH_REG(reg_map[SLJIT_S2]);
-    if (saveds > 1 || scratches > 8)
+    if (saveds > 1 || scratches > 10)
         PUSH_REG(reg_map[SLJIT_S1]);
-    if (saveds > 0 || scratches > 9)
+    if (saveds > 0 || scratches > 11)
         PUSH_REG(reg_map[SLJIT_S0]);


 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
@@ -137,7 +136,7 @@
     SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words);
 #if defined(__APPLE__)
     /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
-    saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+    saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
     local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
 #else
     if (options & SLJIT_DOUBLE_ALIGNMENT) {
@@ -194,7 +193,7 @@
     compiler->args = args;


 #if defined(__APPLE__)
-    saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+    saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
     compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
 #else
     if (options & SLJIT_DOUBLE_ALIGNMENT)
@@ -214,7 +213,6 @@
     CHECK(check_sljit_emit_return(compiler, op, src, srcw));
     SLJIT_ASSERT(compiler->args >= 0);


-    compiler->flags_saved = 0;
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


     SLJIT_ASSERT(compiler->local_size > 0);
@@ -247,11 +245,11 @@


     INC_SIZE(size);


-    if (compiler->saveds > 0 || compiler->scratches > 9)
+    if (compiler->saveds > 0 || compiler->scratches > 11)
         POP_REG(reg_map[SLJIT_S0]);
-    if (compiler->saveds > 1 || compiler->scratches > 8)
+    if (compiler->saveds > 1 || compiler->scratches > 10)
         POP_REG(reg_map[SLJIT_S1]);
-    if (compiler->saveds > 2 || compiler->scratches > 7)
+    if (compiler->saveds > 2 || compiler->scratches > 9)
         POP_REG(reg_map[SLJIT_S2]);
     POP_REG(reg_map[TMP_REG1]);
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)


Modified: code/trunk/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_64.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeX86_64.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -47,7 +47,7 @@
         *code_ptr++ = 10 + 3;
     }


-    SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
+    SLJIT_ASSERT(reg_map[TMP_REG3] == 9);
     *code_ptr++ = REX_W | REX_B;
     *code_ptr++ = MOV_r_i32 + 1;
     jump->addr = (sljit_uw)code_ptr;
@@ -65,28 +65,6 @@
     return code_ptr;
 }


-static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type)
-{
-    sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_s32));
-
-    if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
-        *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
-        sljit_unaligned_store_sw(code_ptr, delta);
-    }
-    else {
-        SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
-        *code_ptr++ = REX_W | REX_B;
-        *code_ptr++ = MOV_r_i32 + 1;
-        sljit_unaligned_store_sw(code_ptr, addr);
-        code_ptr += sizeof(sljit_sw);
-        *code_ptr++ = REX_B;
-        *code_ptr++ = GROUP_FF;
-        *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
-    }
-
-    return code_ptr;
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
     sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
@@ -98,8 +76,6 @@
     CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
     set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);


-    compiler->flags_saved = 0;
-
     /* Including the return address saved by the call instruction. */
     saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);


@@ -189,7 +165,7 @@
             local_size -= 4 * sizeof(sljit_sw);
         }
         /* Second instruction */
-        SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg);
+        SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
         *inst++ = REX_W;
         *inst++ = MOV_rm_i32;
         *inst++ = MOD_REG | reg_lmap[SLJIT_R0];
@@ -261,7 +237,6 @@
     CHECK_ERROR();
     CHECK(check_sljit_emit_return(compiler, op, src, srcw));


-    compiler->flags_saved = 0;
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


 #ifdef _WIN64
@@ -387,8 +362,7 @@
     if (b & SLJIT_MEM) {
         if (!(b & OFFS_REG_MASK)) {
             if (NOT_HALFWORD(immb)) {
-                if (emit_load_imm64(compiler, TMP_REG3, immb))
-                    return NULL;
+                PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG3, immb));
                 immb = 0;
                 if (b & REG_MASK)
                     b |= TO_OFFS_REG(TMP_REG3);
@@ -558,7 +532,7 @@
     sljit_u8 *inst;


 #ifndef _WIN64
-    SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
+    SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);


     inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
     FAIL_IF(!inst);
@@ -572,7 +546,7 @@
     *inst++ = MOV_r_rm;
     *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
 #else
-    SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
+    SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);


     inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
     FAIL_IF(!inst);
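
With generate_fixed_jump removed above, generate_far_jump_code is the only place
that emits the 64-bit indirect sequence: load the target into TMP_REG3 (r9) and
branch through it.  For reference, the REX_W|REX_B / MOV_r_i32+1 / GROUP_FF path
corresponds to the byte pattern below (standard x86-64 encodings; the helper
itself is illustrative and not sljit code):

    /* Sketch: mov r9, imm64 followed by jmp r9 (or call r9 for calls). */
    static sljit_u8* far_jump_sketch(sljit_u8 *p, sljit_sw target, int is_call)
    {
        *p++ = 0x49;                  /* REX.W | REX.B */
        *p++ = 0xB9;                  /* mov r9, imm64 */
        sljit_unaligned_store_sw(p, target);
        p += sizeof(sljit_sw);
        *p++ = 0x41;                  /* REX.B */
        *p++ = 0xFF;                  /* opcode group FF */
        *p++ = is_call ? 0xD1 : 0xE1; /* /2 = call r9, /4 = jmp r9 */
        return p;
    }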
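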


Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitNativeX86_common.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -67,12 +67,12 @@
 #define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)


 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
-    0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
+    0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
 };


 #define CHECK_EXTRA_REGS(p, w, do) \
-    if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
-        w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
+    if (p >= SLJIT_R3 && p <= SLJIT_R8) { \
+        w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 6)) * sizeof(sljit_sw); \
         p = SLJIT_MEM1(SLJIT_SP); \
         do; \
     }
@@ -409,13 +409,13 @@
     return 0;
 }


+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
+#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
#endif

-static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
+static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
 {
     sljit_s32 short_jump;
     sljit_uw label_addr;
@@ -423,7 +423,8 @@
     if (jump->flags & JUMP_LABEL)
         label_addr = (sljit_uw)(code + jump->u.label->size);
     else
-        label_addr = jump->u.target;
+        label_addr = jump->u.target - executable_offset;
+
     short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -476,6 +477,8 @@
     sljit_u8 *buf_ptr;
     sljit_u8 *buf_end;
     sljit_u8 len;
+    sljit_sw executable_offset;
+    sljit_sw jump_addr;


     struct sljit_label *label;
     struct sljit_jump *jump;
@@ -494,6 +497,8 @@
     label = compiler->labels;
     jump = compiler->jumps;
     const_ = compiler->consts;
+    executable_offset = SLJIT_EXEC_OFFSET(code);
+
     do {
         buf_ptr = buf->memory;
         buf_end = buf_ptr + buf->used_size;
@@ -506,35 +511,28 @@
                 buf_ptr += len;
             }
             else {
-                if (*buf_ptr >= 4) {
+                if (*buf_ptr >= 2) {
                     jump->addr = (sljit_uw)code_ptr;
                     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
-                        code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
-                    else
-                        code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
+                        code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
+                    else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+                        code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
+#else
+                        code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
+#endif
+                    }
                     jump = jump->next;
                 }
                 else if (*buf_ptr == 0) {
-                    label->addr = (sljit_uw)code_ptr;
+                    label->addr = ((sljit_uw)code_ptr) + executable_offset;
                     label->size = code_ptr - code;
                     label = label->next;
                 }
-                else if (*buf_ptr == 1) {
+                else { /* *buf_ptr is 1 */
                     const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
                     const_ = const_->next;
                 }
-                else {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                    *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
-                    buf_ptr++;
-                    sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
-                    code_ptr += sizeof(sljit_sw);
-                    buf_ptr += sizeof(sljit_sw) - 1;
-#else
-                    code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
-                    buf_ptr += sizeof(sljit_sw);
-#endif
-                }
                 buf_ptr++;
             }
         } while (buf_ptr < buf_end);
@@ -548,24 +546,26 @@


     jump = compiler->jumps;
     while (jump) {
+        jump_addr = jump->addr + executable_offset;
+
         if (jump->flags & PATCH_MB) {
-            SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
-            *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
+            SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
+            *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
         } else if (jump->flags & PATCH_MW) {
             if (jump->flags & JUMP_LABEL) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
+                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
 #else
-                SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
+                SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
 #endif
             }
             else {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
+                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
 #else
-                SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
+                SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
 #endif
             }
         }
@@ -577,12 +577,12 @@
         jump = jump->next;
     }


-    /* Maybe we waste some space because of short jumps. */
+    /* Some space may be wasted because of short jumps. */
     SLJIT_ASSERT(code_ptr <= code + compiler->size);
     compiler->error = SLJIT_ERR_COMPILED;
+    compiler->executable_offset = executable_offset;
     compiler->executable_size = code_ptr - code;
-    SLJIT_ENABLE_EXEC(code, code_ptr);
-    return (void*)code;
+    return (void*)(code + executable_offset);
 }


 /* --------------------------------------------------------------------- */
@@ -605,53 +605,6 @@
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src, sljit_sw srcw);


-static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
-{
-    sljit_u8 *inst;
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-    inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
-    FAIL_IF(!inst);
-    INC_SIZE(5);
-#else
-    inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
-    FAIL_IF(!inst);
-    INC_SIZE(6);
-    *inst++ = REX_W;
-#endif
-    *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
-    *inst++ = 0x64;
-    *inst++ = 0x24;
-    *inst++ = (sljit_u8)sizeof(sljit_sw);
-    *inst++ = PUSHF;
-    compiler->flags_saved = 1;
-    return SLJIT_SUCCESS;
-}
-
-static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags)
-{
-    sljit_u8 *inst;
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-    inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
-    FAIL_IF(!inst);
-    INC_SIZE(5);
-    *inst++ = POPF;
-#else
-    inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
-    FAIL_IF(!inst);
-    INC_SIZE(6);
-    *inst++ = POPF;
-    *inst++ = REX_W;
-#endif
-    *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
-    *inst++ = 0x64;
-    *inst++ = 0x24;
-    *inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
-    compiler->flags_saved = keep_flags;
-    return SLJIT_SUCCESS;
-}
-
 #ifdef _WIN32
 #include <malloc.h>


@@ -772,20 +725,17 @@
     case SLJIT_DIVMOD_SW:
     case SLJIT_DIV_UW:
     case SLJIT_DIV_SW:
-        compiler->flags_saved = 0;
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #ifdef _WIN64
-        SLJIT_COMPILE_ASSERT(
+        SLJIT_ASSERT(
             reg_map[SLJIT_R0] == 0
             && reg_map[SLJIT_R1] == 2
-            && reg_map[TMP_REG1] > 7,
-            invalid_register_assignment_for_div_mul);
+            && reg_map[TMP_REG1] > 7);
 #else
-        SLJIT_COMPILE_ASSERT(
+        SLJIT_ASSERT(
             reg_map[SLJIT_R0] == 0
             && reg_map[SLJIT_R1] < 7
-            && reg_map[TMP_REG1] == 2,
-            invalid_register_assignment_for_div_mul);
+            && reg_map[TMP_REG1] == 2);
 #endif
         compiler->mode32 = op & SLJIT_I32_OP;
 #endif
@@ -1283,7 +1233,6 @@
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src, sljit_sw srcw)
 {
-    sljit_u8* inst;
     sljit_s32 update = 0;
     sljit_s32 op_flags = GET_ALL_FLAGS(op);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1362,14 +1311,6 @@
 #endif
         }


-        if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
-            inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
-            FAIL_IF(!inst);
-            *inst = LEA_r_m;
-            src &= SLJIT_MEM | 0xf;
-            srcw = 0;
-        }
-
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
         if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
             SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
@@ -1413,31 +1354,40 @@
             return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
 #endif


-        if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
-            inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
-            FAIL_IF(!inst);
-            *inst = LEA_r_m;
+        if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
+            if ((src & OFFS_REG_MASK) != 0) {
+                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+                        (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(src), 0));
+            }
+            else if (srcw != 0) {
+                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+                        (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
+            }
         }
+
+        if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
+            if ((dst & OFFS_REG_MASK) != 0) {
+                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+                        (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
+            }
+            else if (dstw != 0) {
+                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+                        (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
+            }
+        }
         return SLJIT_SUCCESS;
     }


-    if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
-        compiler->flags_saved = 0;
-
     switch (op) {
     case SLJIT_NOT:
-        if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
+        if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
             return emit_not_with_flags(compiler, dst, dstw, src, srcw);
         return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);


     case SLJIT_NEG:
-        if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
-            FAIL_IF(emit_save_flags(compiler));
         return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);


     case SLJIT_CLZ:
-        if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
-            FAIL_IF(emit_save_flags(compiler));
         return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
     }


@@ -1806,7 +1756,7 @@
     return SLJIT_SUCCESS;
 }


-static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags,
+static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
     sljit_s32 dst, sljit_sw dstw,
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
@@ -1815,12 +1765,10 @@
     sljit_s32 dst_r, done = 0;


    /* These cases are better handled by the normal code path. */
-    if (!keep_flags) {
-        if (dst == src1 && dstw == src1w)
-            return SLJIT_ERR_UNSUPPORTED;
-        if (dst == src2 && dstw == src2w)
-            return SLJIT_ERR_UNSUPPORTED;
-    }
+    if (dst == src1 && dstw == src1w)
+        return SLJIT_ERR_UNSUPPORTED;
+    if (dst == src2 && dstw == src2w)
+        return SLJIT_ERR_UNSUPPORTED;


     dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;


@@ -2168,54 +2116,28 @@
     compiler->mode32 = op & SLJIT_I32_OP;
 #endif


-    if (GET_OPCODE(op) >= SLJIT_MUL) {
-        if (SLJIT_UNLIKELY(GET_FLAGS(op)))
-            compiler->flags_saved = 0;
-        else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
-            FAIL_IF(emit_save_flags(compiler));
-    }
-
     switch (GET_OPCODE(op)) {
     case SLJIT_ADD:
-        if (!GET_FLAGS(op)) {
-            if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
+        if (!HAS_FLAGS(op)) {
+            if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
                 return compiler->error;
         }
-        else
-            compiler->flags_saved = 0;
-        if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
-            FAIL_IF(emit_save_flags(compiler));
         return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
             dst, dstw, src1, src1w, src2, src2w);
     case SLJIT_ADDC:
-        if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
-            FAIL_IF(emit_restore_flags(compiler, 1));
-        else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
-            FAIL_IF(emit_save_flags(compiler));
-        if (SLJIT_UNLIKELY(GET_FLAGS(op)))
-            compiler->flags_saved = 0;
         return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
             dst, dstw, src1, src1w, src2, src2w);
     case SLJIT_SUB:
-        if (!GET_FLAGS(op)) {
-            if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
+        if (!HAS_FLAGS(op)) {
+            if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
                 return compiler->error;
         }
-        else
-            compiler->flags_saved = 0;
-        if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
-            FAIL_IF(emit_save_flags(compiler));
+
         if (dst == SLJIT_UNUSED)
             return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
         return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
             dst, dstw, src1, src1w, src2, src2w);
     case SLJIT_SUBC:
-        if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
-            FAIL_IF(emit_restore_flags(compiler, 1));
-        else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
-            FAIL_IF(emit_save_flags(compiler));
-        if (SLJIT_UNLIKELY(GET_FLAGS(op)))
-            compiler->flags_saved = 0;
         return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
             dst, dstw, src1, src1w, src2, src2w);
     case SLJIT_MUL:
@@ -2232,13 +2154,13 @@
         return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
             dst, dstw, src1, src1w, src2, src2w);
     case SLJIT_SHL:
-        return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
+        return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
             dst, dstw, src1, src1w, src2, src2w);
     case SLJIT_LSHR:
-        return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
+        return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
             dst, dstw, src1, src1w, src2, src2w);
     case SLJIT_ASHR:
-        return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
+        return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
             dst, dstw, src1, src1w, src2, src2w);
     }


@@ -2249,7 +2171,7 @@
 {
     CHECK_REG_INDEX(check_sljit_get_register_index(reg));
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-    if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
+    if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
         return -1;
 #endif
     return reg_map[reg];
@@ -2407,7 +2329,6 @@
     sljit_s32 src1, sljit_sw src1w,
     sljit_s32 src2, sljit_sw src2w)
 {
-    compiler->flags_saved = 0;
     if (!FAST_IS_REG(src1)) {
         FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
         src1 = TMP_FREG;
@@ -2554,11 +2475,6 @@
     CHECK_ERROR_PTR();
     CHECK_PTR(check_sljit_emit_label(compiler));


-    /* We should restore the flags before the label,
-       since other taken jumps has their own flags as well. */
-    if (SLJIT_UNLIKELY(compiler->flags_saved))
-        PTR_FAIL_IF(emit_restore_flags(compiler, 0));
-
     if (compiler->last_label && compiler->last_label->size == compiler->size)
         return compiler->last_label;


@@ -2581,15 +2497,8 @@
     struct sljit_jump *jump;


     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(type & SLJIT_REWRITABLE_JUMP);
     CHECK_PTR(check_sljit_emit_jump(compiler, type));


-    if (SLJIT_UNLIKELY(compiler->flags_saved)) {
-        if ((type & 0xff) <= SLJIT_JUMP)
-            PTR_FAIL_IF(emit_restore_flags(compiler, 0));
-        compiler->flags_saved = 0;
-    }
-
     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
     PTR_FAIL_IF_NULL(jump);
     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
@@ -2609,7 +2518,7 @@
     PTR_FAIL_IF_NULL(inst);


     *inst++ = 0;
-    *inst++ = type + 4;
+    *inst++ = type + 2;
     return jump;
 }


@@ -2624,12 +2533,6 @@

     CHECK_EXTRA_REGS(src, srcw, (void)0);


-    if (SLJIT_UNLIKELY(compiler->flags_saved)) {
-        if (type <= SLJIT_JUMP)
-            FAIL_IF(emit_restore_flags(compiler, 0));
-        compiler->flags_saved = 0;
-    }
-
     if (type >= SLJIT_CALL1) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
@@ -2667,7 +2570,7 @@
         FAIL_IF_NULL(inst);


         *inst++ = 0;
-        *inst++ = type + 4;
+        *inst++ = type + 2;
     }
     else {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -2706,8 +2609,6 @@


     ADJUST_LOCAL_OFFSET(dst, dstw);
     CHECK_EXTRA_REGS(dst, dstw, (void)0);
-    if (SLJIT_UNLIKELY(compiler->flags_saved))
-        FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));


     type &= 0xff;
     /* setcc = jcc + 0x10. */
@@ -2811,7 +2712,8 @@
     }


     if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
-        SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
+        SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
+
         if (dst != SLJIT_R0) {
             inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
             FAIL_IF(!inst);
@@ -2888,16 +2790,16 @@
     if (NOT_HALFWORD(offset)) {
         FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-        SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
+        SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
         return compiler->error;
 #else
-        return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
+        return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
 #endif
     }
 #endif


     if (offset != 0)
-        return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+        return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
     return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
 }


@@ -2910,7 +2812,6 @@
#endif

     CHECK_ERROR_PTR();
-    CHECK_DYN_CODE_MOD(1);
     CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
     ADJUST_LOCAL_OFFSET(dst, dstw);


@@ -2949,17 +2850,19 @@
     return const_;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
+    SLJIT_UNUSED_ARG(executable_offset);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-    sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4));
+    sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
 #else
-    sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_addr);
+    sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
 #endif
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
+    SLJIT_UNUSED_ARG(executable_offset);
     sljit_unaligned_store_sw((void*)addr, new_constant);
 }


@@ -2995,6 +2898,11 @@
     CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
     CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
     FUNCTION_CHECK_SRC(src, srcw);
+
+    if ((type & 0xff) <= SLJIT_NOT_ZERO)
+        CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+    else
+        CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {


Modified: code/trunk/sljit/sljitProtExecAllocator.c
===================================================================
--- code/trunk/sljit/sljitProtExecAllocator.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitProtExecAllocator.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met:
@@ -25,89 +25,397 @@
  */


/*
- This file contains a simple executable memory allocator where the
- allocated regions are not writable and executable in the same time.
+ This file contains a simple executable memory allocator.

-   This allocator usually uses more memory than sljitExecAllocator.
+   It is assumed that executable code blocks are usually medium (or sometimes
+   large) memory blocks, and that the allocator is not called too frequently
+   (it is less optimized than other allocators). Thus, using it as a generic
+   allocator is not recommended.
+
+   How does it work:
+     Memory is allocated in continuous memory areas called chunks by alloc_chunk()
+     Chunk format:
+     [ block ][ block ] ... [ block ][ block terminator ]
+
+   All blocks and the block terminator start with a block_header. The block
+   header stores the size of the current block (which also gives the offset of
+   the next block) and the size of the previous block. These sizes can also
+   carry special values.
+     Block size:
+       0 - The block is a free_block, with a different size member.
+       1 - The block is a block terminator.
+       n - The block is used at the moment, and the value contains its size.
+     Previous block size:
+       0 - This is the first block of the memory chunk.
+       n - The size of the previous block.
+
+   Using these size values we can traverse the block chain forwards or
+   backwards. The unused blocks are kept in a linked list pointed to by
+   free_blocks; this list is searched for a suitable memory area whenever the
+   allocator is called.
+
+   When a block is freed, the new free block is connected to its adjacent free
+   blocks if possible.
+
+   For example, if the layout is
+     [ free block ][ used block ][ free block ]
+   and "used block" is freed, the three blocks are merged into one:
+     [           one big free block           ]
 */
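
As an aside for readers new to this layout: the sketch below (not part of the patch; it uses plain C types, drops the executable_offset field, and invents the block contents) shows how the size field of each header is enough to walk a chunk forwards until a free block or the block terminator is reached.

    #include <stddef.h>
    #include <stdio.h>

    /* Simplified version of the block_header introduced further down. */
    struct block_header {
        size_t size;      /* 0: free block, 1: terminator, n: used block spanning n bytes */
        size_t prev_size; /* 0: first block of the chunk, n: size of the previous block */
    };

    /* Walk the used blocks of a chunk; stop at a free block or the terminator. */
    static void walk_used_blocks(struct block_header *h)
    {
        while (h->size > 1) {
            printf("used block spanning %zu bytes (header included)\n", h->size);
            h = (struct block_header *)((unsigned char *)h + h->size);
        }
    }

The backward direction works the same way with prev_size, which is what sljit_free_exec() uses further down to find the preceding block when merging.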


-#ifdef _WIN32
+/* --------------------------------------------------------------------- */
+/*  System (OS) functions                                                */
+/* --------------------------------------------------------------------- */


-static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
-{
-    return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
-}
+/* 64 KByte. */
+#define CHUNK_SIZE    0x10000


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
-{
-    SLJIT_UNUSED_ARG(size);
-    VirtualFree(chunk, 0, MEM_RELEASE);
-}
+struct chunk_header {
+    void *executable;
+    int fd;
+};


-static SLJIT_INLINE void enable_exec_permission(void* chunk, sljit_uw size)
-{
-    sljit_uw *uw_ptr = (sljit_uw *)ptr;
+/*
+   alloc_chunk / free_chunk :
+     * allocate executable system memory chunks
+     * the size is always divisible by CHUNK_SIZE
+   allocator_grab_lock / allocator_release_lock :
+     * make the allocator thread safe
+     * can be empty if the OS (or the application) does not support threading
+     * only the allocator requires this lock; sljit itself is fully thread
+       safe as it only uses local variables
+*/
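
For reference, on a POSIX system with threads the two locking hooks named above could be implemented as in the sketch below; this is illustrative only, and the hooks actually used by this allocator are supplied elsewhere in sljit (sljitUtils.c).

    #include <pthread.h>

    /* Hypothetical pthread-based hooks; not part of the patch. */
    static pthread_mutex_t allocator_lock = PTHREAD_MUTEX_INITIALIZER;

    static void allocator_grab_lock(void)
    {
        pthread_mutex_lock(&allocator_lock);
    }

    static void allocator_release_lock(void)
    {
        pthread_mutex_unlock(&allocator_lock);
    }

A single-threaded embedder could just as well leave both functions empty, as the comment above notes.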


-    VirtualProtect(chunk, size, PAGE_EXECUTE_READ, NULL);
-}
+#include <fcntl.h>


-#else
+#ifndef O_NOATIME
+#define O_NOATIME 0
+#endif

-static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+#ifdef __O_TMPFILE
+#ifndef O_TMPFILE
+#define O_TMPFILE    (__O_TMPFILE | O_DIRECTORY)
+#endif
+#endif
+
+int mkostemp(char *template, int flags);
+char *secure_getenv(const char *name);
+
+static SLJIT_INLINE int create_tempfile(void)
 {
-    void* retval;
+    int fd;


-#ifdef MAP_ANON
-    retval = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+    char tmp_name[256];
+    size_t tmp_name_len;
+    char *dir;
+    size_t len;
+
+#ifdef P_tmpdir
+    len = (P_tmpdir != NULL) ? strlen(P_tmpdir) : 0;
+
+    if (len > 0 && len < sizeof(tmp_name)) {
+        strcpy(tmp_name, P_tmpdir);
+        tmp_name_len = len;
+    }
+    else {
+        strcpy(tmp_name, "/tmp");
+        tmp_name_len = 4;
+    }
 #else
-    if (dev_zero < 0) {
-        if (open_dev_zero())
-            return NULL;
+    strcpy(tmp_name, "/tmp");
+    tmp_name_len = 4;
+#endif
+
+    dir = secure_getenv("TMPDIR");
+    if (dir) {
+        len = strlen(dir);
+        if (len > 0 && len < sizeof(tmp_name)) {
+            strcpy(tmp_name, dir);
+            tmp_name_len = len;
+        }
     }
-    retval = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
+
+    SLJIT_ASSERT(tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name));
+
+    while (tmp_name_len > 0 && tmp_name[tmp_name_len - 1] == '/') {
+        tmp_name_len--;
+        tmp_name[tmp_name_len] = '\0';
+    }
+
+#ifdef O_TMPFILE
+    fd = open(tmp_name, O_TMPFILE | O_EXCL | O_RDWR | O_NOATIME | O_CLOEXEC, S_IRUSR | S_IWUSR);
+    if (fd != -1)
+        return fd;
 #endif


-    return (retval != MAP_FAILED) ? retval : NULL;
+    if (tmp_name_len + 7 >= sizeof(tmp_name))
+    {
+        return -1;
+    }
+
+    strcpy(tmp_name + tmp_name_len, "/XXXXXX");
+    fd = mkostemp(tmp_name, O_CLOEXEC | O_NOATIME);
+
+    if (fd == -1)
+        return fd;
+
+    if (unlink(tmp_name)) {
+        close(fd);
+        return -1;
+    }
+
+    return fd;
 }


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size)
 {
-    munmap(chunk, size);
+    struct chunk_header *retval;
+    int fd;
+
+    fd = create_tempfile();
+    if (fd == -1)
+        return NULL;
+
+    if (ftruncate(fd, size)) {
+        close(fd);
+        return NULL;
+    }
+
+    retval = (struct chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+    if (retval == MAP_FAILED) {
+        close(fd);
+        return NULL;
+    }
+
+    retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+
+    if (retval->executable == MAP_FAILED) {
+        munmap(retval, size);
+        close(fd);
+        return NULL;
+    }
+
+    retval->fd = fd;
+    return retval;
 }


-static SLJIT_INLINE void enable_exec_permission(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 {
-    sljit_uw *uw_ptr = (sljit_uw *)chunk;
+    struct chunk_header *header = ((struct chunk_header *)chunk) - 1;


-    mprotect(uw_ptr - 1, size + sizeof(sljit_uw), PROT_READ | PROT_EXEC);
+    int fd = header->fd;
+    munmap(header->executable, size);
+    munmap(header, size);
+    close(fd);
 }


-#endif
-
 /* --------------------------------------------------------------------- */
 /*  Common functions                                                     */
 /* --------------------------------------------------------------------- */


+#define CHUNK_MASK    (~(CHUNK_SIZE - 1))
+
+struct block_header {
+    sljit_uw size;
+    sljit_uw prev_size;
+    sljit_sw executable_offset;
+};
+
+struct free_block {
+    struct block_header header;
+    struct free_block *next;
+    struct free_block *prev;
+    sljit_uw size;
+};
+
+#define AS_BLOCK_HEADER(base, offset) \
+    ((struct block_header*)(((sljit_u8*)base) + offset))
+#define AS_FREE_BLOCK(base, offset) \
+    ((struct free_block*)(((sljit_u8*)base) + offset))
+#define MEM_START(base)        ((void*)((base) + 1))
+#define ALIGN_SIZE(size)    (((size) + sizeof(struct block_header) + 7) & ~7)
+
+static struct free_block* free_blocks;
+static sljit_uw allocated_size;
+static sljit_uw total_size;
+
+static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
+{
+    free_block->header.size = 0;
+    free_block->size = size;
+
+    free_block->next = free_blocks;
+    free_block->prev = NULL;
+    if (free_blocks)
+        free_blocks->prev = free_block;
+    free_blocks = free_block;
+}
+
+static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
+{
+    if (free_block->next)
+        free_block->next->prev = free_block->prev;
+
+    if (free_block->prev)
+        free_block->prev->next = free_block->next;
+    else {
+        SLJIT_ASSERT(free_blocks == free_block);
+        free_blocks = free_block->next;
+    }
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 {
-    sljit_uw *ptr = (sljit_uw *)alloc_chunk(size + sizeof (sljit_uw));
+    struct chunk_header *chunk_header;
+    struct block_header *header;
+    struct block_header *next_header;
+    struct free_block *free_block;
+    sljit_uw chunk_size;
+    sljit_sw executable_offset;


-    *ptr = size;
-    return (void*)(ptr + 1);
+    allocator_grab_lock();
+    if (size < (64 - sizeof(struct block_header)))
+        size = (64 - sizeof(struct block_header));
+    size = ALIGN_SIZE(size);
+
+    free_block = free_blocks;
+    while (free_block) {
+        if (free_block->size >= size) {
+            chunk_size = free_block->size;
+            if (chunk_size > size + 64) {
+                /* We just cut a block from the end of the free block. */
+                chunk_size -= size;
+                free_block->size = chunk_size;
+                header = AS_BLOCK_HEADER(free_block, chunk_size);
+                header->prev_size = chunk_size;
+                header->executable_offset = free_block->header.executable_offset;
+                AS_BLOCK_HEADER(header, size)->prev_size = size;
+            }
+            else {
+                sljit_remove_free_block(free_block);
+                header = (struct block_header*)free_block;
+                size = chunk_size;
+            }
+            allocated_size += size;
+            header->size = size;
+            allocator_release_lock();
+            return MEM_START(header);
+        }
+        free_block = free_block->next;
+    }
+
+    chunk_size = sizeof(struct chunk_header) + sizeof(struct block_header);
+    chunk_size = (chunk_size + size + CHUNK_SIZE - 1) & CHUNK_MASK;
+
+    chunk_header = alloc_chunk(chunk_size);
+    if (!chunk_header) {
+        allocator_release_lock();
+        return NULL;
+    }
+
+    executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header);
+
+    chunk_size -= sizeof(struct chunk_header) + sizeof(struct block_header);
+    total_size += chunk_size;
+
+    header = (struct block_header *)(chunk_header + 1);
+
+    header->prev_size = 0;
+    header->executable_offset = executable_offset;
+    if (chunk_size > size + 64) {
+        /* Cut the allocated space into a free and a used block. */
+        allocated_size += size;
+        header->size = size;
+        chunk_size -= size;
+
+        free_block = AS_FREE_BLOCK(header, size);
+        free_block->header.prev_size = size;
+        free_block->header.executable_offset = executable_offset;
+        sljit_insert_free_block(free_block, chunk_size);
+        next_header = AS_BLOCK_HEADER(free_block, chunk_size);
+    }
+    else {
+        /* All space belongs to this allocation. */
+        allocated_size += chunk_size;
+        header->size = chunk_size;
+        next_header = AS_BLOCK_HEADER(header, chunk_size);
+    }
+    next_header->size = 1;
+    next_header->prev_size = chunk_size;
+    next_header->executable_offset = executable_offset;
+    allocator_release_lock();
+    return MEM_START(header);
 }


 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
 {
-    sljit_uw *uw_ptr = (sljit_uw *)ptr;
+    struct block_header *header;
+    struct free_block* free_block;


-    free_chunk(uw_ptr - 1, uw_ptr[-1]);
+    allocator_grab_lock();
+    header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
+    header = AS_BLOCK_HEADER(header, -header->executable_offset);
+    allocated_size -= header->size;
+
+    /* Connecting free blocks together if possible. */
+
+    /* If header->prev_size == 0, free_block will be equal to header.
+       In this case, free_block->header.size will be > 0. */
+    free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
+    if (SLJIT_UNLIKELY(!free_block->header.size)) {
+        free_block->size += header->size;
+        header = AS_BLOCK_HEADER(free_block, free_block->size);
+        header->prev_size = free_block->size;
+    }
+    else {
+        free_block = (struct free_block*)header;
+        sljit_insert_free_block(free_block, header->size);
+    }
+
+    header = AS_BLOCK_HEADER(free_block, free_block->size);
+    if (SLJIT_UNLIKELY(!header->size)) {
+        free_block->size += ((struct free_block*)header)->size;
+        sljit_remove_free_block((struct free_block*)header);
+        header = AS_BLOCK_HEADER(free_block, free_block->size);
+        header->prev_size = free_block->size;
+    }
+
+    /* The whole chunk is free. */
+    if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
+        /* Even if this chunk is released, at least (allocated_size / 2) of free space remains. */
+        if (total_size - free_block->size > (allocated_size * 3 / 2)) {
+            total_size -= free_block->size;
+            sljit_remove_free_block(free_block);
+            free_chunk(free_block, free_block->size + sizeof(struct block_header));
+        }
+    }
+
+    allocator_release_lock();
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_enable_exec(void* from, void *to)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
 {
-    enable_exec_permission(from, ((sljit_u8 *)to) - ((sljit_u8 *)from));
+    struct free_block* free_block;
+    struct free_block* next_free_block;
+
+    allocator_grab_lock();
+
+    free_block = free_blocks;
+    while (free_block) {
+        next_free_block = free_block->next;
+        if (!free_block->header.prev_size && 
+                AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
+            total_size -= free_block->size;
+            sljit_remove_free_block(free_block);
+            free_chunk(free_block, free_block->size + sizeof(struct block_header));
+        }
+        free_block = next_free_block;
+    }
+
+    SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
+    allocator_release_lock();
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr)
 {
+    return ((struct block_header *)(ptr))[-1].executable_offset;
 }
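
With the dual-mapping scheme above, a caller writes generated code through the pointer returned by sljit_malloc_exec() and executes it through the read-only executable alias; the new sljit_exec_offset() helper returns the distance between the two mappings. A rough usage sketch (error handling and the actual code emission are omitted, and code_size is a made-up placeholder):

    /* Allocate a dual-mapped block, derive the executable alias,
       then release it again through the writable pointer. */
    sljit_uw code_size = 4096;  /* placeholder size */
    void *rw = sljit_malloc_exec(code_size);
    if (rw != NULL) {
        sljit_sw offset = sljit_exec_offset(rw);
        void *rx = (void *)((sljit_u8 *)rw + offset);

        /* ... emit machine code into rw, then call it through rx ... */

        sljit_free_exec(rw);
    }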


Modified: code/trunk/sljit/sljitUtils.c
===================================================================
--- code/trunk/sljit/sljitUtils.c    2017-01-11 16:05:25 UTC (rev 1675)
+++ code/trunk/sljit/sljitUtils.c    2017-01-23 07:41:37 UTC (rev 1676)
@@ -1,7 +1,7 @@
 /*
  *    Stack-less Just-In-Time compiler
  *
- *    Copyright 2009-2012 Zoltan Herczeg (hzmester@???). All rights reserved.
+ *    Copyright Zoltan Herczeg (hzmester@???). All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are
  * permitted provided that the following conditions are met: