[Pcre-svn] [1491] code/trunk: Major JIT compiler update.

Author:  Subversion repository
Date:
To:      pcre-svn
Subject: [Pcre-svn] [1491] code/trunk: Major JIT compiler update.
Revision: 1491
          http://vcs.pcre.org/viewvc?view=rev&revision=1491
Author:   zherczeg
Date:     2014-07-07 08:11:16 +0100 (Mon, 07 Jul 2014)


Log Message:
-----------
Major JIT compiler update.
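
The bulk of the visible change in pcre_jit_compile.c follows a renaming of the sljit
register interface (see the sljit files in the list below). As the first hunk shows,
the old register names are replaced throughout:

    SLJIT_SCRATCH_REG1/2/3, SLJIT_SCRATCH_EREG1/2  ->  SLJIT_R0 .. SLJIT_R4  (scratch registers)
    SLJIT_SAVED_REG1/2/3,   SLJIT_SAVED_EREG1/2    ->  SLJIT_S0 .. SLJIT_S4  (saved registers)
    SLJIT_LOCALS_REG                               ->  SLJIT_SP              (base of the local data area)

The sljit operation and condition codes (SLJIT_MOV, SLJIT_SUB, SLJIT_C_*, and so on)
are untouched in the hunks shown here.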

Modified Paths:
--------------
    code/trunk/pcre_jit_compile.c
    code/trunk/sljit/sljitConfigInternal.h
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitLir.h
    code/trunk/sljit/sljitNativeARM_32.c
    code/trunk/sljit/sljitNativeARM_64.c
    code/trunk/sljit/sljitNativeARM_T2_32.c
    code/trunk/sljit/sljitNativeMIPS_32.c
    code/trunk/sljit/sljitNativeMIPS_64.c
    code/trunk/sljit/sljitNativeMIPS_common.c
    code/trunk/sljit/sljitNativePPC_common.c
    code/trunk/sljit/sljitNativeSPARC_32.c
    code/trunk/sljit/sljitNativeSPARC_common.c
    code/trunk/sljit/sljitNativeTILEGX_64.c
    code/trunk/sljit/sljitNativeX86_32.c
    code/trunk/sljit/sljitNativeX86_64.c
    code/trunk/sljit/sljitNativeX86_common.c
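
Most edits in the diff below are mechanical: every access to the local data area that
used to be addressed through SLJIT_LOCALS_REG now goes through SLJIT_SP, and the
scratch/saved register names are swapped for the new short forms, with the surrounding
logic unchanged. Two typical before/after pairs, taken verbatim from the first hunks of
pcre_jit_compile.c:

    /* rev 1490: load OVECTOR(0) relative to the old locals register */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
    /* rev 1491: the same load; the locals base is now called SLJIT_SP */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));

    /* rev 1490: old scratch/saved register names */
    OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
    /* rev 1491: the same operation with SLJIT_R0 / SLJIT_S0 */
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));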


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/pcre_jit_compile.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -475,16 +475,16 @@
 /* Used for accessing the elements of the stack. */
 #define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))


-#define TMP1          SLJIT_SCRATCH_REG1
-#define TMP2          SLJIT_SCRATCH_REG3
-#define TMP3          SLJIT_SCRATCH_EREG2
-#define STR_PTR       SLJIT_SAVED_REG1
-#define STR_END       SLJIT_SAVED_REG2
-#define STACK_TOP     SLJIT_SCRATCH_REG2
-#define STACK_LIMIT   SLJIT_SAVED_REG3
-#define ARGUMENTS     SLJIT_SAVED_EREG1
-#define COUNT_MATCH   SLJIT_SAVED_EREG2
-#define RETURN_ADDR   SLJIT_SCRATCH_EREG1
+#define TMP1          SLJIT_R0
+#define TMP2          SLJIT_R2
+#define TMP3          SLJIT_R3
+#define STR_PTR       SLJIT_S0
+#define STR_END       SLJIT_S1
+#define STACK_TOP     SLJIT_R1
+#define STACK_LIMIT   SLJIT_S2
+#define COUNT_MATCH   SLJIT_S3
+#define ARGUMENTS     SLJIT_S4
+#define RETURN_ADDR   SLJIT_R4


 /* Local space layout. */
 /* These two locals can be used by the current opcode. */
@@ -1441,7 +1441,7 @@
     SLJIT_ASSERT(common->has_set_som);
     if (!setsom_found)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
       stackpos += (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
@@ -1457,7 +1457,7 @@
     SLJIT_ASSERT(common->mark_ptr != 0);
     if (!setmark_found)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
       stackpos += (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
@@ -1470,7 +1470,7 @@
     case OP_RECURSE:
     if (common->has_set_som && !setsom_found)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
       stackpos += (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
@@ -1479,7 +1479,7 @@
       }
     if (common->mark_ptr != 0 && !setmark_found)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
       stackpos += (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
@@ -1488,7 +1488,7 @@
       }
     if (common->capture_last_ptr != 0 && !capture_last_found)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
       stackpos += (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
@@ -1504,7 +1504,7 @@
     case OP_SCBRAPOS:
     if (common->capture_last_ptr != 0 && !capture_last_found)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
       stackpos += (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
@@ -1514,8 +1514,8 @@
     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
     stackpos += (int)sizeof(sljit_sw);
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
     stackpos += (int)sizeof(sljit_sw);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
@@ -1895,7 +1895,7 @@
           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
           stackptr += sizeof(sljit_sw);
           }
-        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
         tmp1empty = FALSE;
         tmp1next = FALSE;
         }
@@ -1906,7 +1906,7 @@
           OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
           stackptr += sizeof(sljit_sw);
           }
-        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
         tmp2empty = FALSE;
         tmp1next = TRUE;
         }
@@ -1916,7 +1916,7 @@
       if (tmp1next)
         {
         SLJIT_ASSERT(!tmp1empty);
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
         tmp1empty = stackptr >= stacktop;
         if (!tmp1empty)
           {
@@ -1928,7 +1928,7 @@
       else
         {
         SLJIT_ASSERT(!tmp2empty);
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
         tmp2empty = stackptr >= stacktop;
         if (!tmp2empty)
           {
@@ -2102,8 +2102,8 @@
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
 #endif
 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
 }
@@ -2123,19 +2123,19 @@
 /* At this point we can freely use all temporary registers. */
 SLJIT_ASSERT(length > 1);
 /* TMP1 returns with begin - 1. */
-OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
 if (length < 8)
   {
   for (i = 1; i < length; i++)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
   }
 else
   {
-  GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
-  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
+  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
+  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
   loop = LABEL();
-  OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
-  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
   JUMPTO(SLJIT_C_NOT_ZERO, loop);
   }
 }
@@ -2149,11 +2149,11 @@
 SLJIT_ASSERT(length > 1);
 /* OVECTOR(1) contains the "string begin - 1" constant. */
 if (length > 2)
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
 if (length < 8)
   {
   for (i = 2; i < length; i++)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
   }
 else
   {
@@ -2167,11 +2167,11 @@


OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
}

@@ -2205,44 +2205,44 @@
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
-OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

-OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
-OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
+ OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
+OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
-OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
-OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
-GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
+OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
+OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
+GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
/* Unlikely, but possible */
-early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
+early_quit = CMP(SLJIT_C_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
loop = LABEL();
-OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
-OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
+OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
+OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
-OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
+OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
+OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
+OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_C_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
{
- GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
+ GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

- /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
+ /* OVECTOR(0) is never equal to SLJIT_S2. */
loop = LABEL();
- OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
- OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
- CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
+ OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
+ OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+ CMPTO(SLJIT_C_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
+ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
else
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
@@ -2253,39 +2253,39 @@
DEFINE_COMPILER;
struct sljit_jump *jump;

-SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
+SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
&& (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

-OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
+OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
-OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
-CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
+OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
+CMPTO(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
-OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
-OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
+OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
+OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

-jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
-OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
+jump = CMP(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
+OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
-OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
+OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
JUMPHERE(jump);

-OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
-OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
+OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
+OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
-OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
+OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

-OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
+OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
-OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
+OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
-OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
+OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

JUMPTO(SLJIT_JUMP, quit);
}
@@ -2299,17 +2299,17 @@
if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
{
/* The value of -1 must be kept for start_used_ptr! */
- OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
/* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(jump);
}
else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
{
- jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+ jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(jump);
}
}
@@ -2445,12 +2445,12 @@
return;

if (!force)
- jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+ jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
- jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
+ jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
 else
   {
   if (common->partialmatchlabel != NULL)
@@ -2478,13 +2478,13 @@
 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
   {
-  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
   add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
   }
 else
   {
-  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
+  add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
   if (common->partialmatchlabel != NULL)
     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
   else
@@ -2506,10 +2506,10 @@


 /* Partial matching mode. */
 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
-add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
+add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
   {
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
   add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
   }
 else
@@ -3065,19 +3065,19 @@
     CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
     JUMPHERE(end);
-    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
     }
   else
     {
     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
     mainloop = LABEL();
     /* Continual stores does not cause data dependency. */
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
     read_char_range(common, common->nlmin, common->nlmax, TRUE);
     check_newlinechar(common, common->nltype, &newline, TRUE);
     CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
     JUMPHERE(end);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
     set_jumps(newline, LABEL());
     }


@@ -3680,7 +3680,7 @@
 if (firstline)
   {
   SLJIT_ASSERT(common->first_line_end != 0);
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
   OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
   quit = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
@@ -3751,7 +3751,7 @@
 if (firstline)
   {
   if (range_right >= 0)
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
   OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
   if (range_right >= 0)
     {
@@ -3780,7 +3780,7 @@
   {
   SLJIT_ASSERT(common->first_line_end != 0);
   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
-  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
   }


start = LABEL();
@@ -3840,7 +3840,7 @@
{
SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
}

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
@@ -3930,7 +3930,7 @@
{
SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
}

start = LABEL();
@@ -4004,7 +4004,7 @@
pcre_uint32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
@@ -4049,7 +4049,7 @@
JUMPHERE(found);
if (foundoc)
JUMPHERE(foundoc);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
JUMPHERE(alreadyfound);
JUMPHERE(toolong);
return notfound;
@@ -4101,11 +4101,11 @@

SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
@@ -4125,7 +4125,7 @@
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
JUMPHERE(jump);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
}
else
#endif
@@ -4141,7 +4141,7 @@
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#elif defined SUPPORT_UTF
@@ -4195,8 +4195,8 @@
}
set_jumps(skipread_list, LABEL());

-OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
-sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}

static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
@@ -4454,7 +4454,7 @@
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

@@ -4468,7 +4468,7 @@
JUMPHERE(jump);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
-OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

@@ -4484,8 +4484,8 @@
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -4512,8 +4512,8 @@
JUMPHERE(jump);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
-OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
-OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
+OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

@@ -5418,7 +5418,7 @@
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
/* Optimize register allocation: use a real register. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

label = LABEL();
@@ -5438,7 +5438,7 @@

OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
JUMPHERE(jump[0]);
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

   if (common->mode == JIT_PARTIAL_HARD_COMPILE)
     {
@@ -5504,12 +5504,12 @@
       }
     else
       {
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
       read_char_range(common, common->nlmin, common->nlmax, TRUE);
       add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
       add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
       add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
-      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
       }
     JUMPHERE(jump[2]);
     JUMPHERE(jump[3]);
@@ -5913,21 +5913,21 @@


SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

-OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

count--;
while (count-- > 0)
{
offset = GET2(slot, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
- add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
+ add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
slot += common->name_entry_size;
}

offset = GET2(slot, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
if (backtracks != NULL && !common->jscript_compat)
- add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
+ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));

 set_jumps(found, LABEL());
 }
@@ -5944,10 +5944,10 @@
 if (ref)
   {
   offset = GET2(cc, 1) << 1;
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
   /* OVECTOR(1) contains the "string begin - 1" constant. */
   if (withchecks && !common->jscript_compat)
-    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
   }
 else
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
@@ -5955,9 +5955,9 @@
 #if defined SUPPORT_UTF && defined SUPPORT_UCP
 if (common->utf && *cc == OP_REFI)
   {
-  SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
+  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
   if (ref)
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
   else
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));


@@ -5965,11 +5965,11 @@
     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);


   /* Needed to save important temporary registers. */
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
-  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
   sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
-  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
   if (common->mode == JIT_COMPILE)
     add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
   else
@@ -5986,7 +5986,7 @@
 #endif /* SUPPORT_UTF && SUPPORT_UCP */
   {
   if (ref)
-    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
   else
     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);


@@ -6089,7 +6089,7 @@
     {
     allocate_stack(common, 2);
     if (ref)
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
     /* Temporary release of STR_PTR. */
@@ -6097,12 +6097,12 @@
     /* Handles both invalid and empty cases. Since the minimum repeat,
     is zero the invalid case is basically the same as an empty case. */
     if (ref)
-      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
     else
       {
       compile_dnref_search(common, ccbegin, NULL);
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
       zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
       }
     /* Restore if not zero length. */
@@ -6112,35 +6112,35 @@
     {
     allocate_stack(common, 1);
     if (ref)
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
     if (ref)
       {
-      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
-      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
       }
     else
       {
       compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
       zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
       }
     }


   if (min > 1 || max > 1)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);


   label = LABEL();
   if (!ref)
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
   compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);


   if (min > 1 || max > 1)
     {
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
     if (min > 1)
       CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
     if (max > 1)
@@ -6170,7 +6170,7 @@


 allocate_stack(common, ref ? 2 : 3);
 if (ref)
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
 if (type != OP_CRMINSTAR)
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
@@ -6180,7 +6180,7 @@
   /* Handles both invalid and empty cases. Since the minimum repeat,
   is zero the invalid case is basically the same as an empty case. */
   if (ref)
-    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
   else
     {
     compile_dnref_search(common, ccbegin, NULL);
@@ -6196,8 +6196,8 @@
   {
   if (ref)
     {
-    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
-    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
     }
   else
     {
@@ -6282,15 +6282,15 @@


if (common->has_set_som && common->mark_ptr != 0)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
allocate_stack(common, 2);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
}
else if (common->has_set_som || common->mark_ptr != 0)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
allocate_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
}
@@ -6364,14 +6364,14 @@

allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);

/* These pointer sized fields temporarly stores internal variables. */
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);

@@ -6382,12 +6382,12 @@
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers. */
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
-OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
-GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
+OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
+GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
-OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));

 /* Check return value. */
@@ -6461,14 +6461,14 @@
   {
   extrasize = needs_control_head ? 2 : 1;
   if (framesize == no_frame)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
   allocate_stack(common, extrasize);
   if (needs_control_head)
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
   if (needs_control_head)
     {
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
     }
   }
@@ -6476,17 +6476,17 @@
   {
   extrasize = needs_control_head ? 3 : 2;
   allocate_stack(common, framesize + extrasize);
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
   OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
   if (needs_control_head)
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
   if (needs_control_head)
     {
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
     }
   else
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
@@ -6541,26 +6541,26 @@
   if (framesize < 0)
     {
     if (framesize == no_frame)
-      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
     else
       free_stack(common, extrasize);
     if (needs_control_head)
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
     }
   else
     {
     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
       {
       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
-      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
       if (needs_control_head)
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
       }
     else
       {
-      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
       if (needs_control_head)
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
       }
     }
@@ -6578,7 +6578,7 @@
         {
         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
         }
       OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
@@ -6586,7 +6586,7 @@
     else if (framesize >= 0)
       {
       /* For OP_BRA and OP_BRAMINZERO. */
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
       }
     }
   add_jump(compiler, found, JUMP(SLJIT_JUMP));
@@ -6630,10 +6630,10 @@
   set_jumps(common->positive_assert_quit, LABEL());
   SLJIT_ASSERT(framesize != no_stack);
   if (framesize < 0)
-    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
+    OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
   else
     {
-    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
     }
@@ -6641,7 +6641,7 @@
   }


if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
   {
@@ -6672,7 +6672,7 @@
       }
     else
       free_stack(common, framesize + extrasize);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
     }
   jump = JUMP(SLJIT_JUMP);
   if (bra != OP_BRAZERO)
@@ -6702,13 +6702,13 @@
     if (bra == OP_BRA)
       {
       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
-      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
       }
     else
       {
       /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
-      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
       if (extrasize == 2)
         {
         OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
@@ -6734,9 +6734,9 @@
     JUMPHERE(brajump);
     if (framesize >= 0)
       {
-      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
       }
     set_jumps(backtrack->common.topbacktracks, LABEL());
     }
@@ -6768,7 +6768,7 @@
       }
     else
       free_stack(common, framesize + extrasize);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
     }


   if (bra == OP_BRAZERO)
@@ -6809,7 +6809,7 @@
 if (framesize < 0)
   {
   if (framesize == no_frame)
-    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
   else
     {
     stacksize = needs_control_head ? 1 : 0;
@@ -6827,13 +6827,13 @@
   else if (ket == OP_KETRMIN)
     {
     /* Move the STR_PTR to the private_data_ptr. */
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
     }
   }
 else
   {
   stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
-  OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
+  OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
   if (needs_control_head)
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);


@@ -6844,7 +6844,7 @@
     }
   }
 if (needs_control_head)
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
 }


static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
@@ -6853,20 +6853,20 @@

 if (common->capture_last_ptr != 0)
   {
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
   stacksize++;
   }
 if (common->optimized_cbracket[offset >> 1] == 0)
   {
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
-  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
   stacksize += 2;
   }
 return stacksize;
@@ -7065,12 +7065,12 @@
       if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
         {
         /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
-        braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
         }
       else
         {
         /* Except when the whole stack frame must be saved. */
-        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
         braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
         }
       JUMPHERE(skip);
@@ -7086,7 +7086,7 @@


 if (repeat_type != 0)
   {
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
   if (repeat_type == OP_EXACT)
     rmax_label = LABEL();
   }
@@ -7107,7 +7107,7 @@
   stacksize = 0;
   if (needs_control_head)
     {
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
     stacksize++;
     }


@@ -7118,12 +7118,12 @@
       {
       stacksize += 2;
       if (!needs_control_head)
-        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
       }
     else
       {
       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
       if (ket == OP_KETRMAX || has_alternatives)
         stacksize++;
       }
@@ -7141,10 +7141,10 @@
     if (ket == OP_KETRMIN)
       {
       if (needs_control_head)
-        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
       if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
-        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
+        OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
       }
     else if (ket == OP_KETRMAX || has_alternatives)
@@ -7161,20 +7161,20 @@
     if (needs_control_head)
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);


-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
     OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));


     stacksize = needs_control_head ? 1 : 0;
     if (ket != OP_KET || has_alternatives)
       {
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
       stacksize++;
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
       }
     else
       {
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
       }
     init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
@@ -7187,26 +7187,26 @@
     {
     SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
     allocate_stack(common, 2);
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
     }
   else
     {
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
     allocate_stack(common, 1);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
     }
   }
 else if (opcode == OP_SBRA || opcode == OP_SCOND)
   {
   /* Saving the previous value. */
-  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
   allocate_stack(common, 1);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
   }
 else if (has_alternatives)
@@ -7223,7 +7223,7 @@
     {
     SLJIT_ASSERT(has_alternatives);
     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
-      CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
+      CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
     matchingpath += 1 + IMM2_SIZE;
     }
   else if (*matchingpath == OP_DNCREF)
@@ -7233,13 +7233,13 @@
     i = GET2(matchingpath, 1 + IMM2_SIZE);
     slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
-    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
+    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
     slot += common->name_entry_size;
     i--;
     while (i-- > 0)
       {
-      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
       OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
       slot += common->name_entry_size;
       }
@@ -7327,7 +7327,7 @@
 if (repeat_type == OP_MINUPTO)
   {
   /* We need to preserve the counter. TMP2 will be used below. */
-  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
   stacksize++;
   }
 if (ket != OP_KET || bra != OP_BRA)
@@ -7377,7 +7377,7 @@
 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
   {
   SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
   }


 if (ket == OP_KETRMAX)
@@ -7386,7 +7386,7 @@
     {
     if (has_alternatives)
       BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
     JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
     /* Drop STR_PTR for greedy plus quantifier. */
     if (opcode != OP_ONCE)
@@ -7399,7 +7399,7 @@
     /* Checking zero-length iteration. */
     if (opcode != OP_ONCE)
       {
-      CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
+      CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
       /* Drop STR_PTR for greedy plus quantifier. */
       if (bra != OP_BRAZERO)
         free_stack(common, 1);
@@ -7416,13 +7416,13 @@
 if (repeat_type == OP_EXACT)
   {
   count_match(common);
-  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
   JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
   }
 else if (repeat_type == OP_UPTO)
   {
   /* We need to preserve the counter. */
-  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
   allocate_stack(common, 1);
   OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
   }
@@ -7442,7 +7442,7 @@
     framesize is < 0, OP_ONCE will do the release itself. */
     if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
       {
-      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
       }
     else if (ket == OP_KETRMIN && opcode != OP_ONCE)
@@ -7537,20 +7537,20 @@
   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
   allocate_stack(common, stacksize);
   if (framesize == no_frame)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);


   stack = 0;
   if (offset != 0)
     {
     stack = 2;
-    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
     if (common->capture_last_ptr != 0)
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
     if (needs_control_head)
-      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
     if (common->capture_last_ptr != 0)
       {
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
@@ -7560,7 +7560,7 @@
   else
     {
     if (needs_control_head)
-      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
     stack = 1;
     }
@@ -7587,10 +7587,10 @@
   BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;


   allocate_stack(common, stacksize);
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
   if (needs_control_head)
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
-  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+  OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));


stack = 0;
if (!zero)
@@ -7614,7 +7614,7 @@
}

if (offset != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

 loop = LABEL();
 while (*cc != OP_KETRPOS)
@@ -7630,16 +7630,16 @@
   if (framesize < 0)
     {
     if (framesize == no_frame)
-      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);


     if (offset != 0)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
       if (common->capture_last_ptr != 0)
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
       }
     else
       {
@@ -7658,17 +7658,17 @@
     {
     if (offset != 0)
       {
-      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
+      OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
       if (common->capture_last_ptr != 0)
-        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
       }
     else
       {
-      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
       OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
       if (opcode == OP_SBRAPOS)
         OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
@@ -7688,7 +7688,7 @@
     }


   if (needs_control_head)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));


   JUMPTO(SLJIT_JUMP, loop);
   flush_stubs(common);
@@ -7701,7 +7701,7 @@
   if (framesize < 0)
     {
     if (offset != 0)
-      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
     else
       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     }
@@ -7711,12 +7711,12 @@
       {
       /* Last alternative. */
       if (*cc == OP_KETRPOS)
-        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
-      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
+        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
       }
     else
       {
-      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
       OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
       }
     }
@@ -7853,7 +7853,7 @@
 struct sljit_jump *jump = NULL;
 struct sljit_label *label;
 int private_data_ptr = PRIVATE_DATA(cc);
-int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
+int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
 int tmp_base, tmp_offset;
@@ -7896,7 +7896,7 @@
   case OP_XCLASS:
   case OP_NOTPROP:
   case OP_PROP:
-  tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
+  tmp_base = SLJIT_MEM1(SLJIT_SP);
   tmp_offset = POSSESSIVE0;
   break;
   }
@@ -7923,19 +7923,19 @@
       }


     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);


     label = LABEL();
     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
     if (opcode == OP_UPTO || opcode == OP_CRRANGE)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
       if (opcode == OP_CRRANGE && min > 0)
         CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
       if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
         jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
       }


     /* We cannot use TMP3 because of this allocate_stack. */
@@ -8026,7 +8026,7 @@
   if (opcode == OP_POSPLUS)
     compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
   if (opcode == OP_POSUPTO)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max);
   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
   label = LABEL();
   compile_char1_matchingpath(common, type, cc, &nomatch);
@@ -8035,7 +8035,7 @@
     JUMPTO(SLJIT_JUMP, label);
   else
     {
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
     JUMPTO(SLJIT_C_NOT_ZERO, label);
     }
   set_jumps(nomatch, LABEL());
@@ -8061,7 +8061,7 @@
   if (max != 0)
     {
     SLJIT_ASSERT(max - min > 0);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min);
     }
   OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
   label = LABEL();
@@ -8071,7 +8071,7 @@
     JUMPTO(SLJIT_JUMP, label);
   else
     {
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1);
     JUMPTO(SLJIT_C_NOT_ZERO, label);
     }
   set_jumps(nomatch, LABEL());
@@ -8111,9 +8111,9 @@
   }


if (common->accept_label == NULL)
- add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
+ add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
else
- CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
+ CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
@@ -8142,11 +8142,11 @@
return cc + 1 + IMM2_SIZE;

if (!optimized_cbracket)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
offset <<= 1;
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
if (!optimized_cbracket)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
return cc + 1 + IMM2_SIZE;
}

@@ -8173,7 +8173,7 @@
{
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
}

@@ -8198,12 +8198,12 @@
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
size = 3 + (size < 0 ? 0 : size);

-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, size);
if (size > 3)
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
else
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
@@ -8270,9 +8270,9 @@

     case OP_SET_SOM:
     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
     allocate_stack(common, 1);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
     cc++;
     break;
@@ -8460,17 +8460,17 @@
     case OP_MARK:
     PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
     SLJIT_ASSERT(common->mark_ptr != 0);
-    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
     allocate_stack(common, common->has_skip_arg ? 5 : 1);
     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
     if (common->has_skip_arg)
       {
-      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0);
+      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
@@ -8548,7 +8548,7 @@
 struct sljit_jump *jump = NULL;
 jump_list *jumplist = NULL;
 int private_data_ptr = PRIVATE_DATA(cc);
-int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
+int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);


@@ -8721,14 +8721,14 @@
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
free_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
}
else if (common->has_set_som || common->mark_ptr != 0)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
}
}

@@ -8780,9 +8780,9 @@

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
{
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr);
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));

   set_jumps(current->topbacktracks, LABEL());
   }
@@ -8870,9 +8870,9 @@
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
   free_stack(common, 1);
   if (repeat_type == OP_UPTO)
-    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
   else
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
   }


 if (ket == OP_KETRMAX)
@@ -8901,10 +8901,10 @@
       {
       /* Checking zero-length iteration. */
       if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
-        CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+        CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
       else
         {
-        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
         CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
         }
       /* Drop STR_PTR for non-greedy plus quantifier. */
@@ -8916,7 +8916,7 @@
     }
   rmin_label = LABEL();
   if (repeat_type != 0)
-    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
   }
 else if (bra == OP_BRAZERO)
   {
@@ -8926,7 +8926,7 @@
   }
 else if (repeat_type == OP_EXACT)
   {
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
   exact_label = LABEL();
   }


@@ -8937,19 +8937,19 @@
     SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
     free_stack(common, 3);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
     }
   else if (common->optimized_cbracket[offset >> 1] == 0)
     {
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
     free_stack(common, 2);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
     }
   }


@@ -8957,7 +8957,7 @@
   {
   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
     {
-    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
     }
   once = JUMP(SLJIT_JUMP);
@@ -9008,9 +9008,9 @@
     assert = CURRENT_AS(bracket_backtrack)->u.assert;
     if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
       {
-      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
       }
     cond = JUMP(SLJIT_JUMP);
     set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
@@ -9043,7 +9043,7 @@
         if (opcode != OP_ONCE)
           {
           if (private_data_ptr != 0)
-            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
           else
             OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
           }
@@ -9064,7 +9064,7 @@
     if (repeat_type == OP_MINUPTO)
       {
       /* We need to preserve the counter. TMP2 will be used below. */
-      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
       stacksize++;
       }
     if (ket != OP_KET || bra != OP_BRA)
@@ -9109,7 +9109,7 @@
       {
       /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
       SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
       }


     JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
@@ -9149,9 +9149,9 @@
     assert = CURRENT_AS(bracket_backtrack)->u.assert;
     if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
       {
-      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr);
+      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
       add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
       }
     JUMPHERE(cond);
     }
@@ -9169,19 +9169,19 @@
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
     free_stack(common, 2);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
     }
   else
     {
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     free_stack(common, 1);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
     }
   }
 else if (opcode == OP_SBRA || opcode == OP_SCOND)
   {
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
   free_stack(common, 1);
   }
 else if (opcode == OP_ONCE)
@@ -9204,20 +9204,20 @@
   JUMPHERE(once);
   /* Restore previous private_data_ptr */
   if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
   else if (ket == OP_KETRMIN)
     {
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
     /* See the comment below. */
     free_stack(common, 2);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
     }
   }


 if (repeat_type == OP_EXACT)
   {
-  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0);
+  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
   CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
   }
 else if (ket == OP_KETRMAX)
@@ -9271,19 +9271,19 @@
     offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
     if (common->capture_last_ptr != 0)
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
     if (common->capture_last_ptr != 0)
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
     }
   set_jumps(current->topbacktracks, LABEL());
   free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
   return;
   }


-OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));

if (current->topbacktracks)
@@ -9294,7 +9294,7 @@
free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
JUMPHERE(jump);
}
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
}

 static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -9334,7 +9334,7 @@
     {
     SLJIT_ASSERT(common->control_head_ptr != 0);


-    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
+    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
     jump = JUMP(SLJIT_JUMP);
@@ -9366,11 +9366,11 @@
 if (opcode == OP_SKIP_ARG)
   {
   SLJIT_ASSERT(common->control_head_ptr != 0);
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
   sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
-  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);


OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
@@ -9411,7 +9411,7 @@
free_stack(common, 3);

JUMPHERE(jump);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}

 static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -9428,7 +9428,7 @@
     case OP_SET_SOM:
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     free_stack(common, 1);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
     break;


     case OP_STAR:
@@ -9557,9 +9557,9 @@
     if (common->has_skip_arg)
       OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
     free_stack(common, common->has_skip_arg ? 5 : 1);
-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
     if (common->has_skip_arg)
-      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
+      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
     break;


     case OP_THEN:
@@ -9633,8 +9633,8 @@
 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
 copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
 if (needs_control_head)
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0);
+  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
 if (needs_frame)
   init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);


@@ -9681,7 +9681,7 @@
 if (common->quit != NULL)
   {
   set_jumps(common->quit, LABEL());
-  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
+  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
   if (needs_frame)
     {
     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
@@ -9694,7 +9694,7 @@
   }


set_jumps(common->accept, LABEL());
-OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr);
+OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (needs_frame)
{
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
@@ -9712,15 +9712,15 @@
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
}
else
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
}
sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
}
@@ -9975,29 +9975,29 @@
common->compiler = compiler;

/* Main pcre_jit_exec entry. */
-sljit_emit_enter(compiler, 1, 5, 5, private_data_size);
+sljit_emit_enter(compiler, 1, 5, 5, 0, 0, private_data_size);

/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if (common->req_char_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);

-OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
-OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
+OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);

if (mode == JIT_PARTIAL_SOFT_COMPILE)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
if (common->mark_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Main part of the matching */
if ((re->options & PCRE_ANCHORED) == 0)
@@ -10039,16 +10039,16 @@
reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);

/* Store the current STR_PTR in OVECTOR(0). */
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
-OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH);
+OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1);

if (common->needs_start_ptr)
{
SLJIT_ASSERT(common->start_ptr != OVECTOR(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
}
else
SLJIT_ASSERT(common->start_ptr == OVECTOR(0));
@@ -10056,13 +10056,13 @@
/* Copy the beginning of the string. */
if (mode == JIT_PARTIAL_SOFT_COMPILE)
{
- jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
+ jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0);
JUMPHERE(jump);
}
else if (mode == JIT_PARTIAL_HARD_COMPILE)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

compile_matchingpath(common, common->start, ccend, &rootbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
@@ -10077,7 +10077,7 @@

if (common->might_be_empty)
{
- empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+ empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
empty_match_found_label = LABEL();
}

@@ -10122,10 +10122,10 @@
if (mode == JIT_PARTIAL_SOFT_COMPILE)
{
/* Update hit_start only in the first time. */
- jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0);
+ jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
JUMPHERE(jump);
}

@@ -10133,10 +10133,10 @@
if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0)
{
SLJIT_ASSERT(common->first_line_end != 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
}

-OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);

if ((re->options & PCRE_ANCHORED) == 0)
{
@@ -10160,7 +10160,7 @@
JUMPHERE(reqbyte_notfound);

if (mode == JIT_PARTIAL_SOFT_COMPILE)
- CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
+ CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
JUMPTO(SLJIT_JUMP, common->quit_label);
@@ -10206,8 +10206,8 @@
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
@@ -10219,8 +10219,8 @@
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
-sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Allocation failed. */
JUMPHERE(jump);
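
The pcre_jit_compile.c hunks above are largely mechanical: local storage is now
addressed through SLJIT_SP rather than the old SLJIT_LOCALS_REG, scratch and
saved registers use the new SLJIT_Rn/SLJIT_Sn names, and sljit_emit_enter()
gains two arguments for the float scratch/saved register counts. A minimal
sketch of the new calling pattern follows; it is illustrative only, not part of
the patch, and the register counts and local size in it are made up.

#include "sljitLir.h"

static void demo_emit(struct sljit_compiler *compiler)
{
  /* 1 argument, 5 integer scratch and 5 integer saved registers,
     no float registers, two machine words of local space. */
  sljit_emit_enter(compiler, 1, 5, 5, 0, 0, 2 * (int)sizeof(sljit_sw));

  /* Locals are addressed through SLJIT_SP (previously SLJIT_LOCALS_REG). */
  sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, 0);

  /* Scratch registers are SLJIT_R0, SLJIT_R1, ...; saved ones SLJIT_S0, ... */
  sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0);

  sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0);
}
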

Modified: code/trunk/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/sljit/sljitConfigInternal.h    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitConfigInternal.h    2014-07-07 07:11:16 UTC (rev 1491)
@@ -28,31 +28,43 @@
 #define _SLJIT_CONFIG_INTERNAL_H_


/*
- SLJIT defines the following macros depending on the target architecture:
+ SLJIT defines the following architecture dependent types and macros:

-   Feature detection (boolean) macros:
-   SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
-   SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
-   SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
-   SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double precision floating point array by index
-   SLJIT_SINGLE_SHIFT : the shift required to apply when accessing a single precision floating point array by index
-   SLJIT_LITTLE_ENDIAN : little endian architecture
-   SLJIT_BIG_ENDIAN : big endian architecture
-   SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
-   SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
-   SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
+   Types:
+     sljit_sb, sljit_ub : signed and unsigned 8 bit byte
+     sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type
+     sljit_si, sljit_ui : signed and unsigned 32 bit integer type
+     sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer
+     sljit_p : unsigned pointer value (usually the same as sljit_uw, but
+               some 64 bit ABIs may use 32 bit pointers)
+     sljit_s : single precision floating point value
+     sljit_d : double precision floating point value


-   Types and useful macros:
-   sljit_sb, sljit_ub : signed and unsigned 8 bit byte
-   sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type
-   sljit_si, sljit_ui : signed and unsigned 32 bit integer type
-   sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer
-   sljit_p : unsgined pointer value (usually the same as sljit_uw, but
-             some 64 bit ABIs may use 32 bit pointers)
-   sljit_s : single precision floating point value
-   sljit_d : double precision floating point value
-   SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT
-   SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
+   Macros for feature detection (boolean):
+     SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
+     SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
+     SLJIT_LITTLE_ENDIAN : little endian architecture
+     SLJIT_BIG_ENDIAN : big endian architecture
+     SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
+     SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
+
+   Constants:
+     SLJIT_NUMBER_OF_REGISTERS : number of available registers
+     SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers
+     SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers
+     SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
+     SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
+     SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
+     SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
+     SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing
+                          a double precision floating point array by index
+     SLJIT_SINGLE_SHIFT : the shift required to apply when accessing
+                          a single precision floating point array by index
+     SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
+
+   Other macros:
+     SLJIT_CALL : C calling convention define for both calling JIT from C and C callbacks for JIT
+     SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
 */


#if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
@@ -156,6 +168,53 @@
#define SLJIT_CONFIG_SPARC 1
#endif

+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#define SLJIT_NUMBER_OF_REGISTERS 10
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifndef _WIN64
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
+#else
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#endif /* _WIN64 */
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+#define SLJIT_NUMBER_OF_REGISTERS 11
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
+#define SLJIT_NUMBER_OF_REGISTERS 11
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#define SLJIT_NUMBER_OF_REGISTERS 23
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+#define SLJIT_NUMBER_OF_REGISTERS 22
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
+#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+#define SLJIT_NUMBER_OF_REGISTERS 17
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
+#define SLJIT_NUMBER_OF_REGISTERS 18
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
+#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+#define SLJIT_NUMBER_OF_REGISTERS 0
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
+#endif
+
+#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \
+    (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS)
+
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64)
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1
+#else
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
+#endif
+
+#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
+    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
+
 #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)


/* These libraries are needed for the macros below. */
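
With the constants added above, each target declares only its total register
count and how many of those are callee-saved; the scratch counts are always
derived by subtraction. A hypothetical compile-time check (the demo_* typedef
names are made up, and sljitLir.h is assumed to pull in sljitConfigInternal.h)
shows how the numbers work out for non-Windows x86-64 per the table above:
12 registers with 6 saved leaves 6 scratch registers, and since no float
registers are saved outside WIN64, all 6 float registers are scratch.

#include "sljitLir.h"

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !(defined _WIN64)
/* Negative array size if the derived counts ever disagree with the table. */
typedef char demo_scratch_count_ok[SLJIT_NUMBER_OF_SCRATCH_REGISTERS == 6 ? 1 : -1];
typedef char demo_float_scratch_count_ok[SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS == 6 ? 1 : -1];
#endif
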

Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitLir.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -229,13 +229,25 @@
 #    define FCC_IS_SET    (1 << 24)
 #endif


+/* Stack management. */
+
+#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \
+    (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \
+        (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \
+        extra) * sizeof(sljit_sw))
+
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1
-#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw))
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+#define FIXED_LOCALS_OFFSET ((2 + 4) * sizeof(sljit_sw))
+#else
+/* Maximum 3 arguments are passed on the stack. */
+#define FIXED_LOCALS_OFFSET ((3 + 4) * sizeof(sljit_sw))
 #endif
-#endif


+#endif /* SLJIT_CONFIG_X86_32 */
+
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
#ifdef _WIN64
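
GET_SAVED_REGISTERS_SIZE() computes how many machine words the prologue must
preserve: requested scratch registers only count once they exceed the number
of natively scratch registers, saved registers are capped at the native
saved-register count, and "extra" covers any additional slots the target
wants. A worked example follows; it is a hypothetical helper that would only
compile inside sljitLir.c, where the macro is defined, and it assumes a target
with 6 native scratch and 6 native saved registers.

/* With 5 scratch registers requested (fewer than the 6 native ones) nothing
   spills over, all 5 saveds are preserved, plus 2 extra slots, so the result
   is (0 + 5 + 2) * sizeof(sljit_sw). */
static sljit_sw demo_saved_size(void)
{
  return (sljit_sw)GET_SAVED_REGISTERS_SIZE(5, 5, 2);
}
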
@@ -281,13 +293,13 @@
#if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET)

 #define ADJUST_LOCAL_OFFSET(p, i) \
-    if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+    if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
         (i) += compiler->locals_offset;


#elif (defined SLJIT_HAS_FIXED_LOCALS_OFFSET && SLJIT_HAS_FIXED_LOCALS_OFFSET)

 #define ADJUST_LOCAL_OFFSET(p, i) \
-    if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+    if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
         (i) += FIXED_LOCALS_OFFSET;


#else
@@ -361,6 +373,8 @@

     compiler->scratches = -1;
     compiler->saveds = -1;
+    compiler->fscratches = -1;
+    compiler->fsaveds = -1;


 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     compiler->args = -1;
@@ -632,21 +646,30 @@


 #define FUNCTION_CHECK_IS_REG(r) \
     ((r) == SLJIT_UNUSED || \
-    ((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \
-    ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds))
+    ((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \
+    ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))


+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#define FUNCTION_ASSERT_IF_VIRTUAL(p) \
+    SLJIT_ASSERT((p) < SLJIT_R3 || (p) > SLJIT_R6);
+#else
+#define FUNCTION_ASSERT_IF_VIRTUAL(p)
+#endif
+
 #define FUNCTION_CHECK_SRC(p, i) \
     SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \
     if (FUNCTION_CHECK_IS_REG(p)) \
         SLJIT_ASSERT((i) == 0 && (p) != SLJIT_UNUSED); \
     else if ((p) == SLJIT_IMM) \
         ; \
-    else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+    else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
         SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
     else if ((p) & SLJIT_MEM) { \
         SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
+        FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \
         if ((p) & OFFS_REG_MASK) { \
             SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+            FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \
             SLJIT_ASSERT(!((i) & ~0x3)); \
         } \
         SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
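
The updated FUNCTION_CHECK_IS_REG reflects the new numbering: scratch
registers are allocated upward from SLJIT_R0, saved registers downward from
SLJIT_S0, so both live ranges can be tested with simple comparisons. A
standalone restatement of the same test (sketch only; the SLJIT_UNUSED case
and the x86-32 virtual-register assert are left out):

#include "sljitLir.h"

/* Mirrors the register-range part of FUNCTION_CHECK_IS_REG above. */
static int demo_is_live_reg(sljit_si r, sljit_si scratches, sljit_si saveds)
{
  return (r >= SLJIT_R0 && r < SLJIT_R0 + scratches)
      || (r > SLJIT_S0 - saveds && r <= SLJIT_S0);
}
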
@@ -658,12 +681,14 @@
     SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \
     if (FUNCTION_CHECK_IS_REG(p)) \
         SLJIT_ASSERT((i) == 0); \
-    else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+    else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
         SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
     else if ((p) & SLJIT_MEM) { \
         SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
+        FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \
         if ((p) & OFFS_REG_MASK) { \
             SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
+            FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \
             SLJIT_ASSERT(!((i) & ~0x3)); \
         } \
         SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
@@ -672,15 +697,19 @@
         SLJIT_ASSERT_STOP();


 #define FUNCTION_FCHECK(p, i) \
-    if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \
+    SLJIT_ASSERT(compiler->fscratches != -1 && compiler->fsaveds != -1); \
+    if (((p) >= SLJIT_FR0 && (p) < (SLJIT_FR0 + compiler->fscratches)) || \
+            ((p) > (SLJIT_FS0 - compiler->fsaveds) && (p) <= SLJIT_FS0)) \
         SLJIT_ASSERT(i == 0); \
-    else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+    else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
         SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
     else if ((p) & SLJIT_MEM) { \
         SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
+        FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \
         if ((p) & OFFS_REG_MASK) { \
             SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \
-            SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_LOCALS_REG) && !(i & ~0x3)); \
+            FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \
+            SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \
         } else \
             SLJIT_ASSERT(OFFS_REG(p) == 0); \
         SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
@@ -690,8 +719,8 @@


 #define FUNCTION_CHECK_OP1() \
     if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \
-        SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_LOCALS_REG); \
-        SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_LOCALS_REG); \
+        SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \
+        SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \
         if ((src & SLJIT_MEM) && (src & REG_MASK)) \
             SLJIT_ASSERT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \
     }
@@ -705,17 +734,6 @@
     compiler->verbose = verbose;
 }


-static char* reg_names[] = {
-    (char*)"unused", (char*)"s1", (char*)"s2", (char*)"s3",
-    (char*)"se1", (char*)"se2", (char*)"p1", (char*)"p2",
-    (char*)"p3", (char*)"pe1", (char*)"pe2", (char*)"lc"
-};
-
-static char* freg_names[] = {
-    (char*)"unused", (char*)"f1", (char*)"f2", (char*)"f3",
-    (char*)"f4", (char*)"f5", (char*)"f6"
-};
-
 #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
 #ifdef _WIN64
 #    define SLJIT_PRINT_D    "I64"
@@ -726,48 +744,62 @@
 #    define SLJIT_PRINT_D    ""
 #endif


-#define sljit_verbose_param(p, i) \
+#define sljit_verbose_reg(compiler, r) \
+    do { \
+        if ((r) < (SLJIT_R0 + compiler->scratches)) \
+            fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \
+        else \
+            fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \
+    } while (0)
+
+#define sljit_verbose_param(compiler, p, i) \
     if ((p) & SLJIT_IMM) \
         fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \
     else if ((p) & SLJIT_MEM) { \
         if ((p) & REG_MASK) { \
-            if (i) { \
-                if ((p) & OFFS_REG_MASK) \
-                    fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \
-                else \
-                    fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \
+            fputc('[', compiler->verbose); \
+            sljit_verbose_reg(compiler, (p) & REG_MASK); \
+            if ((p) & OFFS_REG_MASK) { \
+                fprintf(compiler->verbose, " + "); \
+                sljit_verbose_reg(compiler, OFFS_REG(p)); \
+                if (i) \
+                    fprintf(compiler->verbose, " * %d", 1 << (i)); \
             } \
-            else { \
-                if ((p) & OFFS_REG_MASK) \
-                    fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \
-                else \
-                    fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \
-            } \
+            else if (i) \
+                fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \
+            fputc(']', compiler->verbose); \
         } \
         else \
             fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
-    } else \
-        fprintf(compiler->verbose, "%s", reg_names[p]);
-#define sljit_verbose_fparam(p, i) \
+    } else if (p) \
+        sljit_verbose_reg(compiler, p); \
+    else \
+        fprintf(compiler->verbose, "unused");
+
+#define sljit_verbose_fparam(compiler, p, i) \
     if ((p) & SLJIT_MEM) { \
         if ((p) & REG_MASK) { \
-            if (i) { \
-                if ((p) & OFFS_REG_MASK) \
-                    fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \
-                else \
-                    fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \
+            fputc('[', compiler->verbose); \
+            sljit_verbose_reg(compiler, (p) & REG_MASK); \
+            if ((p) & OFFS_REG_MASK) { \
+                fprintf(compiler->verbose, " + "); \
+                sljit_verbose_reg(compiler, OFFS_REG(p)); \
+                if (i) \
+                    fprintf(compiler->verbose, "%d", 1 << (i)); \
             } \
-            else { \
-                if ((p) & OFFS_REG_MASK) \
-                    fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \
-                else \
-                    fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \
-            } \
+            else if (i) \
+                fprintf(compiler->verbose, "%" SLJIT_PRINT_D "d", (i)); \
+            fputc(']', compiler->verbose); \
         } \
         else \
             fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \
-    } else \
-        fprintf(compiler->verbose, "%s", freg_names[p]);
+    } \
+    else { \
+        if ((p) < (SLJIT_FR0 + compiler->fscratches)) \
+            fprintf(compiler->verbose, "fr%d", (p) - SLJIT_FR0); \
+        else \
+            fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \
+    }


 static SLJIT_CONST char* op_names[] = {
     /* op0 */
@@ -832,7 +864,9 @@
 #endif
 }


-static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     /* If debug and verbose are disabled, all arguments are unused. */
     SLJIT_UNUSED_ARG(compiler);
@@ -842,17 +876,24 @@
     SLJIT_UNUSED_ARG(local_size);


     SLJIT_ASSERT(args >= 0 && args <= 3);
-    SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS);
-    SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS);
+    SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
+    SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
+    SLJIT_ASSERT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
     SLJIT_ASSERT(args <= saveds);
+    SLJIT_ASSERT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+    SLJIT_ASSERT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+    SLJIT_ASSERT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
     SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
-        fprintf(compiler->verbose, "  enter args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size);
+        fprintf(compiler->verbose, "  enter args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
+            args, scratches, saveds, fscratches, fsaveds, local_size);
 #endif
 }


-static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     /* If debug and verbose are disabled, all arguments are unused. */
     SLJIT_UNUSED_ARG(compiler);
@@ -869,13 +910,18 @@
 #endif


     SLJIT_ASSERT(args >= 0 && args <= 3);
-    SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS);
-    SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS);
+    SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
+    SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
+    SLJIT_ASSERT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
     SLJIT_ASSERT(args <= saveds);
+    SLJIT_ASSERT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+    SLJIT_ASSERT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
+    SLJIT_ASSERT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
     SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
-        fprintf(compiler->verbose, "  set_context args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size);
+        fprintf(compiler->verbose, "  set_context args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
+            args, scratches, saveds, fscratches, fsaveds, local_size);
 #endif
 }


@@ -901,7 +947,7 @@
             fprintf(compiler->verbose, "  return\n");
         else {
             fprintf(compiler->verbose, "  return %s ", op_names[op]);
-            sljit_verbose_param(src, srcw);
+            sljit_verbose_param(compiler, src, srcw);
             fprintf(compiler->verbose, "\n");
         }
     }
@@ -921,7 +967,7 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  fast_enter ");
-        sljit_verbose_param(dst, dstw);
+        sljit_verbose_param(compiler, dst, dstw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -940,7 +986,7 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  fast_return ");
-        sljit_verbose_param(src, srcw);
+        sljit_verbose_param(compiler, src, srcw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -954,6 +1000,7 @@


     SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL)
         || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV));
+    SLJIT_ASSERT(op < SLJIT_UMUL || compiler->scratches >= 2);
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
         fprintf(compiler->verbose, "  %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)]);
@@ -991,9 +1038,9 @@
         fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)],
             !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
             !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
-        sljit_verbose_param(dst, dstw);
+        sljit_verbose_param(compiler, dst, dstw);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_param(src, srcw);
+        sljit_verbose_param(compiler, src, srcw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1033,11 +1080,11 @@
         fprintf(compiler->verbose, "  %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)],
             !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
             !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
-        sljit_verbose_param(dst, dstw);
+        sljit_verbose_param(compiler, dst, dstw);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_param(src1, src1w);
+        sljit_verbose_param(compiler, src1, src1w);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_param(src2, src2w);
+        sljit_verbose_param(compiler, src2, src2w);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1046,13 +1093,13 @@
 static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg)
 {
     SLJIT_UNUSED_ARG(reg);
-    SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS);
+    SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS);
 }


 static SLJIT_INLINE void check_sljit_get_float_register_index(sljit_si reg)
 {
     SLJIT_UNUSED_ARG(reg);
-    SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_FLOAT_REGISTERS);
+    SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
 }


 static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -1121,9 +1168,9 @@
             (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
             ? ((op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms")
             : ((op & SLJIT_SINGLE_OP) ? "s" : "d"));
-        sljit_verbose_fparam(dst, dstw);
+        sljit_verbose_fparam(compiler, dst, dstw);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_fparam(src, srcw);
+        sljit_verbose_fparam(compiler, src, srcw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1158,9 +1205,9 @@
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d",
             !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s");
-        sljit_verbose_fparam(src1, src1w);
+        sljit_verbose_fparam(compiler, src1, src1w);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_fparam(src2, src2w);
+        sljit_verbose_fparam(compiler, src2, src2w);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1196,9 +1243,9 @@
         fprintf(compiler->verbose, "  %s%s.from%s ", op_names[GET_OPCODE(op)],
             (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w",
             (op & SLJIT_SINGLE_OP) ? "s" : "d");
-        sljit_verbose_param(dst, dstw);
+        sljit_verbose_param(compiler, dst, dstw);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_fparam(src, srcw);
+        sljit_verbose_fparam(compiler, src, srcw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1234,9 +1281,9 @@
         fprintf(compiler->verbose, "  %s%s.from%s ", op_names[GET_OPCODE(op)],
             (op & SLJIT_SINGLE_OP) ? "s" : "d",
             (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w");
-        sljit_verbose_fparam(dst, dstw);
+        sljit_verbose_fparam(compiler, dst, dstw);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_param(src, srcw);
+        sljit_verbose_param(compiler, src, srcw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1268,11 +1315,11 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  %s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d");
-        sljit_verbose_fparam(dst, dstw);
+        sljit_verbose_fparam(compiler, dst, dstw);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_fparam(src1, src1w);
+        sljit_verbose_fparam(compiler, src1, src1w);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_fparam(src2, src2w);
+        sljit_verbose_fparam(compiler, src2, src2w);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1330,9 +1377,9 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  %scmp%s.%s ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
-        sljit_verbose_param(src1, src1w);
+        sljit_verbose_param(compiler, src1, src1w);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_param(src2, src2w);
+        sljit_verbose_param(compiler, src2, src2w);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1360,9 +1407,9 @@
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  %scmp%s.%s ", (type & SLJIT_SINGLE_OP) ? "s" : "d",
             !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
-        sljit_verbose_fparam(src1, src1w);
+        sljit_verbose_fparam(compiler, src1, src1w);
         fprintf(compiler->verbose, ", ");
-        sljit_verbose_fparam(src2, src2w);
+        sljit_verbose_fparam(compiler, src2, src2w);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1390,7 +1437,7 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  ijump.%s ", jump_names[type]);
-        sljit_verbose_param(src, srcw);
+        sljit_verbose_param(compiler, src, srcw);
         fprintf(compiler->verbose, "\n");
     }
 #endif
@@ -1427,10 +1474,10 @@
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  %sflags.%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i",
             op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
-        sljit_verbose_param(dst, dstw);
+        sljit_verbose_param(compiler, dst, dstw);
         if (src != SLJIT_UNUSED) {
             fprintf(compiler->verbose, ", ");
-            sljit_verbose_param(src, srcw);
+            sljit_verbose_param(compiler, src, srcw);
         }
         fprintf(compiler->verbose, ", %s\n", jump_names[type]);
     }
@@ -1450,7 +1497,7 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  local_base ");
-        sljit_verbose_param(dst, dstw);
+        sljit_verbose_param(compiler, dst, dstw);
         fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset);
     }
 #endif
@@ -1470,7 +1517,7 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  const ");
-        sljit_verbose_param(dst, dstw);
+        sljit_verbose_param(compiler, dst, dstw);
         fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value);
     }
 #endif
@@ -1658,13 +1705,13 @@
     CHECK_ERROR();
     check_sljit_get_local_base(compiler, dst, dstw, offset);


-    ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
+    ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->skip_checks = 1;
 #endif
     if (offset != 0)
-        return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
-    return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_LOCALS_REG, 0);
+        return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+    return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0);
 }


 #endif
@@ -1720,23 +1767,31 @@
     SLJIT_ASSERT_STOP();
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(args);
     SLJIT_UNUSED_ARG(scratches);
     SLJIT_UNUSED_ARG(saveds);
+    SLJIT_UNUSED_ARG(fscratches);
+    SLJIT_UNUSED_ARG(fsaveds);
     SLJIT_UNUSED_ARG(local_size);
     SLJIT_ASSERT_STOP();
     return SLJIT_ERR_UNSUPPORTED;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(args);
     SLJIT_UNUSED_ARG(scratches);
     SLJIT_UNUSED_ARG(saveds);
+    SLJIT_UNUSED_ARG(fscratches);
+    SLJIT_UNUSED_ARG(fsaveds);
     SLJIT_UNUSED_ARG(local_size);
     SLJIT_ASSERT_STOP();
 }


Modified: code/trunk/sljit/sljitLir.h
===================================================================
--- code/trunk/sljit/sljitLir.h    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitLir.h    2014-07-07 07:11:16 UTC (rev 1491)
@@ -56,8 +56,6 @@
     Disadvantages:
       - No automatic register allocation, and temporary results are
         not stored on the stack. (hence the name comes)
-      - Limited number of registers (only 6+4 integer registers, max 3+2
-        scratch, max 3+2 saved and 6 floating point registers)
     In practice:
       - This approach is very effective for interpreters
         - One of the saved registers typically points to a stack interface
@@ -104,71 +102,155 @@
 /*  Registers                                                            */
 /* --------------------------------------------------------------------- */


+/*
+  Scratch (R) registers: registers that may not preserve their values
+  across function calls.
+
+  Saved (S) registers: registers that preserve their values across
+  function calls.
+
+  The scratch and saved register sets overlap. The last scratch register
+  is the first saved register, the one before the last is the second saved
+  register, and so on.
+
+  If an architecture provides two scratch and three saved registers,
+  its scratch and saved register sets are the following:
+
+     R0   |  [S4]  |   R0 and S4 represent the same physical register
+     R1   |  [S3]  |   R1 and S3 represent the same physical register
+    [R2]  |   S2   |   R2 and S2 represent the same physical register
+    [R3]  |   S1   |   R3 and S1 represent the same physical register
+    [R4]  |   S0   |   R4 and S0 represent the same physical register
+
+  Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and
+        SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture.
+
+  Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10
+        and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers
+        are virtual on x86-32. See below.
+
+  The purpose of this definition is convenience. Although a register
+  is either a scratch or a saved register, SLJIT allows accessing it
+  from the other set as well. For example, four registers can be used
+  as scratch registers and the fifth one as a saved register on the
+  architecture above. Of course the last two of these four scratch
+  registers (R2 and R3) will be saved on the stack, because they are
+  defined as saved registers in the application binary interface.
+  Still, R2 and R3 can be used to refer to these registers instead of
+  S2 and S1, which makes it easier to write platform-independent code.
+  Scratch registers can be used as saved registers in a similar way,
+  but these extra saved registers are not preserved across function
+  calls! Hence the application must save them on those platforms where
+  the number of saved registers is too low. This can be done by copying
+  them onto the stack before a function call and restoring them afterwards.
+
+  Note: To emphasize that registers assigned to R2-R4 are saved
+        registers, they are enclosed by square brackets. S3-S4
+        are marked in a similar way.
+
+  Note: sljit_emit_enter and sljit_set_context define whether a register
+        is an S or an R register. E.g. when 3 scratch and 1 saved registers
+        are mapped by sljit_emit_enter, the allowed register set will be
+        R0-R2 and S0. Although S2 is mapped to the same position as R2,
+        it is not available in the current configuration. Furthermore,
+        the R3 (S1) register is not available either.
+*/
+
+/* When SLJIT_UNUSED is specified as destination, the result is discarded. */
 #define SLJIT_UNUSED        0


-/* Scratch (temporary) registers whose may not preserve their values
-   across function calls. */
-#define SLJIT_SCRATCH_REG1    1
-#define SLJIT_SCRATCH_REG2    2
-#define SLJIT_SCRATCH_REG3    3
-/* Note: extra registers cannot be used for memory addressing. */
-/* Note: on x86-32, these registers are emulated (using stack
-   loads & stores). */
-#define SLJIT_SCRATCH_EREG1    4
-#define SLJIT_SCRATCH_EREG2    5
+/* Scratch registers. */
+#define SLJIT_R0    1
+#define SLJIT_R1    2
+#define SLJIT_R2    3
+/* Note: on x86-32, R3 - R6 are emulated (using stack loads & stores),
+   so they cannot be used for memory addressing. There is no such
+   limitation on other CPUs. */
+#define SLJIT_R3    4
+#define SLJIT_R4    5
+#define SLJIT_R5    6
+#define SLJIT_R6    7
+#define SLJIT_R7    8
+#define SLJIT_R8    9
+#define SLJIT_R9    10
+/* All R registers provided by the architecture can be accessed by SLJIT_R(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */
+#define SLJIT_R(i)    (1 + (i))


-/* Saved registers whose preserve their values across function calls. */
-#define SLJIT_SAVED_REG1    6
-#define SLJIT_SAVED_REG2    7
-#define SLJIT_SAVED_REG3    8
-/* Note: extra registers cannot be used for memory addressing. */
-/* Note: on x86-32, these registers are emulated (using stack
-   loads & stores). */
-#define SLJIT_SAVED_EREG1    9
-#define SLJIT_SAVED_EREG2    10
+/* Saved registers. */
+#define SLJIT_S0    (SLJIT_NUMBER_OF_REGISTERS)
+#define SLJIT_S1    (SLJIT_NUMBER_OF_REGISTERS - 1)
+#define SLJIT_S2    (SLJIT_NUMBER_OF_REGISTERS - 2)
+/* Note: on x86-32, S3 - S6 are emulated (using stack loads & stores),
+   so they cannot be used for memory addressing. There is no such
+   limitation on other CPUs. */
+#define SLJIT_S3    (SLJIT_NUMBER_OF_REGISTERS - 3)
+#define SLJIT_S4    (SLJIT_NUMBER_OF_REGISTERS - 4)
+#define SLJIT_S5    (SLJIT_NUMBER_OF_REGISTERS - 5)
+#define SLJIT_S6    (SLJIT_NUMBER_OF_REGISTERS - 6)
+#define SLJIT_S7    (SLJIT_NUMBER_OF_REGISTERS - 7)
+#define SLJIT_S8    (SLJIT_NUMBER_OF_REGISTERS - 8)
+#define SLJIT_S9    (SLJIT_NUMBER_OF_REGISTERS - 9)
+/* All S registers provided by the architecture can be accessed by SLJIT_S(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */
+#define SLJIT_S(i)    (SLJIT_NUMBER_OF_REGISTERS - (i))


-/* Read-only register (cannot be the destination of an operation).
-   Only SLJIT_MEM1(SLJIT_LOCALS_REG) addressing mode is allowed since
-   several ABIs has certain limitations about the stack layout. However
-   sljit_get_local_base() can be used to obtain the offset of a value
-   on the stack. */
-#define SLJIT_LOCALS_REG    11
+/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */
+#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1)


-/* Number of registers. */
-#define SLJIT_NO_TMP_REGISTERS    5
-#define SLJIT_NO_GEN_REGISTERS    5
-#define SLJIT_NO_REGISTERS    11
+/* SLJIT_SP provides direct access to the linear stack space allocated by
+   sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP).
+   The immediate offset is extended by the relative stack offset automatically.
+   sljit_get_local_base can be used to obtain the absolute offset. */
+#define SLJIT_SP    (SLJIT_NUMBER_OF_REGISTERS + 1)


/* Return with machine word. */

-#define SLJIT_RETURN_REG    SLJIT_SCRATCH_REG1
+#define SLJIT_RETURN_REG    SLJIT_R0


 /* x86 prefers specific registers for special purposes. In case of shift
-   by register it supports only SLJIT_SCRATCH_REG3 for shift argument
+   by register it supports only SLJIT_R2 for shift argument
    (which is the src2 argument of sljit_emit_op2). If another register is
    used, sljit must exchange data between registers which cause a minor
    slowdown. Other architectures has no such limitation. */


-#define SLJIT_PREF_SHIFT_REG    SLJIT_SCRATCH_REG3
+#define SLJIT_PREF_SHIFT_REG    SLJIT_R2


 /* --------------------------------------------------------------------- */
 /*  Floating point registers                                             */
 /* --------------------------------------------------------------------- */


+/* Each floating point register can store a double or single precision
+   value. The FR and FS register sets overlap in the same way as the R
+   and S register sets. See above. */
+
 /* Note: SLJIT_UNUSED as destination is not valid for floating point
-     operations, since they cannot be used for setting flags. */
+   operations, since they cannot be used for setting flags. */


-/* Floating point operations are performed on double or
-   single precision values. */
+/* Floating point scratch registers. */
+#define SLJIT_FR0    1
+#define SLJIT_FR1    2
+#define SLJIT_FR2    3
+#define SLJIT_FR3    4
+#define SLJIT_FR4    5
+#define SLJIT_FR5    6
+/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */
+#define SLJIT_FR(i)    (1 + (i))


-#define SLJIT_FLOAT_REG1        1
-#define SLJIT_FLOAT_REG2        2
-#define SLJIT_FLOAT_REG3        3
-#define SLJIT_FLOAT_REG4        4
-#define SLJIT_FLOAT_REG5        5
-#define SLJIT_FLOAT_REG6        6
+/* Floating point saved registers. */
+#define SLJIT_FS0    (SLJIT_NUMBER_OF_FLOAT_REGISTERS)
+#define SLJIT_FS1    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1)
+#define SLJIT_FS2    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2)
+#define SLJIT_FS3    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3)
+#define SLJIT_FS4    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4)
+#define SLJIT_FS5    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5)
+/* All FS registers provided by the architecture can be accessed by SLJIT_FS(i)
+   The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */
+#define SLJIT_FS(i)    (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i))


-#define SLJIT_NO_FLOAT_REGISTERS    6
+/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */
+#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1)


 /* --------------------------------------------------------------------- */
 /*  Main structures and functions                                        */
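
To put the register renaming in context: the old fixed names (SLJIT_SCRATCH_REGx,
SLJIT_SAVED_REGx, SLJIT_LOCALS_REG) are replaced by the indexed R/S names
throughout the tree, as the hunks above and below show. A minimal porting sketch,
not part of the patch itself, of what the rename looks like on the caller side
(the SLJIT_R()/SLJIT_S() index macros are interchangeable with the numbered names):

    /* Before this commit: */
    sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_SAVED_REG1, 0);
    /* After this commit (SLJIT_R(0) == SLJIT_R0, SLJIT_S(0) == SLJIT_S0): */
    sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0);
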
@@ -216,10 +298,14 @@
     struct sljit_memory_fragment *buf;
     struct sljit_memory_fragment *abuf;


-    /* Used local registers. */
+    /* Used scratch registers. */
     sljit_si scratches;
     /* Used saved registers. */
     sljit_si saveds;
+    /* Used float scratch registers. */
+    sljit_si fscratches;
+    /* Used float saved registers. */
+    sljit_si fsaveds;
     /* Local stack size. */
     sljit_si local_size;
     /* Code size. */
@@ -229,9 +315,6 @@


 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     sljit_si args;
-    sljit_si locals_offset;
-    sljit_si scratches_start;
-    sljit_si saveds_start;
 #endif


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -361,46 +444,61 @@
    error, they return with SLJIT_SUCCESS. */


/*
- The executable code is basically a function call from the viewpoint of
- the C language. The function calls must obey to the ABI (Application
- Binary Interface) of the platform, which specify the purpose of machine
- registers and stack handling among other things. The sljit_emit_enter
- function emits the necessary instructions for setting up a new context
- for the executable code and moves function arguments to the saved
- registers. The number of arguments are specified in the "args"
- parameter and the first argument goes to SLJIT_SAVED_REG1, the second
- goes to SLJIT_SAVED_REG2 and so on. The number of scratch and
- saved registers are passed in "scratches" and "saveds" arguments
- respectively. Since the saved registers contains the arguments,
- "args" must be less or equal than "saveds". The sljit_emit_enter
- is also capable of allocating a stack space for local variables. The
- "local_size" argument contains the size in bytes of this local area
- and its staring address is stored in SLJIT_LOCALS_REG. However
- the SLJIT_LOCALS_REG is not necessary the machine stack pointer.
- The memory bytes between SLJIT_LOCALS_REG (inclusive) and
- SLJIT_LOCALS_REG + local_size (exclusive) can be modified freely
- until the function returns. The stack space is uninitialized.
+ The executable code is a function call from the viewpoint of the C
+ language. The function calls must obey the ABI (Application
+ Binary Interface) of the platform, which specifies the purpose of
+ all machine registers and stack handling among other things. The
+ sljit_emit_enter function emits the necessary instructions for
+ setting up a new context for the executable code and moves function
+ arguments to the saved registers. The number of sljit_sw arguments
+ passed to the function is specified in the "args" parameter. The
+ number of arguments must be less than or equal to 3. The first
+ argument goes to SLJIT_S0, the second goes to SLJIT_S1 and so on.
+ The register set used by the function must be declared as well.
+ The number of scratch and saved registers used by the function must
+ be passed to sljit_emit_enter. Only the first "scratches" R registers
+ can be used later. E.g. if "scratches" is set to 2, the register
+ set will be limited to R0 and R1. The S registers
+ and the floating point registers ("fscratches" and "fsaveds")
+ are specified in a similar way. The sljit_emit_enter function is also
+ capable of allocating stack space for local variables. The "local_size"
+ argument contains the size in bytes of this local area and its
+ starting address is stored in SLJIT_SP. The memory area between
+ SLJIT_SP (inclusive) and SLJIT_SP + local_size (exclusive) can be
+ modified freely until the function returns. The stack space is not
+ initialized.

+   Note: the following conditions must be met:
+         0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS
+         0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS
+         scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS
+         0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+         0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+         fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+
    Note: every call of sljit_emit_enter and sljit_set_context
-         overwrites the previous context. */
+         overwrites the previous context.
+*/


 #define SLJIT_MAX_LOCAL_SIZE    65536


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
-    sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size);
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size);


 /* The machine code has a context (which contains the local stack space size,
    number of used registers, etc.) which initialized by sljit_emit_enter. Several
    functions (like sljit_emit_return) requres this context to be able to generate
    the appropriate code. However, some code fragments (like inline cache) may have
-   no normal entry point so their context is unknown for the compiler. Using the
-   function below we can specify their context.
+   no normal entry point so their context is unknown to the compiler. Their context
+   can be provided to the compiler by the sljit_set_context function.


    Note: every call of sljit_emit_enter and sljit_set_context overwrites
          the previous context. */


 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
-    sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size);
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size);


 /* Return from machine code.  The op argument can be SLJIT_UNUSED which means the
    function does not return with anything or any opcode between SLJIT_MOV and
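
As a concrete illustration of the extended entry interface documented above, a
hedged sketch (not code from this revision; error checking omitted) of a function
that takes two sljit_sw arguments, stores the first one in its local area and
returns it:

    #include "sljitLir.h"

    static void emit_example(struct sljit_compiler *compiler)
    {
        /* 2 args, 1 scratch, 2 saved, no float registers, 16 bytes of locals. */
        sljit_emit_enter(compiler, 2, 1, 2, 0, 0, 16);
        /* The arguments arrive in S0 and S1; locals are reached via SLJIT_SP. */
        sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_S0, 0);
        sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 0);
        sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0);
    }
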
@@ -558,23 +656,23 @@
          it can even decrease the runtime in a few cases. */
 #define SLJIT_NOP            1
 /* Flags: - (may destroy flags)
-   Unsigned multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2.
-   Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */
+   Unsigned multiplication of SLJIT_R0 and SLJIT_R1.
+   Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */
 #define SLJIT_UMUL            2
 /* Flags: - (may destroy flags)
-   Signed multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2.
-   Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */
+   Signed multiplication of SLJIT_R0 and SLJIT_R1.
+   Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */
 #define SLJIT_SMUL            3
 /* Flags: I - (may destroy flags)
-   Unsigned divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2.
-   The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2.
-   Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */
+   Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+   The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1.
+   Note: if SLJIT_R1 contains 0, the behaviour is undefined. */
 #define SLJIT_UDIV            4
 #define SLJIT_IUDIV            (SLJIT_UDIV | SLJIT_INT_OP)
 /* Flags: I - (may destroy flags)
-   Signed divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2.
-   The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2.
-   Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */
+   Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+   The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1.
+   Note: if SLJIT_R1 contains 0, the behaviour is undefined. */
 #define SLJIT_SDIV            5
 #define SLJIT_ISDIV            (SLJIT_SDIV | SLJIT_INT_OP)


@@ -906,7 +1004,7 @@
     sljit_si src, sljit_sw srcw,
     sljit_si type);


-/* Copies the base address of SLJIT_LOCALS_REG+offset to dst.
+/* Copies the base address of SLJIT_SP + offset to dst.
    Flags: - (never set any flags) */
 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset);
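
A brief usage sketch (illustrative only, not part of this change): materialising
the absolute address of a local variable so it can be passed to code outside the
generated function.

    /* Loads the address of the local at byte offset 8 of the area created
       by sljit_emit_enter into R0. */
    sljit_get_local_base(compiler, SLJIT_R0, 0, 8);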



Modified: code/trunk/sljit/sljitNativeARM_32.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_32.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeARM_32.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -36,13 +36,13 @@
 }


 /* Last register + 1. */
-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
-#define TMP_PC        (SLJIT_NO_REGISTERS + 4)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_PC        (SLJIT_NUMBER_OF_REGISTERS + 5)


 #define TMP_FREG1    (0)
-#define TMP_FREG2    (SLJIT_FLOAT_REG6 + 1)
+#define TMP_FREG2    (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)


 /* In ARM instruction words.
    Cache lines are usually 32 byte aligned. */
@@ -55,8 +55,8 @@
     (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))


 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
-    0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+    0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15
 };


 #define RM(rm) (reg_map[rm])
@@ -823,16 +823,20 @@
     sljit_si src1, sljit_sw src1w,
     sljit_si src2, sljit_sw src2w);


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
-    sljit_si size;
+    sljit_si size, i, tmp;
     sljit_uw push;


     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif
@@ -840,67 +844,57 @@
     /* Push saved registers, temporary registers
        stmdb sp!, {..., lr} */
     push = PUSH | (1 << 14);
-    if (scratches >= 5)
-        push |= 1 << 11;
-    if (scratches >= 4)
-        push |= 1 << 10;
-    if (saveds >= 5)
-        push |= 1 << 8;
-    if (saveds >= 4)
-        push |= 1 << 7;
-    if (saveds >= 3)
-        push |= 1 << 6;
-    if (saveds >= 2)
-        push |= 1 << 5;
-    if (saveds >= 1)
-        push |= 1 << 4;
+
+    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = SLJIT_S0; i >= tmp; i--)
+        push |= 1 << reg_map[i];
+
+    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+        push |= 1 << reg_map[i];
+
     FAIL_IF(push_inst(compiler, push));


     /* Stack must be aligned to 8 bytes: */
-    size = (1 + saveds) * sizeof(sljit_uw);
-    if (scratches >= 4)
-        size += (scratches - 3) * sizeof(sljit_uw);
-    local_size += size;
-    local_size = (local_size + 7) & ~7;
-    local_size -= size;
+    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+    local_size = ((size + local_size + 7) & ~7) - size;
     compiler->local_size = local_size;
     if (local_size > 0)
-        FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size));
+        FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));


     if (args >= 1)
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1))));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0))));
     if (args >= 2)
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1))));
     if (args >= 3)
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3))));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2))));


     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     sljit_si size;


     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


-    size = (1 + saveds) * sizeof(sljit_uw);
-    if (scratches >= 4)
-        size += (scratches - 3) * sizeof(sljit_uw);
-    local_size += size;
-    local_size = (local_size + 7) & ~7;
-    local_size -= size;
-    compiler->local_size = local_size;
+    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+    compiler->local_size = ((size + local_size + 7) & ~7) - size;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
 {
+    sljit_si i, tmp;
     sljit_uw pop;


     CHECK_ERROR();
@@ -909,26 +903,19 @@
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


     if (compiler->local_size > 0)
-        FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size));
+        FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));


-    pop = POP | (1 << 15);
     /* Push saved registers, temporary registers
        ldmia sp!, {..., pc} */
-    if (compiler->scratches >= 5)
-        pop |= 1 << 11;
-    if (compiler->scratches >= 4)
-        pop |= 1 << 10;
-    if (compiler->saveds >= 5)
-        pop |= 1 << 8;
-    if (compiler->saveds >= 4)
-        pop |= 1 << 7;
-    if (compiler->saveds >= 3)
-        pop |= 1 << 6;
-    if (compiler->saveds >= 2)
-        pop |= 1 << 5;
-    if (compiler->saveds >= 1)
-        pop |= 1 << 4;
+    pop = POP | (1 << 15);


+    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = SLJIT_S0; i >= tmp; i--)
+        pop |= 1 << reg_map[i];
+
+    for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+        pop |= 1 << reg_map[i];
+
     return push_inst(compiler, pop);
 }


@@ -1845,16 +1832,16 @@
     case SLJIT_SMUL:
 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
         return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
-            | (reg_map[SLJIT_SCRATCH_REG2] << 16)
-            | (reg_map[SLJIT_SCRATCH_REG1] << 12)
-            | (reg_map[SLJIT_SCRATCH_REG1] << 8)
-            | reg_map[SLJIT_SCRATCH_REG2]);
+            | (reg_map[SLJIT_R1] << 16)
+            | (reg_map[SLJIT_R0] << 12)
+            | (reg_map[SLJIT_R0] << 8)
+            | reg_map[SLJIT_R1]);
 #else
-        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))));
+        FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1))));
         return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
-            | (reg_map[SLJIT_SCRATCH_REG2] << 16)
-            | (reg_map[SLJIT_SCRATCH_REG1] << 12)
-            | (reg_map[SLJIT_SCRATCH_REG1] << 8)
+            | (reg_map[SLJIT_R1] << 16)
+            | (reg_map[SLJIT_R0] << 12)
+            | (reg_map[SLJIT_R0] << 8)
             | reg_map[TMP_REG1]);
 #endif
     case SLJIT_UDIV:


Modified: code/trunk/sljit/sljitNativeARM_64.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_64.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeARM_64.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -34,18 +34,18 @@


 #define TMP_ZERO    0


-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
-#define TMP_REG4    (SLJIT_NO_REGISTERS + 4)
-#define TMP_LR        (SLJIT_NO_REGISTERS + 5)
-#define TMP_SP        (SLJIT_NO_REGISTERS + 6)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_REG4    (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_LR        (SLJIT_NUMBER_OF_REGISTERS + 6)
+#define TMP_SP        (SLJIT_NUMBER_OF_REGISTERS + 7)


 #define TMP_FREG1    (0)
-#define TMP_FREG2    (SLJIT_FLOAT_REG6 + 1)
+#define TMP_FREG2    (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)


-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
- 31, 0, 1, 2, 3, 4, 19, 20, 21, 22, 23, 29, 9, 10, 11, 12, 30, 31
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
+ 31, 0, 1, 2, 3, 4, 5, 6, 7, 13, 14, 15, 16, 17, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 12, 30, 31
};

 #define W_OP (1 << 31)
@@ -1061,17 +1061,23 @@
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
+    sljit_si i, tmp, offs, prev;
+
     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif
-    compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw);
+    compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
     local_size = (compiler->locals_offset + local_size + 15) & ~15;
     compiler->local_size = local_size;


@@ -1089,65 +1095,98 @@
         FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x40 << 15)));
     }


-    FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_LOCALS_REG) | RN(TMP_SP)));
+    FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP)));


-    if (saveds >= 2)
-        FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15)));
-    if (saveds >= 4)
-        FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15)));
-    if (saveds == 1)
-        FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10)));
-    if (saveds == 3)
-        FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10)));
-    if (saveds == 5)
-        FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10)));
+    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+    offs = 2 << 15;
+    prev = -1;
+    for (i = SLJIT_S0; i >= tmp; i--) {
+        if (prev == -1) {
+            prev = i;
+            continue;
+        }
+        FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+        offs += 2 << 15;
+        prev = -1;
+    }


+    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+        if (prev == -1) {
+            prev = i;
+            continue;
+        }
+        FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+        offs += 2 << 15;
+        prev = -1;
+    }
+
+    if (prev != -1)
+        FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));
+
     if (args >= 1)
-        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1)));
+        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
     if (args >= 2)
-        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG2) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG2)));
+        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1)));
     if (args >= 3)
-        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG3) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG3)));
+        FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));


     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif
-    compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw);
+    compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
     compiler->local_size = (compiler->locals_offset + local_size + 15) & ~15;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
 {
-    sljit_si saveds, local_size;
+    sljit_si local_size;
+    sljit_si i, tmp, offs, prev;


     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


-    saveds = compiler->saveds;
+    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+    offs = 2 << 15;
+    prev = -1;
+    for (i = SLJIT_S0; i >= tmp; i--) {
+        if (prev == -1) {
+            prev = i;
+            continue;
+        }
+        FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+        offs += 2 << 15;
+        prev = -1;
+    }


-    if (saveds >= 2)
-        FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15)));
-    if (saveds >= 4)
-        FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15)));
-    if (saveds == 1)
-        FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10)));
-    if (saveds == 3)
-        FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10)));
-    if (saveds == 5)
-        FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10)));
+    for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+        if (prev == -1) {
+            prev = i;
+            continue;
+        }
+        FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+        offs += 2 << 15;
+        prev = -1;
+    }


+    if (prev != -1)
+        FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));
+
     local_size = compiler->local_size;


     if (local_size <= (62 << 3))
@@ -1187,15 +1226,15 @@
         return push_inst(compiler, NOP);
     case SLJIT_UMUL:
     case SLJIT_SMUL:
-        FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1)));
-        FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO)));
-        return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2));
+        FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+        FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
+        return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
     case SLJIT_UDIV:
     case SLJIT_SDIV:
-        FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1)));
-        FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2)));
-        FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO)));
-        return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2));
+        FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+        FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
+        FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
+        return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
     }


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativeARM_T2_32.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_T2_32.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeARM_T2_32.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -33,17 +33,17 @@
 typedef sljit_ui sljit_ins;


 /* Last register + 1. */
-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
-#define TMP_PC        (SLJIT_NO_REGISTERS + 4)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_PC        (SLJIT_NUMBER_OF_REGISTERS + 5)


 #define TMP_FREG1    (0)
-#define TMP_FREG2    (SLJIT_FLOAT_REG6 + 1)
+#define TMP_FREG2    (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)


 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
-    0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+    0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15
 };


 #define COPY_BITS(src, from, to, bits) \
@@ -138,9 +138,9 @@
 #define ORRI        0xf0400000
 #define ORRS        0x4300
 #define ORR_W        0xea400000
-#define POP        0xbd00
+#define POP        0xbc00
 #define POP_W        0xe8bd0000
-#define PUSH        0xb500
+#define PUSH        0xb400
 #define PUSH_W        0xe92d0000
 #define RSB_WI        0xf1c00000
 #define RSBSI        0x4240
@@ -960,7 +960,7 @@
     }


     /* SP based immediate. */
-    if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
+    if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
         FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
         return -1;
     }
@@ -1127,82 +1127,82 @@
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
-    sljit_si size;
+    sljit_si size, i, tmp;
     sljit_ins push;


     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


     push = (1 << 4);
-    if (saveds >= 5)
-        push |= 1 << 11;
-    if (saveds >= 4)
-        push |= 1 << 10;
-    if (saveds >= 3)
-        push |= 1 << 8;
-    if (saveds >= 2)
-        push |= 1 << 7;
-    if (saveds >= 1)
-        push |= 1 << 6;
-        if (scratches >= 5)
-        push |= 1 << 5;
-    FAIL_IF(saveds >= 3
+
+    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = SLJIT_S0; i >= tmp; i--)
+        push |= 1 << reg_map[i];
+
+    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+        push |= 1 << reg_map[i];
+
+    FAIL_IF((push & 0xff00)
         ? push_inst32(compiler, PUSH_W | (1 << 14) | push)
-        : push_inst16(compiler, PUSH | push));
+        : push_inst16(compiler, PUSH | (1 << 8) | push));


-    /* Stack must be aligned to 8 bytes: */
-    size = (3 + saveds) * sizeof(sljit_uw);
-    local_size += size;
-    local_size = (local_size + 7) & ~7;
-    local_size -= size;
+    /* Stack must be aligned to 8 bytes: (LR, R4) */
+    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+    local_size = ((size + local_size + 7) & ~7) - size;
     compiler->local_size = local_size;
     if (local_size > 0) {
         if (local_size <= (127 << 2))
             FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
         else
-            FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size));
+            FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
     }


     if (args >= 1)
-        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_SCRATCH_REG1)));
+        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0)));
     if (args >= 2)
-        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_SCRATCH_REG2)));
+        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1)));
     if (args >= 3)
-        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_SCRATCH_REG3)));
+        FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));


     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     sljit_si size;


     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


-    size = (3 + saveds) * sizeof(sljit_uw);
-    local_size += size;
-    local_size = (local_size + 7) & ~7;
-    local_size -= size;
-    compiler->local_size = local_size;
+    size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+    compiler->local_size = ((size + local_size + 7) & ~7) - size;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
 {
+    sljit_si i, tmp;
     sljit_ins pop;


     CHECK_ERROR();
@@ -1214,25 +1214,21 @@
         if (compiler->local_size <= (127 << 2))
             FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
         else
-            FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size));
+            FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size));
     }


     pop = (1 << 4);
-    if (compiler->saveds >= 5)
-        pop |= 1 << 11;
-    if (compiler->saveds >= 4)
-        pop |= 1 << 10;
-    if (compiler->saveds >= 3)
-        pop |= 1 << 8;
-    if (compiler->saveds >= 2)
-        pop |= 1 << 7;
-    if (compiler->saveds >= 1)
-        pop |= 1 << 6;
-        if (compiler->scratches >= 5)
-        pop |= 1 << 5;
-    return compiler->saveds >= 3
+
+    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = SLJIT_S0; i >= tmp; i--)
+        pop |= 1 << reg_map[i];
+
+    for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
+        pop |= 1 << reg_map[i];
+
+    return (pop & 0xff00)
         ? push_inst32(compiler, POP_W | (1 << 15) | pop)
-        : push_inst16(compiler, POP | pop);
+        : push_inst16(compiler, POP | (1 << 8) | pop);
 }


 /* --------------------------------------------------------------------- */
@@ -1268,10 +1264,10 @@
     case SLJIT_UMUL:
     case SLJIT_SMUL:
         return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
-            | (reg_map[SLJIT_SCRATCH_REG2] << 8)
-            | (reg_map[SLJIT_SCRATCH_REG1] << 12)
-            | (reg_map[SLJIT_SCRATCH_REG1] << 16)
-            | reg_map[SLJIT_SCRATCH_REG2]);
+            | (reg_map[SLJIT_R1] << 8)
+            | (reg_map[SLJIT_R0] << 12)
+            | (reg_map[SLJIT_R0] << 16)
+            | reg_map[SLJIT_R1]);
     case SLJIT_UDIV:
     case SLJIT_SDIV:
         if (compiler->scratches >= 4) {
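
[Note on the Thumb-2 prologue/epilogue above: sljit_emit_enter now builds the PUSH register mask by walking reg_map instead of hard-coded per-register tests, and picks the wide PUSH_W encoding only when a register outside r0-r7 is needed (the push & 0xff00 test); the narrow PUSH gets its LR bit as (1 << 8) now that the base opcode is 0xb400. A rough C sketch of the mask construction, with the SLJIT_* bounds passed in as parameters since their exact values are not shown here:]

/* Sketch only: the two register ranges mirror the loops in the diff;
   reg_map contents and the SLJIT_* limits are taken as inputs. */
static unsigned int build_push_mask(const unsigned char *reg_map,
    int s0, int first_saved_reg, int max_saved, int saveds, int scratches)
{
    unsigned int push = 1u << 4;                   /* fixed bit, as in the diff */
    int i;
    int tmp = (saveds < max_saved) ? (s0 + 1 - saveds) : first_saved_reg;

    for (i = s0; i >= tmp; i--)                    /* saved registers S0..Sn */
        push |= 1u << reg_map[i];
    for (i = scratches; i >= first_saved_reg; i--) /* callee-saved scratches */
        push |= 1u << reg_map[i];

    /* The caller then emits PUSH_W | (1 << 14) | push when (push & 0xff00)
       is set, otherwise the 16-bit PUSH | (1 << 8) | push. */
    return push;
}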


Modified: code/trunk/sljit/sljitNativeMIPS_32.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_32.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeMIPS_32.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -154,9 +154,9 @@
         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_O) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             }
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
@@ -174,7 +174,7 @@
         }
         else {
             if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             if (op & (SLJIT_SET_C | SLJIT_SET_O))
@@ -189,8 +189,8 @@
             FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
         if (!(op & SLJIT_SET_O))
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+        FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);


@@ -198,21 +198,21 @@
         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_C) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
                 else {
-                    FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
                 }
             }
             FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
         } else {
             if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
         }
         if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));


         FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
         if (!(op & SLJIT_SET_C))
@@ -221,7 +221,7 @@
         /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
         FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
         /* Set carry flag. */
-        return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG);
+        return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);


     case SLJIT_SUB:
         if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
@@ -233,9 +233,9 @@
         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_O) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             }
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
@@ -247,7 +247,7 @@
         }
         else {
             if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
@@ -265,8 +265,8 @@


         if (!(op & SLJIT_SET_O))
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+        FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);


@@ -279,22 +279,22 @@

         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
         }
         else {
             if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
         }


         if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2));
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));


         FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
-        return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+        return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;


     case SLJIT_MUL:
         SLJIT_ASSERT(!(flags & SRC2_IMM));
@@ -307,10 +307,10 @@
 #endif
         }
         FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
-        FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1));
+        FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
         FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-        FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2));
-        return push_inst(compiler, SUBU | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
+        return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);


     case SLJIT_AND:
         EMIT_LOGICAL(ANDI, AND);
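
[Note on the SLJIT_MUL case above: the removed TMP_EREG scratches are replaced by flag registers, but the overflow test itself is unchanged. After MULT, the product overflows 32 bits exactly when HI differs from the sign-extension of LO. A self-contained C version of that test:]

/* Sketch of the MFHI/MFLO/SRA/SUBU sequence above, written with explicit
   64-bit arithmetic.  A non-zero return corresponds to OVERFLOW_FLAG set. */
#include <stdint.h>

int32_t smul_overflow_32(int32_t a, int32_t b)
{
    int64_t product = (int64_t)a * (int64_t)b;
    int32_t lo = (int32_t)product;           /* MFLO -> dst */
    int32_t hi = (int32_t)(product >> 32);   /* MFHI -> ULESS_FLAG */
    int32_t sign = lo >> 31;                 /* SRA dst, 31 -> UGREATER_FLAG */
    return hi - sign;                        /* SUBU -> OVERFLOW_FLAG */
}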


Modified: code/trunk/sljit/sljitNativeMIPS_64.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_64.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeMIPS_64.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -246,9 +246,9 @@
         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_O) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             }
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
@@ -266,7 +266,7 @@
         }
         else {
             if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             if (op & (SLJIT_SET_C | SLJIT_SET_O))
@@ -281,8 +281,8 @@
             FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
         if (!(op & SLJIT_SET_O))
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);


@@ -290,21 +290,21 @@
         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_C) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
                 else {
-                    FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
                 }
             }
             FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
         } else {
             if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
         }
         if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));


         FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
         if (!(op & SLJIT_SET_C))
@@ -313,7 +313,7 @@
         /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
         FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
         /* Set carry flag. */
-        return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG);
+        return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);


     case SLJIT_SUB:
         if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
@@ -325,9 +325,9 @@
         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_O) {
                 if (src2 >= 0)
-                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
                 else
-                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1));
+                    FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             }
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
@@ -339,7 +339,7 @@
         }
         else {
             if (op & SLJIT_SET_O)
-                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             if (op & SLJIT_SET_E)
                 FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
             if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
@@ -357,8 +357,8 @@


         if (!(op & SLJIT_SET_O))
             return SLJIT_SUCCESS;
-        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
-        FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+        FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
         return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);


@@ -371,22 +371,22 @@

         if (flags & SRC2_IMM) {
             if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
         }
         else {
             if (op & SLJIT_SET_C)
-                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+                FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
             /* dst may be the same as src1 or src2. */
             FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
         }


         if (op & SLJIT_SET_C)
-            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2));
+            FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));


         FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
-        return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+        return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;


     case SLJIT_MUL:
         SLJIT_ASSERT(!(flags & SRC2_IMM));
@@ -402,10 +402,10 @@
 #endif
         }
         FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
-        FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1));
+        FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
         FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-        FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2));
-        return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+        FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
+        return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);


     case SLJIT_AND:
         EMIT_LOGICAL(ANDI, AND);
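
[Note on the SLJIT_ADDC paths above (32- and 64-bit variants alike): the carry recovery is unchanged, it just lands in OVERFLOW_FLAG instead of TMP_EREG1. Since MIPS has no carry flag, the carry out of src1 + src2 is read back as an unsigned compare of the truncated sum against src1 | src2, which stays valid even when dst aliases one of the sources. A small C illustration of why that compare works:]

/* Sketch only.  a + b == (a | b) + (a & b), so the truncated sum can only
   drop below (a | b) when the addition wrapped -- which is what the
   OR / ADDU / SLTU triple in the diff computes. */
#include <stdint.h>

uint32_t add_carry_out(uint32_t a, uint32_t b)
{
    uint32_t or_ab = a | b;    /* OR src1, src2 -> OVERFLOW_FLAG (taken before the add) */
    uint32_t sum   = a + b;    /* ADDU src1, src2 -> dst (may overwrite a or b) */
    return sum < or_ab;        /* SLTU dst, OVERFLOW_FLAG -> carry out */
}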


Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -40,35 +40,32 @@
    Both for mips-32 and mips-64 */
 typedef sljit_ui sljit_ins;


-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)


 /* For position independent code, t9 must contain the function address. */
 #define PIC_ADDR_REG    TMP_REG2


-/* TMP_EREGs are used mainly for arithmetic operations. */
-#define TMP_EREG1    15
-#define TMP_EREG2    24
 /* Floating point status register. */
 #define FCSR_REG    31
 /* Return address register. */
 #define RETURN_ADDR_REG    31


-/* Flags are keept in volatile registers. */
-#define EQUAL_FLAG    7
+/* Flags are kept in volatile registers. */
+#define EQUAL_FLAG    12
 /* And carry flag as well. */
-#define ULESS_FLAG    10
-#define UGREATER_FLAG    11
-#define LESS_FLAG    12
-#define GREATER_FLAG    13
-#define OVERFLOW_FLAG    14
+#define ULESS_FLAG    13
+#define UGREATER_FLAG    14
+#define LESS_FLAG    15
+#define GREATER_FLAG    31
+#define OVERFLOW_FLAG    1


 #define TMP_FREG1    (0)
-#define TMP_FREG2    ((SLJIT_FLOAT_REG6 + 1) << 1)
+#define TMP_FREG2    ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)


-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
-    0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+    0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
 };


/* --------------------------------------------------------------------- */
@@ -537,20 +534,25 @@
#include "sljitNativeMIPS_64.c"
#endif

-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     sljit_ins base;
+    sljit_si i, tmp, offs;


     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


-    local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+    local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
     local_size = (local_size + 15) & ~0xf;
 #else
@@ -560,51 +562,57 @@


     if (local_size <= SIMM_MAX) {
         /* Frequent case. */
-        FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG)));
-        base = S(SLJIT_LOCALS_REG);
+        FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
+        base = S(SLJIT_SP);
     }
     else {
         FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
-        FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
-        FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG)));
+        FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+        FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
         base = S(TMP_REG2);
         local_size = 0;
     }


-    FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
-    if (saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
-    if (saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
-    if (saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
-    if (saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
-    if (saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS));
+    offs = local_size - (sljit_sw)(sizeof(sljit_sw));
+    FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));


+    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = SLJIT_S0; i >= tmp; i--) {
+        offs -= (sljit_si)(sizeof(sljit_sw));
+        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS));
+    }
+
+    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+        offs -= (sljit_si)(sizeof(sljit_sw));
+        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS));
+    }
+
     if (args >= 1)
-        FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1)));
+        FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0)));
     if (args >= 2)
-        FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2)));
+        FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1)));
     if (args >= 3)
-        FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3)));
+        FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2)));


     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


-    local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+    local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
     compiler->local_size = (local_size + 15) & ~0xf;
 #else
@@ -614,7 +622,7 @@


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
 {
-    sljit_si local_size;
+    sljit_si local_size, i, tmp, offs;
     sljit_ins base;


     CHECK_ERROR();
@@ -624,31 +632,36 @@


     local_size = compiler->local_size;
     if (local_size <= SIMM_MAX)
-        base = S(SLJIT_LOCALS_REG);
+        base = S(SLJIT_SP);
     else {
         FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
-        FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
         base = S(TMP_REG1);
         local_size = 0;
     }


-    FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG));
-    if (compiler->saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG2)));
-    if (compiler->saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG1)));
-    if (compiler->saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG3)));
-    if (compiler->saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG2)));
-    if (compiler->saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG1)));
+    FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG));
+    offs = local_size - (sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);


+    tmp = compiler->scratches;
+    for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
+        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i)));
+        offs += (sljit_si)(sizeof(sljit_sw));
+    }
+
+    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = tmp; i <= SLJIT_S0; i++) {
+        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i)));
+        offs += (sljit_si)(sizeof(sljit_sw));
+    }
+
+    SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw)));
+
     FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
     if (compiler->local_size <= SIMM_MAX)
-        return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS);
+        return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS);
     else
-        return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS);
+        return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS);
 }


 #undef STACK_STORE
@@ -1043,12 +1056,12 @@
     case SLJIT_UMUL:
     case SLJIT_SMUL:
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
-        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
 #else
-        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
 #endif
-        FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1)));
-        return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2));
+        FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
+        return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
     case SLJIT_UDIV:
     case SLJIT_SDIV:
 #if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
@@ -1058,15 +1071,15 @@


 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
         if (int_op)
-            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
         else
-            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
 #else
-        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
 #endif


-        FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1)));
-        return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2));
+        FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
+        return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
     }


     return SLJIT_SUCCESS;
@@ -1702,7 +1715,7 @@
         PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
         jump->addr = compiler->size;
         /* A NOP if type < CALL1. */
-        PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS));
+        PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS));
     }
     return jump;
 }
@@ -1963,12 +1976,12 @@
             }
             FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
             /* We need an extra instruction in any case. */
-            return push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS);
+            return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS);
         }


         /* Register input. */
         if (type >= SLJIT_CALL1)
-            FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), 4));
+            FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4));
         FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
         return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS);
     }
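
[Note on the MIPS sljit_emit_enter/sljit_emit_return changes above: the frame is now sized with GET_SAVED_REGISTERS_SIZE and the saved registers are stored and restored in loops rather than one if per register. A rough model of the frame-size arithmetic; the meaning of GET_SAVED_REGISTERS_SIZE (return address plus saveds plus callee-saved scratches, in machine words) and the FIXED_LOCALS_OFFSET value are assumptions for illustration only:]

/* Sketch only: the constants below are placeholders, not the library's values. */
static int mips_frame_size(int scratches, int saveds, int local_size)
{
    const int word = (int)sizeof(long);
    const int fixed_locals_offset = 4 * word;   /* assumed ABI scratch area */
    const int first_saved_reg = 9;              /* assumed SLJIT_FIRST_SAVED_REG index */

    int saved_scratches = (scratches >= first_saved_reg)
        ? (scratches - first_saved_reg + 1) : 0;
    /* "GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1)": RA + S regs + saved scratches. */
    int size = (1 + saveds + saved_scratches) * word;

    return ((local_size + size + fixed_locals_offset) + 15) & ~0xf;  /* MIPS-32 path: 16-byte aligned */
}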


Modified: code/trunk/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_common.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativePPC_common.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -87,22 +87,22 @@
 #endif /* _AIX */
 }


-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
-#define TMP_ZERO    (SLJIT_NO_REGISTERS + 4)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_ZERO    (SLJIT_NUMBER_OF_REGISTERS + 5)


 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
-#define TMP_CALL_REG    (SLJIT_NO_REGISTERS + 5)
+#define TMP_CALL_REG    (SLJIT_NUMBER_OF_REGISTERS + 6)
 #else
 #define TMP_CALL_REG    TMP_REG2
 #endif


 #define TMP_FREG1    (0)
-#define TMP_FREG2    (SLJIT_FLOAT_REG6 + 1)
+#define TMP_FREG2    (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)


-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
-    0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31, 12
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
+    0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
 };


 /* --------------------------------------------------------------------- */
@@ -571,112 +571,135 @@
 #define STACK_LOAD    LD
 #endif


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
+    sljit_si i, tmp, offs;
+
     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


     FAIL_IF(push_inst(compiler, MFLR | D(0)));
-    FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));
-    if (saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
+    offs = -(sljit_si)(sizeof(sljit_sw));
+    FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
+
+    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = SLJIT_S0; i >= tmp; i--) {
+        offs -= (sljit_si)(sizeof(sljit_sw));
+        FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
+    }
+
+    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+        offs -= (sljit_si)(sizeof(sljit_sw));
+        FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
+    }
+
+    SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
+
 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
-    FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) ));
+    FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
 #else
-    FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) ));
+    FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
 #endif


     FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
     if (args >= 1)
-        FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1)));
+        FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
     if (args >= 2)
-        FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2)));
+        FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
     if (args >= 3)
-        FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3)));
+        FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));


-    local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+    local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
     local_size = (local_size + 15) & ~0xf;
     compiler->local_size = local_size;


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
     if (local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size)));
+        FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
     else {
         FAIL_IF(load_immediate(compiler, 0, -local_size));
-        FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
+        FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
     }
 #else
     if (local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size)));
+        FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
     else {
         FAIL_IF(load_immediate(compiler, 0, -local_size));
-        FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
+        FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
     }
 #endif


     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


-    local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+    local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
     compiler->local_size = (local_size + 15) & ~0xf;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
 {
+    sljit_si i, tmp, offs;
+
     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


     if (compiler->local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size)));
+        FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
     else {
         FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
-        FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
+        FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
     }


 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
-    FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw))));
+    FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
 #else
-    FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw))));
+    FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
 #endif
-    if (compiler->saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (compiler->saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (compiler->saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (compiler->saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) ));
-    if (compiler->saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) ));
-    FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) ));


+    offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
+
+    tmp = compiler->scratches;
+    for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
+        FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
+        offs += (sljit_si)(sizeof(sljit_sw));
+    }
+
+    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = tmp; i <= SLJIT_S0; i++) {
+        FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
+        offs += (sljit_si)(sizeof(sljit_sw));
+    }
+
+    FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
+    SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
+
     FAIL_IF(push_inst(compiler, MTLR | S(0)));
     FAIL_IF(push_inst(compiler, BLR));


@@ -1249,30 +1272,30 @@
         return push_inst(compiler, NOP);
     case SLJIT_UMUL:
     case SLJIT_SMUL:
-        FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
+        FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-        FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
-        return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
+        FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+        return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
 #else
-        FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
-        return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2));
+        FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+        return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
 #endif
     case SLJIT_UDIV:
     case SLJIT_SDIV:
-        FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1)));
+        FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
         if (int_op) {
-            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
-            FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
+            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+            FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
         } else {
-            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
-            FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
+            FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+            FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
         }
-        return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1));
+        return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
 #else
-        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)));
-        FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2)));
-        return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1));
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+        FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
+        return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
 #endif
     }


@@ -1718,8 +1741,8 @@

     if (op == SLJIT_CONVW_FROMD) {
         if (FAST_IS_REG(dst)) {
-            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0));
-            return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0);
+            FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
+            return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
         }
         return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
     }
@@ -1733,8 +1756,8 @@


     if (FAST_IS_REG(dst)) {
         FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
-        FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_LOCALS_REG) | B(TMP_REG1)));
-        return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0);
+        FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
+        return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
     }


     SLJIT_ASSERT(dst & SLJIT_MEM);
@@ -1785,13 +1808,13 @@
         if (FAST_IS_REG(src))
             FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
         else
-            FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+            FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
         src = TMP_REG1;
     }


     if (FAST_IS_REG(src)) {
-        FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
-        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, dst, dstw));
+        FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+        FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
     }
     else
         FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
@@ -1815,7 +1838,7 @@
         invert_sign = 0;
     }
     else if (!FAST_IS_REG(src)) {
-        FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+        FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
         src = TMP_REG1;
     }


@@ -1827,12 +1850,12 @@
     FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
     if (invert_sign)
         FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
-    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
-    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI));
+    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
     FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
-    FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
-    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
-    FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+    FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
+    FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+    FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));


     FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
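The sequence above is the classic exponent-bias conversion: the 0x4330 high word builds 2^52, the low word holds the integer with its sign bit flipped, and subtracting the constant 2^52 + 2^31 leaves the exact double value. A minimal host-side sketch of the same trick (the function name is illustrative, not part of sljit):

    #include <stdint.h>
    #include <string.h>

    /* Convert a signed 32-bit integer to double without an
       int-to-float instruction, as the PPC32 path above does. */
    static double int_to_double_via_bias(int32_t v)
    {
        uint64_t bits;
        double magic, value;

        /* 0x43300000_80000000 read as a double is 2^52 + 2^31. */
        bits = ((uint64_t)0x43300000 << 32) | 0x80000000u;
        memcpy(&magic, &bits, sizeof(magic));

        /* High word 0x43300000, low word v with its sign bit flipped,
           which reads back as 2^52 + 2^31 + v. */
        bits = ((uint64_t)0x43300000 << 32) | ((uint32_t)v ^ 0x80000000u);
        memcpy(&value, &bits, sizeof(value));

        return value - magic;  /* exactly (double)v */
    }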



Modified: code/trunk/sljit/sljitNativeSPARC_32.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_32.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeSPARC_32.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -110,8 +110,8 @@
         if (!(flags & SET_FLAGS))
             return SLJIT_SUCCESS;
         FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1)));
-        FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4)));
-        return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS);
+        FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK)));
+        return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS);


     case SLJIT_AND:
         return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
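The RDY-based sequence a few lines above detects signed multiply overflow: %y holds the upper 32 bits of the 64-bit product, and the product fits in 32 bits exactly when that upper word equals the sign extension of the lower word (the SRA by 31). A portable sketch of the same check, names invented for illustration:

    #include <stdint.h>

    static int smul_overflows(int32_t a, int32_t b)
    {
        int64_t product = (int64_t)a * b;
        int32_t lo = (int32_t)product;
        int32_t hi = (int32_t)(product >> 32);  /* what RDY reads from %y */
        return hi != (lo < 0 ? -1 : 0);         /* the SRA ... IMM(31) test */
    }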


Modified: code/trunk/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/sljit/sljitNativeSPARC_common.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeSPARC_common.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -83,17 +83,16 @@
 }


 /* TMP_REG2 is not used by getput_arg */
-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
-#define TMP_REG4    (SLJIT_NO_REGISTERS + 4)
-#define TMP_LINK    (SLJIT_NO_REGISTERS + 5)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_LINK    (SLJIT_NUMBER_OF_REGISTERS + 5)


 #define TMP_FREG1    (0)
-#define TMP_FREG2    ((SLJIT_FLOAT_REG6 + 1) << 1)
+#define TMP_FREG2    ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)


-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
-    0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+    0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15
 };


/* --------------------------------------------------------------------- */
@@ -419,13 +418,17 @@
#include "sljitNativeSPARC_64.c"
#endif

-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif
@@ -434,30 +437,29 @@
     compiler->local_size = local_size;


     if (local_size <= SIMM_MAX) {
-        FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS));
     }
     else {
         FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size));
-        FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS));
     }


-    if (args >= 1)
-        FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1)));
-    if (args >= 2)
-        FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2)));
-    if (args >= 3)
-        FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3)));
+    /* Arguments are in their appropriate registers. */


     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif
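Every backend now takes the float register counts in sljit_emit_enter and sljit_set_context. A hedged usage sketch with invented counts, assuming a previously created struct sljit_compiler *compiler:

    /* All counts below are illustrative. */
    sljit_si err = sljit_emit_enter(compiler, /* args */ 2, /* scratches */ 3,
        /* saveds */ 2, /* fscratches */ 1, /* fsaveds */ 0, /* local_size */ 64);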
@@ -472,11 +474,11 @@


     if (op != SLJIT_MOV || !FAST_IS_REG(src)) {
         FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
-        src = SLJIT_SCRATCH_REG1;
+        src = SLJIT_R0;
     }


     FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
-    return push_inst(compiler, RESTORE | D(SLJIT_SCRATCH_REG1) | S1(src) | S2(0), UNMOVABLE_INS);
+    return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS);
 }


 /* --------------------------------------------------------------------- */
@@ -783,8 +785,8 @@
     case SLJIT_UMUL:
     case SLJIT_SMUL:
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
-        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1)));
-        return push_inst(compiler, RDY | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2));
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
+        return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1));
 #else
 #error "Implementation required"
 #endif
@@ -794,13 +796,13 @@
         if (op == SLJIT_UDIV)
             FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS));
         else {
-            FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_SCRATCH_REG1) | IMM(31), DR(TMP_REG1)));
+            FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1)));
             FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
         }
-        FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_SCRATCH_REG1), DR(TMP_REG2)));
-        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1)));
-        FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_SCRATCH_REG2) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)));
-        FAIL_IF(push_inst(compiler, SUB | D(SLJIT_SCRATCH_REG2) | S1(TMP_REG2) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)));
+        FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2)));
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
+        FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1)));
+        FAIL_IF(push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)));
         return SLJIT_SUCCESS;
 #else
 #error "Implementation required"
@@ -977,8 +979,8 @@
         return SLJIT_SUCCESS;


     if (FAST_IS_REG(dst)) {
-        FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
-        return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET);
+        FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+        return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET);
     }


     /* Store the integer value from a VFP register. */
@@ -1002,8 +1004,8 @@
     }


     if (FAST_IS_REG(src)) {
-        FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
-        src = SLJIT_MEM1(SLJIT_LOCALS_REG);
+        FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+        src = SLJIT_MEM1(SLJIT_SP);
         srcw = FLOAT_TMP_MEM_OFFSET;
     }
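Both conversion paths above bounce the value through the FLOAT_TMP_MEM_OFFSET stack slot, the usual idiom on these SPARC targets where there is no direct move between the integer and floating-point register files: store from one side, load on the other. A host-side analogue of that bounce (helper name invented):

    #include <string.h>

    /* "Store word, load single": the memory round trip the hunks above emit. */
    static float word_to_float_reg(unsigned int w)
    {
        float f;
        memcpy(&f, &w, sizeof(f));
        return f;
    }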



Modified: code/trunk/sljit/sljitNativeTILEGX_64.c
===================================================================
--- code/trunk/sljit/sljitNativeTILEGX_64.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeTILEGX_64.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -1173,16 +1173,20 @@
     return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     sljit_ins base;
     sljit_ins bundle = 0;


     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif
@@ -1233,13 +1237,17 @@
     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif
@@ -2370,7 +2378,7 @@
                 FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
             }


-            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
+            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));


             FAIL_IF(ADDI_SOLO(54, 54, -16));


@@ -2381,7 +2389,7 @@

         /* Register input. */
         if (type >= SLJIT_CALL1)
-            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
+            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));


         FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));


@@ -2511,7 +2519,7 @@
         SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
         /* Cannot be optimized out if type is >= CALL0. */
         jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
-        PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO));
+        PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
         jump->addr = compiler->size;
         PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
     }


Modified: code/trunk/sljit/sljitNativeX86_32.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_32.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeX86_32.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -63,27 +63,31 @@
     return code_ptr;
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
     sljit_si size;
-    sljit_si locals_offset;
     sljit_ub *inst;


     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
     compiler->args = args;
     compiler->flags_saved = 0;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


+    size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3);
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-    size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
+    size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
 #else
-    size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (2 + args * 3) : 0);
+    size += (args > 0 ? (2 + args * 3) : 0);
 #endif
     inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
     FAIL_IF(!inst);
@@ -96,76 +100,66 @@
         *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
     }
 #endif
-    if (saveds > 2)
-        PUSH_REG(reg_map[SLJIT_SAVED_REG3]);
-    if (saveds > 1)
-        PUSH_REG(reg_map[SLJIT_SAVED_REG2]);
-    if (saveds > 0)
-        PUSH_REG(reg_map[SLJIT_SAVED_REG1]);
+    if (saveds > 2 || scratches > 7)
+        PUSH_REG(reg_map[SLJIT_S2]);
+    if (saveds > 1 || scratches > 8)
+        PUSH_REG(reg_map[SLJIT_S1]);
+    if (saveds > 0 || scratches > 9)
+        PUSH_REG(reg_map[SLJIT_S0]);


 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
     if (args > 0) {
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_SCRATCH_REG3];
+        *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
     }
     if (args > 1) {
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_SCRATCH_REG2];
+        *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
     }
     if (args > 2) {
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */;
+        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
         *inst++ = 0x24;
         *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
     }
 #else
     if (args > 0) {
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REG1];
+        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
         *inst++ = sizeof(sljit_sw) * 2;
     }
     if (args > 1) {
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REG1];
+        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
         *inst++ = sizeof(sljit_sw) * 3;
     }
     if (args > 2) {
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REG1];
+        *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
         *inst++ = sizeof(sljit_sw) * 4;
     }
 #endif


-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-    locals_offset = 2 * sizeof(sljit_uw);
-#else
-    SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= 2 * sizeof(sljit_uw), require_at_least_two_words);
-    locals_offset = FIXED_LOCALS_OFFSET;
-#endif
-    compiler->scratches_start = locals_offset;
-    if (scratches > 3)
-        locals_offset += (scratches - 3) * sizeof(sljit_uw);
-    compiler->saveds_start = locals_offset;
-    if (saveds > 3)
-        locals_offset += (saveds - 3) * sizeof(sljit_uw);
-    compiler->locals_offset = locals_offset;
+    SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words);
 #if defined(__APPLE__)
-    saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
-    local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
+    /* Ignore pushed registers and FIXED_LOCALS_OFFSET when
+    computing the aligned local size. */
+    saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+    local_size = ((FIXED_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
 #else
-    local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
+    local_size = FIXED_LOCALS_OFFSET + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
 #endif


     compiler->local_size = local_size;
 #ifdef _WIN32
     if (local_size > 1024) {
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-        FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size));
+        FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
 #else
         local_size -= FIXED_LOCALS_OFFSET;
-        FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size));
+        FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
         FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
-            SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET));
+            SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET));
 #endif
         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
     }
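On Darwin the stack has to stay 16-byte aligned at call boundaries, so the prologue above rounds the pushed words plus FIXED_LOCALS_OFFSET plus local_size up to a multiple of 16 and then takes the pushed words back out. A worked example with invented numbers:

    /* All numbers invented; FIXED_LOCALS_OFFSET is taken to be 24. */
    int pushed = (2 + 2) * 4;     /* return address + ebp + two saved regs */
    int local_size = 40;
    local_size = ((24 + pushed + local_size + 15) & ~15) - pushed;
    /* (24 + 16 + 40 + 15) & ~15 = 80, minus 16 leaves 64; pushed + 64 = 80,
       so esp is back on a 16-byte boundary after the final SUB. */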
@@ -173,40 +167,30 @@


     SLJIT_ASSERT(local_size > 0);
     return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
-        SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size);
+        SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
-    sljit_si locals_offset;
-
     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
     compiler->args = args;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-    locals_offset = 2 * sizeof(sljit_uw);
-#else
-    locals_offset = FIXED_LOCALS_OFFSET;
-#endif
-    compiler->scratches_start = locals_offset;
-    if (scratches > 3)
-        locals_offset += (scratches - 3) * sizeof(sljit_uw);
-    compiler->saveds_start = locals_offset;
-    if (saveds > 3)
-        locals_offset += (saveds - 3) * sizeof(sljit_uw);
-    compiler->locals_offset = locals_offset;
 #if defined(__APPLE__)
-    saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
-    compiler->local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
+    saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+    compiler->local_size = ((FIXED_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
 #else
-    compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
+    compiler->local_size = FIXED_LOCALS_OFFSET + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
 #endif
 }


@@ -224,9 +208,10 @@

     SLJIT_ASSERT(compiler->local_size > 0);
     FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
-        SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size));
+        SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));


-    size = 2 + (compiler->saveds <= 3 ? compiler->saveds : 3);
+    size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
+        (compiler->saveds <= 3 ? compiler->saveds : 3);
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
     if (compiler->args > 2)
         size += 2;
@@ -239,12 +224,12 @@


     INC_SIZE(size);


-    if (compiler->saveds > 0)
-        POP_REG(reg_map[SLJIT_SAVED_REG1]);
-    if (compiler->saveds > 1)
-        POP_REG(reg_map[SLJIT_SAVED_REG2]);
-    if (compiler->saveds > 2)
-        POP_REG(reg_map[SLJIT_SAVED_REG3]);
+    if (compiler->saveds > 0 || compiler->scratches > 9)
+        POP_REG(reg_map[SLJIT_S0]);
+    if (compiler->saveds > 1 || compiler->scratches > 8)
+        POP_REG(reg_map[SLJIT_S1]);
+    if (compiler->saveds > 2 || compiler->scratches > 7)
+        POP_REG(reg_map[SLJIT_S2]);
     POP_REG(reg_map[TMP_REG1]);
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
     if (compiler->args > 2)
@@ -307,8 +292,8 @@
                 inst_size += sizeof(sljit_sw);
         }


-        if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK))
-            b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
+        if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
+            b |= TO_OFFS_REG(SLJIT_SP);


         if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
             inst_size += 1; /* SIB byte. */
@@ -379,7 +364,7 @@
     if (!(b & SLJIT_MEM))
         *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
     else if ((b & REG_MASK) != SLJIT_UNUSED) {
-        if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
+        if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
             if (immb != 0) {
                 if (immb <= 127 && immb >= -128)
                     *buf_ptr |= 0x40;
@@ -440,28 +425,28 @@
     INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);


     if (type >= SLJIT_CALL3)
-        PUSH_REG(reg_map[SLJIT_SCRATCH_REG3]);
+        PUSH_REG(reg_map[SLJIT_R2]);
     *inst++ = MOV_r_rm;
-    *inst++ = MOD_REG | (reg_map[SLJIT_SCRATCH_REG3] << 3) | reg_map[SLJIT_SCRATCH_REG1];
+    *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
 #else
     inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
     FAIL_IF(!inst);
     INC_SIZE(4 * (type - SLJIT_CALL0));


     *inst++ = MOV_rm_r;
-    *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG1] << 3) | 0x4 /* SIB */;
-    *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG];
+    *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
+    *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
     *inst++ = 0;
     if (type >= SLJIT_CALL2) {
         *inst++ = MOV_rm_r;
-        *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG2] << 3) | 0x4 /* SIB */;
-        *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG];
+        *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
+        *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
         *inst++ = sizeof(sljit_sw);
     }
     if (type >= SLJIT_CALL3) {
         *inst++ = MOV_rm_r;
-        *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG3] << 3) | 0x4 /* SIB */;
-        *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG];
+        *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
+        *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
         *inst++ = 2 * sizeof(sljit_sw);
     }
 #endif


Modified: code/trunk/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_64.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeX86_64.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -87,118 +87,94 @@
     return code_ptr;
 }


-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
-    sljit_si size, pushed_size;
+    sljit_si i, tmp, size, allocated_size;
     sljit_ub *inst;


     CHECK_ERROR();
-    check_sljit_emit_enter(compiler, args, scratches, saveds, local_size);
+    check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
     compiler->flags_saved = 0;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


-    size = saveds;
     /* Including the return address saved by the call instruction. */
-    pushed_size = (saveds + 1) * sizeof(sljit_sw);
-#ifndef _WIN64
-    if (saveds >= 2)
-        size += saveds - 1;
-#else
-    if (saveds >= 4)
-        size += saveds - 3;
-    if (scratches >= 5) {
-        size += (5 - 4) * 2;
-        pushed_size += sizeof(sljit_sw);
-    }
-#endif
-    size += args * 3;
-    if (size > 0) {
+    allocated_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+
+    tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = SLJIT_S0; i >= tmp; i--) {
+        size = reg_map[i] >= 8 ? 2 : 1;
         inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
         FAIL_IF(!inst);
+        INC_SIZE(size);
+        if (reg_map[i] >= 8)
+            *inst++ = REX_B;
+        PUSH_REG(reg_lmap[i]);
+    }


+    for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+        size = reg_map[i] >= 8 ? 2 : 1;
+        inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+        FAIL_IF(!inst);
         INC_SIZE(size);
-        if (saveds >= 5) {
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
+        if (reg_map[i] >= 8)
             *inst++ = REX_B;
-            PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
-        }
-        if (saveds >= 4) {
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
-            *inst++ = REX_B;
-            PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
-        }
-        if (saveds >= 3) {
-#ifndef _WIN64
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
-            *inst++ = REX_B;
-#else
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
-#endif
-            PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
-        }
-        if (saveds >= 2) {
-#ifndef _WIN64
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
-            *inst++ = REX_B;
-#else
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
-#endif
-            PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
-        }
-        if (saveds >= 1) {
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
-            PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
-        }
-#ifdef _WIN64
-        if (scratches >= 5) {
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_EREG2] >= 8, temporary_ereg2_is_hireg);
-            *inst++ = REX_B;
-            PUSH_REG(reg_lmap[SLJIT_SCRATCH_EREG2]);
-        }
-#endif
+        PUSH_REG(reg_lmap[i]);
+    }


+    if (args > 0) {
+        size = args * 3;
+        inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+        FAIL_IF(!inst);
+
+        INC_SIZE(size);
+
 #ifndef _WIN64
         if (args > 0) {
             *inst++ = REX_W;
             *inst++ = MOV_r_rm;
-            *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */;
+            *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
         }
         if (args > 1) {
             *inst++ = REX_W | REX_R;
             *inst++ = MOV_r_rm;
-            *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */;
+            *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
         }
         if (args > 2) {
             *inst++ = REX_W | REX_R;
             *inst++ = MOV_r_rm;
-            *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */;
+            *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
         }
 #else
         if (args > 0) {
             *inst++ = REX_W;
             *inst++ = MOV_r_rm;
-            *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */;
+            *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
         }
         if (args > 1) {
             *inst++ = REX_W;
             *inst++ = MOV_r_rm;
-            *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */;
+            *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
         }
         if (args > 2) {
             *inst++ = REX_W | REX_B;
             *inst++ = MOV_r_rm;
-            *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */;
+            *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
         }
 #endif
     }


-    local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
+    local_size = ((local_size + FIXED_LOCALS_OFFSET + allocated_size + 16 - 1) & ~(16 - 1)) - allocated_size;
     compiler->local_size = local_size;
+
 #ifdef _WIN64
     if (local_size > 1024) {
         /* Allocate stack for the callback, which grows the stack. */
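GET_SAVED_REGISTERS_SIZE is introduced in sljitLir.h in this same revision and is not quoted here; judging from the push loops above it should amount to the bytes taken by the saved registers actually pushed, the high scratch registers that alias them, and `extra` additional words (the return address in this call). A sketch under that assumption, not the actual definition:

    #define GET_SAVED_REGISTERS_SIZE_SKETCH(scratches, saveds, extra) \
        ((((scratches) >= SLJIT_FIRST_SAVED_REG ? \
            (scratches) - SLJIT_FIRST_SAVED_REG + 1 : 0) + \
          ((saveds) < SLJIT_NUMBER_OF_SAVED_REGISTERS ? \
            (saveds) : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \
          (extra)) * (sljit_si)sizeof(sljit_sw))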
@@ -208,9 +184,10 @@
         *inst++ = REX_W;
         *inst++ = GROUP_BINARY_83;
         *inst++ = MOD_REG | SUB | 4;
-        /* Pushed size must be divisible by 8. */
-        SLJIT_ASSERT(!(pushed_size & 0x7));
-        if (pushed_size & 0x8) {
+        /* Allocated size for registers must be divisible by 8. */
+        SLJIT_ASSERT(!(allocated_size & 0x7));
+        /* Aligned to 16 bytes. */

+        if (allocated_size & 0x8) {
             *inst++ = 5 * sizeof(sljit_sw);
             local_size -= 5 * sizeof(sljit_sw);
         } else {
@@ -218,10 +195,10 @@
             local_size -= 4 * sizeof(sljit_sw);
         }
         /* Second instruction */
-        SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg);
+        SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg);
         *inst++ = REX_W;
         *inst++ = MOV_rm_i32;
-        *inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1];
+        *inst++ = MOD_REG | reg_lmap[SLJIT_R0];
         *(sljit_si*)inst = local_size;
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
         compiler->skip_checks = 1;
@@ -229,6 +206,7 @@
         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
     }
 #endif
+
     SLJIT_ASSERT(local_size > 0);
     if (local_size <= 127) {
         inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
@@ -249,43 +227,46 @@
         *(sljit_si*)inst = local_size;
         inst += sizeof(sljit_si);
     }
+
 #ifdef _WIN64
-    /* Save xmm6 with MOVAPS instruction. */
-    inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
-    FAIL_IF(!inst);
-    INC_SIZE(5);
-    *inst++ = GROUP_0F;
-    *(sljit_si*)inst = 0x20247429;
+    /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
+    if (fscratches >= 6 || fsaveds >= 1) {
+        inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+        FAIL_IF(!inst);
+        INC_SIZE(5);
+        *inst++ = GROUP_0F;
+        *(sljit_si*)inst = 0x20247429;
+    }
 #endif


     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
+    sljit_si args, sljit_si scratches, sljit_si saveds,
+    sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
 {
-    sljit_si pushed_size;
+    sljit_si allocated_size;


     CHECK_ERROR_VOID();
-    check_sljit_set_context(compiler, args, scratches, saveds, local_size);
+    check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size);


     compiler->scratches = scratches;
     compiler->saveds = saveds;
+    compiler->fscratches = fscratches;
+    compiler->fsaveds = fsaveds;
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
     compiler->logical_local_size = local_size;
 #endif


     /* Including the return address saved by the call instruction. */
-    pushed_size = (saveds + 1) * sizeof(sljit_sw);
-#ifdef _WIN64
-    if (scratches >= 5)
-        pushed_size += sizeof(sljit_sw);
-#endif
-    compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
+    allocated_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+    compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + allocated_size + 16 - 1) & ~(16 - 1)) - allocated_size;
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
 {
-    sljit_si size;
+    sljit_si i, tmp, size;
     sljit_ub *inst;


     CHECK_ERROR();
@@ -295,13 +276,16 @@
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


 #ifdef _WIN64
-    /* Restore xmm6 with MOVAPS instruction. */
-    inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
-    FAIL_IF(!inst);
-    INC_SIZE(5);
-    *inst++ = GROUP_0F;
-    *(sljit_si*)inst = 0x20247428;
+    /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
+    if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
+        inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
+        FAIL_IF(!inst);
+        INC_SIZE(5);
+        *inst++ = GROUP_0F;
+        *(sljit_si*)inst = 0x20247428;
+    }
 #endif
+
     SLJIT_ASSERT(compiler->local_size > 0);
     if (compiler->local_size <= 127) {
         inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
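The magic words written in the two xmm6 hunks above decode to the expected MOVAPS pair once GROUP_0F (the 0x0F escape byte) and the little-endian 32-bit store are spelled out byte by byte:

    static const unsigned char spill_xmm6[5]  = { 0x0F, 0x29, 0x74, 0x24, 0x20 }; /* movaps [rsp+0x20], xmm6 */
    static const unsigned char reload_xmm6[5] = { 0x0F, 0x28, 0x74, 0x24, 0x20 }; /* movaps xmm6, [rsp+0x20] */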
@@ -322,50 +306,31 @@
         *(sljit_si*)inst = compiler->local_size;
     }


-    size = 1 + compiler->saveds;
-#ifndef _WIN64
-    if (compiler->saveds >= 2)
-        size += compiler->saveds - 1;
-#else
-    if (compiler->saveds >= 4)
-        size += compiler->saveds - 3;
-    if (compiler->scratches >= 5)
-        size += (5 - 4) * 2;
-#endif
-    inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
-    FAIL_IF(!inst);
+    tmp = compiler->scratches;
+    for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
+        size = reg_map[i] >= 8 ? 2 : 1;
+        inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+        FAIL_IF(!inst);
+        INC_SIZE(size);
+        if (reg_map[i] >= 8)
+            *inst++ = REX_B;
+        POP_REG(reg_lmap[i]);
+    }


-    INC_SIZE(size);
-
-#ifdef _WIN64
-    if (compiler->scratches >= 5) {
-        *inst++ = REX_B;
-        POP_REG(reg_lmap[SLJIT_SCRATCH_EREG2]);
+    tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+    for (i = tmp; i <= SLJIT_S0; i++) {
+        size = reg_map[i] >= 8 ? 2 : 1;
+        inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
+        FAIL_IF(!inst);
+        INC_SIZE(size);
+        if (reg_map[i] >= 8)
+            *inst++ = REX_B;
+        POP_REG(reg_lmap[i]);
     }
-#endif
-    if (compiler->saveds >= 1)
-        POP_REG(reg_map[SLJIT_SAVED_REG1]);
-    if (compiler->saveds >= 2) {
-#ifndef _WIN64
-        *inst++ = REX_B;
-#endif
-        POP_REG(reg_lmap[SLJIT_SAVED_REG2]);
-    }
-    if (compiler->saveds >= 3) {
-#ifndef _WIN64
-        *inst++ = REX_B;
-#endif
-        POP_REG(reg_lmap[SLJIT_SAVED_REG3]);
-    }
-    if (compiler->saveds >= 4) {
-        *inst++ = REX_B;
-        POP_REG(reg_lmap[SLJIT_SAVED_EREG1]);
-    }
-    if (compiler->saveds >= 5) {
-        *inst++ = REX_B;
-        POP_REG(reg_lmap[SLJIT_SAVED_EREG2]);
-    }


+    inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+    FAIL_IF(!inst);
+    INC_SIZE(1);
     RET();
     return SLJIT_SUCCESS;
 }
@@ -442,7 +407,7 @@
                     b |= TMP_REG3;
             }
             else if (reg_lmap[b & REG_MASK] == 4)
-                b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
+                b |= TO_OFFS_REG(SLJIT_SP);
         }


         if ((b & REG_MASK) == SLJIT_UNUSED)
@@ -450,13 +415,16 @@
         else {
             if (reg_map[b & REG_MASK] >= 8)
                 rex |= REX_B;
-            if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG))) {
+
+            if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
                 /* Immediate operand. */
                 if (immb <= 127 && immb >= -128)
                     inst_size += sizeof(sljit_sb);
                 else
                     inst_size += sizeof(sljit_si);
             }
+            else if (reg_lmap[b & REG_MASK] == 5)
+                inst_size += sizeof(sljit_sb);


             if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
                 inst_size += 1; /* SIB byte. */
@@ -540,8 +508,8 @@
     if (!(b & SLJIT_MEM))
         *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
     else if ((b & REG_MASK) != SLJIT_UNUSED) {
-        if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
-            if (immb != 0) {
+        if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
+            if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
                 if (immb <= 127 && immb >= -128)
                     *buf_ptr |= 0x40;
                 else
@@ -555,7 +523,7 @@
                 *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
             }


-            if (immb != 0) {
+            if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
                 if (immb <= 127 && immb >= -128)
                     *buf_ptr++ = immb; /* 8 bit displacement. */
                 else {
@@ -565,8 +533,12 @@
             }
         }
         else {
+            if (reg_lmap[b & REG_MASK] == 5)
+                *buf_ptr |= 0x40;
             *buf_ptr++ |= 0x04;
             *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
+            if (reg_lmap[b & REG_MASK] == 5)
+                *buf_ptr++ = 0;
         }
     }
     else {
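The new reg_lmap[...] == 5 tests above handle an x86-64 encoding corner case: when the low three bits of the base register are 101 (rbp or r13), mod == 00 does not mean "[base]" but a displacement-only or RIP-relative form, so a zero displacement byte has to be emitted even for offset 0. A sketch of the predicate only, helper name invented:

    static int needs_explicit_displacement(int base_lmap, sljit_sw offset)
    {
        /* Mirrors the "immb != 0 || reg_lmap[b & REG_MASK] == 5" tests above. */
        return offset != 0 || base_lmap == 5;
    }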
@@ -597,7 +569,7 @@
     sljit_ub *inst;


 #ifndef _WIN64
-    SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 6 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers);
+    SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);


     inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
     FAIL_IF(!inst);
@@ -605,13 +577,13 @@
     if (type >= SLJIT_CALL3) {
         *inst++ = REX_W;
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3];
+        *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
     }
     *inst++ = REX_W;
     *inst++ = MOV_r_rm;
-    *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1];
+    *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
 #else
-    SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers);
+    SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);


     inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
     FAIL_IF(!inst);
@@ -619,11 +591,11 @@
     if (type >= SLJIT_CALL3) {
         *inst++ = REX_W | REX_R;
         *inst++ = MOV_r_rm;
-        *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3];
+        *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
     }
     *inst++ = REX_W;
     *inst++ = MOV_r_rm;
-    *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1];
+    *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
 #endif
     return SLJIT_SUCCESS;
 }


Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2014-06-19 07:51:39 UTC (rev 1490)
+++ code/trunk/sljit/sljitNativeX86_common.c    2014-07-07 07:11:16 UTC (rev 1491)
@@ -64,51 +64,46 @@
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)


 /* Last register + 1. */
-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)


-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
-    0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
+    0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
 };


 #define CHECK_EXTRA_REGS(p, w, do) \
-    if (p >= SLJIT_SCRATCH_EREG1 && p <= SLJIT_SCRATCH_EREG2) { \
-        w = compiler->scratches_start + (p - SLJIT_SCRATCH_EREG1) * sizeof(sljit_sw); \
-        p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
+    if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
+        w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
+        p = SLJIT_MEM1(SLJIT_SP); \
         do; \
-    } \
-    else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
-        w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
-        p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
-        do; \
     }


#else /* SLJIT_CONFIG_X86_32 */

 /* Last register + 1. */
-#define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
-#define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
-#define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
+#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3    (SLJIT_NUMBER_OF_REGISTERS + 4)


 /* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present
    Note: avoid using r12 and r13 for memory addressing,
    therefore r12 is better for SAVED_EREG than SAVED_REG. */
 #ifndef _WIN64
 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
-    0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+    0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
 };
 /* low-map. reg_map & 0x7. */
-static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
-    0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+    0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
 };
 #else
 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
-    0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+    0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
 };
 /* low-map. reg_map & 0x7. */
-static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
-    0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+    0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
 };
 #endif
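For the 64-bit tables just above, reg_lmap is documented as reg_map & 0x7, i.e. the three bits that fit into a ModRM/SIB field with the REX bit carried separately. A quick consistency check one could drop into a debug build (illustrative only):

    #include <assert.h>

    static void check_reg_maps(void)
    {
        int i;
        for (i = 0; i < (int)(sizeof(reg_map) / sizeof(reg_map[0])); i++)
            assert(reg_lmap[i] == (reg_map[i] & 0x7));
    }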


@@ -752,14 +747,14 @@
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #ifdef _WIN64
         SLJIT_COMPILE_ASSERT(
-            reg_map[SLJIT_SCRATCH_REG1] == 0
-            && reg_map[SLJIT_SCRATCH_REG2] == 2
+            reg_map[SLJIT_R0] == 0
+            && reg_map[SLJIT_R1] == 2
             && reg_map[TMP_REG1] > 7,
             invalid_register_assignment_for_div_mul);
 #else
         SLJIT_COMPILE_ASSERT(
-            reg_map[SLJIT_SCRATCH_REG1] == 0
-            && reg_map[SLJIT_SCRATCH_REG2] < 7
+            reg_map[SLJIT_R0] == 0
+            && reg_map[SLJIT_R1] < 7
             && reg_map[TMP_REG1] == 2,
             invalid_register_assignment_for_div_mul);
 #endif
@@ -769,8 +764,8 @@
         op = GET_OPCODE(op);
         if (op == SLJIT_UDIV) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
-            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
-            inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
+            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
+            inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
 #else
             inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
 #endif
@@ -780,7 +775,7 @@


         if (op == SLJIT_SDIV) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
-            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
+            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
 #endif


 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -809,7 +804,7 @@
         FAIL_IF(!inst);
         INC_SIZE(2);
         *inst++ = GROUP_F7;
-        *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]);
+        *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
 #else
 #ifdef _WIN64
         size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
@@ -825,12 +820,12 @@
         else if (op >= SLJIT_UDIV)
             *inst++ = REX_B;
         *inst++ = GROUP_F7;
-        *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]);
+        *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
 #else
         if (!compiler->mode32)
             *inst++ = REX_W;
         *inst++ = GROUP_F7;
-        *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
+        *inst = MOD_REG | reg_map[SLJIT_R1];
 #endif
 #endif
         switch (op) {
@@ -848,7 +843,7 @@
             break;
         }
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
-        EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0);
+        EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
 #endif
         break;
     }
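The compile-time asserts above pin R0 to eax and either R1 or TMP_REG1 to edx because of how the hardware divide works: DIV/IDIV take the dividend in edx:eax and return the quotient in eax and the remainder in edx. A plain C sketch of the unsigned 32-bit case, ignoring the #DE trap on overflow or division by zero:

    #include <stdint.h>

    static void udiv32(uint32_t *eax, uint32_t *edx, uint32_t divisor)
    {
        uint64_t dividend = ((uint64_t)*edx << 32) | *eax;
        uint32_t quotient  = (uint32_t)(dividend / divisor);
        uint32_t remainder = (uint32_t)(dividend % divisor);
        *eax = quotient;   /* what the JIT reads back as R0 */
        *edx = remainder;
    }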
@@ -957,22 +952,22 @@
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
         if (dst_r == TMP_REG1) {
             /* Find an unused register whose reg_map value is below 4. */
-            if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) {
-                if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2))
-                    work_r = SLJIT_SCRATCH_REG3;
+            if ((dst & REG_MASK) == SLJIT_R0) {
+                if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
+                    work_r = SLJIT_R2;
                 else
-                    work_r = SLJIT_SCRATCH_REG2;
+                    work_r = SLJIT_R1;
             }
             else {
-                if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
-                    work_r = SLJIT_SCRATCH_REG1;
-                else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2)
-                    work_r = SLJIT_SCRATCH_REG3;
+                if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
+                    work_r = SLJIT_R0;
+                else if ((dst & REG_MASK) == SLJIT_R1)
+                    work_r = SLJIT_R2;
                 else
-                    work_r = SLJIT_SCRATCH_REG2;
+                    work_r = SLJIT_R1;
             }


-            if (work_r == SLJIT_SCRATCH_REG1) {
+            if (work_r == SLJIT_R0) {
                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
             }
             else {
@@ -985,7 +980,7 @@
             FAIL_IF(!inst);
             *inst = MOV_rm8_r8;


-            if (work_r == SLJIT_SCRATCH_REG1) {
+            if (work_r == SLJIT_R0) {
                 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
             }
             else {
@@ -1180,12 +1175,12 @@
         dst_r = dst;
     else {
         /* Find an unused temporary register. */
-        if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
-            dst_r = SLJIT_SCRATCH_REG1;
-        else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2))
-            dst_r = SLJIT_SCRATCH_REG2;
+        if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
+            dst_r = SLJIT_R0;
+        else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
+            dst_r = SLJIT_R1;
         else
-            dst_r = SLJIT_SCRATCH_REG3;
+            dst_r = SLJIT_R2;
         EMIT_MOV(compiler, dst, dstw, dst_r, 0);
     }
     EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
@@ -1341,7 +1336,7 @@


 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
         if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
-            SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
+            SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
             dst = TMP_REG1;
         }
 #endif
@@ -1379,7 +1374,7 @@


 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
         if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
-            return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0);
+            return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
 #endif


         if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
@@ -1471,9 +1466,9 @@
     if (dst == src1 && dstw == src1w) {
         if (src2 & SLJIT_IMM) {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-            if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 #else
-            if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
+            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
 #endif
                 BINARY_EAX_IMM(op_eax_imm, src2w);
             }
@@ -1505,9 +1500,9 @@
     if (dst == src2 && dstw == src2w) {
         if (src1 & SLJIT_IMM) {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-            if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+            if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 #else
-            if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
+            if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
 #endif
                 BINARY_EAX_IMM(op_eax_imm, src1w);
             }
@@ -1587,9 +1582,9 @@
     if (dst == src1 && dstw == src1w) {
         if (src2 & SLJIT_IMM) {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-            if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 #else
-            if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
+            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
 #endif
                 BINARY_EAX_IMM(op_eax_imm, src2w);
             }
@@ -1841,9 +1836,9 @@
     sljit_ub* inst;


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 #else
-    if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 #endif
         BINARY_EAX_IMM(CMP_EAX_i32, src2w);
         return SLJIT_SUCCESS;
@@ -1892,18 +1887,18 @@
     sljit_ub* inst;


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 #else
-    if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 #endif
         BINARY_EAX_IMM(TEST_EAX_i32, src2w);
         return SLJIT_SUCCESS;
     }


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    if (src2 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+    if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 #else
-    if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
+    if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
 #endif
         BINARY_EAX_IMM(TEST_EAX_i32, src1w);
         return SLJIT_SUCCESS;
@@ -2065,7 +2060,7 @@
         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
 #else
         /* [esp+0] contains the flags. */
-        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
+        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
 #endif
         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
         inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
@@ -2074,7 +2069,7 @@
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
 #else
-        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
+        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
 #endif
         EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
     }
@@ -2216,8 +2211,7 @@
 {
     check_sljit_get_register_index(reg);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-    if (reg == SLJIT_SCRATCH_EREG1 || reg == SLJIT_SCRATCH_EREG2
-            || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
+    if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
         return -1;
 #endif
     return reg_map[reg];
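Since R3..R6 exist only as stack slots on x86-32 (see CHECK_EXTRA_REGS earlier in this file), the query above now reports -1 for them while real registers still return their reg_map entry. An illustrative check:

    /* Illustration only; meaningful on the 32-bit x86 target. */
    static void check_virtual_regs(void)
    {
        SLJIT_ASSERT(sljit_get_register_index(SLJIT_R3) == -1);
        SLJIT_ASSERT(sljit_get_register_index(SLJIT_R0) >= 0);
    }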
@@ -2601,16 +2595,16 @@
     if (type >= SLJIT_CALL1) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-        if (src == SLJIT_SCRATCH_REG3) {
+        if (src == SLJIT_R2) {
             EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
             src = TMP_REG1;
         }
-        if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
+        if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
             srcw += sizeof(sljit_sw);
 #endif
 #endif
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
-        if (src == SLJIT_SCRATCH_REG3) {
+        if (src == SLJIT_R2) {
             EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
             src = TMP_REG1;
         }
@@ -2776,8 +2770,8 @@
     }


     if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
-        SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
-        if (dst != SLJIT_SCRATCH_REG1) {
+        SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
+        if (dst != SLJIT_R0) {
             inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
             FAIL_IF(!inst);
             INC_SIZE(1 + 3 + 2 + 1);
@@ -2846,23 +2840,23 @@
     compiler->mode32 = 0;
 #endif


-    ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
+    ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     if (NOT_HALFWORD(offset)) {
         FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-        SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
+        SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
         return compiler->error;
 #else
-        return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0);
+        return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
 #endif
     }
 #endif


     if (offset != 0)
-        return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
-    return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
+        return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+    return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
 }


SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)