[Pcre-svn] [955] code/trunk: JIT compiler update

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [955] code/trunk: JIT compiler update
Revision: 955
          http://vcs.pcre.org/viewvc?view=rev&revision=955
Author:   zherczeg
Date:     2012-04-03 16:32:36 +0100 (Tue, 03 Apr 2012)


Log Message:
-----------
JIT compiler update

Modified Paths:
--------------
    code/trunk/pcre_jit_compile.c
    code/trunk/sljit/sljitConfigInternal.h
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitLir.h
    code/trunk/sljit/sljitNativeARM_Thumb2.c
    code/trunk/sljit/sljitNativeARM_v5.c
    code/trunk/sljit/sljitNativeMIPS_common.c
    code/trunk/sljit/sljitNativePPC_common.c
    code/trunk/sljit/sljitNativeX86_32.c
    code/trunk/sljit/sljitNativeX86_64.c
    code/trunk/sljit/sljitNativeX86_common.c


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/pcre_jit_compile.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -272,8 +272,7 @@
   struct sljit_compiler *compiler;
   pcre_uchar *start;


- /* Local stack area size and variable pointers. */
- int localsize;
+ /* Opcode local area direct map. */
int *localptrs;
int cbraptr;
/* OVector starting point. Must be divisible by 2. */
@@ -448,6 +447,8 @@
sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
+#define GET_LOCAL_BASE(dst, dstw, offset) \
+ sljit_get_local_base(compiler, (dst), (dstw), (offset))

static pcre_uchar* bracketend(pcre_uchar* cc)
{
@@ -1325,7 +1326,7 @@
}
else
{
- OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
+ GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
loop = LABEL();
OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
@@ -1352,7 +1353,7 @@
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
-OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
+GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
/* Unlikely, but possible */
earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
loop = LABEL();
@@ -1370,7 +1371,7 @@
/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
{
- OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
+ GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);

/* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
@@ -1823,7 +1824,7 @@
DEFINE_COMPILER;
struct sljit_jump *jump;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Searching for the first zero. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_C_NOT_ZERO);
@@ -1882,7 +1883,7 @@
struct sljit_jump *jump;
struct sljit_jump *compare;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_C_NOT_ZERO);
@@ -1919,7 +1920,7 @@
DEFINE_COMPILER;
struct sljit_jump *jump;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
/* Do nothing, only return. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
@@ -1956,7 +1957,7 @@

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
@@ -2384,14 +2385,15 @@
struct sljit_jump *jump;
struct sljit_label *mainloop;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
+GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
@@ -2437,7 +2439,7 @@

SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
@@ -2540,7 +2542,7 @@
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
@@ -2567,7 +2569,7 @@
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
@@ -2606,7 +2608,7 @@
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
@@ -2638,7 +2640,7 @@
struct sljit_jump *jump;
struct sljit_label *label;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
@@ -2667,7 +2669,7 @@
struct sljit_jump *jump;
struct sljit_label *label;

-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
@@ -4871,7 +4873,7 @@
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
-    OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
+    GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
     sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
@@ -4921,7 +4923,7 @@
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
       OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
-      OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
+      GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
       OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
       sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
@@ -6727,7 +6729,7 @@
 common->currententry->entry = LABEL();
 set_jumps(common->currententry->calls, common->currententry->entry);


-sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, TMP2, 0);
allocate_stack(common, localsize + framesize + alternativesize);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
@@ -6819,6 +6821,7 @@
compiler_common *common = &common_data;
const pcre_uint8 *tables = re->tables;
pcre_study_data *study;
+int localsize;
pcre_uchar *ccend;
executable_functions *functions;
void *executable_func;
@@ -6895,8 +6898,8 @@
common->ovector_start = CALL_LIMIT + sizeof(sljit_w);

SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
-common->localsize = get_localspace(common, rootfallback.cc, ccend);
-if (common->localsize < 0)
+localsize = get_localspace(common, rootfallback.cc, ccend);
+if (localsize < 0)
return;

/* Checking flags and updating ovector_start. */
@@ -6927,8 +6930,8 @@

SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
-common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
-if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
+localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
+if (localsize > SLJIT_MAX_LOCAL_SIZE)
return;
common->localptrs = (int *)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
if (!common->localptrs)
@@ -6945,7 +6948,7 @@
common->compiler = compiler;

/* Main pcre_jit_exec entry. */
-sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
+sljit_emit_enter(compiler, 1, 5, 5, localsize);

/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
@@ -7119,7 +7122,7 @@
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
+sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));

Modified: code/trunk/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/sljit/sljitConfigInternal.h    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitConfigInternal.h    2012-04-03 15:32:36 UTC (rev 955)
@@ -335,14 +335,13 @@
 #ifndef SLJIT_SSE2


#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-/* Turn on SSE2 support on x86 (operating on doubles).
- (Better performance than legacy fpu instructions). */
+/* Turn on SSE2 support on x86. */
#define SLJIT_SSE2 1

 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 /* Auto detect SSE2 support using CPUID.
    On 64 bit x86 cpus, sse2 must be present. */
-#define SLJIT_SSE2_AUTO 1
+#define SLJIT_DETECT_SSE2 1
 #endif


#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */

Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitLir.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -166,6 +166,52 @@
     #define MOVABLE_INS    33
 #endif


+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1
+#endif
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#ifdef _WIN64
+#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_w))
+#else
+#define FIXED_LOCALS_OFFSET (sizeof(sljit_w))
+#endif
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_w))
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET (2 * sizeof(sljit_w))
+#endif
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1
+#define FIXED_LOCALS_OFFSET ((7 + 8) * sizeof(sljit_w))
+#endif
+
+#if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET)
+
+#define ADJUST_LOCAL_OFFSET(p, i) \
+    if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+        (i) += compiler->locals_offset;
+
+#elif (defined SLJIT_HAS_FIXED_LOCALS_OFFSET && SLJIT_HAS_FIXED_LOCALS_OFFSET)
+
+#define ADJUST_LOCAL_OFFSET(p, i) \
+    if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+        (i) += FIXED_LOCALS_OFFSET;
+
+#else
+
+#define ADJUST_LOCAL_OFFSET(p, i)
+
+#endif
+
 #endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */


/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */
@@ -192,7 +238,6 @@
static void init_compiler(void);
#endif

-
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void)
 {
     struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler));
@@ -473,25 +518,24 @@
     }


 #define FUNCTION_CHECK_IS_REG(r) \
-    ((r) == SLJIT_UNUSED || (r) == SLJIT_LOCALS_REG || \
-    ((r) >= SLJIT_TEMPORARY_REG1 && (r) <= SLJIT_TEMPORARY_REG3 && (r) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \
-    ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG3 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds)) \
+    ((r) == SLJIT_UNUSED || \
+    ((r) >= SLJIT_TEMPORARY_REG1 && (r) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \
+    ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds))


 #define FUNCTION_CHECK_SRC(p, i) \
     SLJIT_ASSERT(compiler->temporaries != -1 && compiler->saveds != -1); \
-    if (((p) >= SLJIT_TEMPORARY_REG1 && (p) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \
-            ((p) >= SLJIT_SAVED_REG1 && (p) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds) || \
-            (p) == SLJIT_LOCALS_REG) \
-        SLJIT_ASSERT(i == 0); \
+    if (FUNCTION_CHECK_IS_REG(p)) \
+        SLJIT_ASSERT((i) == 0 && (p) != SLJIT_UNUSED); \
     else if ((p) == SLJIT_IMM) \
         ; \
+    else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+        SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
     else if ((p) & SLJIT_MEM) { \
         SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \
         if ((p) & 0xf0) { \
             SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \
-            SLJIT_ASSERT(((p) & 0xf0) != (SLJIT_LOCALS_REG << 4) && !(i & ~0x3)); \
-        } else \
-            SLJIT_ASSERT((((p) >> 4) & 0xf) == 0); \
+            SLJIT_ASSERT(!((i) & ~0x3)); \
+        } \
         SLJIT_ASSERT(((p) >> 9) == 0); \
     } \
     else \
@@ -499,17 +543,16 @@


 #define FUNCTION_CHECK_DST(p, i) \
     SLJIT_ASSERT(compiler->temporaries != -1 && compiler->saveds != -1); \
-    if (((p) >= SLJIT_TEMPORARY_REG1 && (p) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \
-            ((p) >= SLJIT_SAVED_REG1 && (p) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds) || \
-            (p) == SLJIT_UNUSED) \
-        SLJIT_ASSERT(i == 0); \
+    if (FUNCTION_CHECK_IS_REG(p)) \
+        SLJIT_ASSERT((i) == 0); \
+    else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+        SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
     else if ((p) & SLJIT_MEM) { \
         SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \
         if ((p) & 0xf0) { \
             SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \
-            SLJIT_ASSERT(((p) & 0xf0) != (SLJIT_LOCALS_REG << 4) && !(i & ~0x3)); \
-        } else \
-            SLJIT_ASSERT((((p) >> 4) & 0xf) == 0); \
+            SLJIT_ASSERT(!((i) & ~0x3)); \
+        } \
         SLJIT_ASSERT(((p) >> 9) == 0); \
     } \
     else \
@@ -703,6 +746,13 @@
     SLJIT_UNUSED_ARG(saveds);
     SLJIT_UNUSED_ARG(local_size);


+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+        compiler->skip_checks = 0;
+        return;
+    }
+#endif
+
     SLJIT_ASSERT(args >= 0 && args <= 3);
     SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS);
     SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS);
@@ -710,7 +760,7 @@
     SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
-        fprintf(compiler->verbose, "  fake_enter args=%d temporaries=%d saveds=%d local_size=%d\n", args, temporaries, saveds, local_size);
+        fprintf(compiler->verbose, "  set_context args=%d temporaries=%d saveds=%d local_size=%d\n", args, temporaries, saveds, local_size);
 #endif
 }


@@ -743,34 +793,21 @@
#endif
}

-static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
+static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
 {
     /* If debug and verbose are disabled, all arguments are unused. */
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(dst);
     SLJIT_UNUSED_ARG(dstw);
-    SLJIT_UNUSED_ARG(args);
-    SLJIT_UNUSED_ARG(temporaries);
-    SLJIT_UNUSED_ARG(saveds);
-    SLJIT_UNUSED_ARG(local_size);


-    SLJIT_ASSERT(args >= 0 && args <= 3);
-    SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS);
-    SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS);
-    SLJIT_ASSERT(args <= saveds);
-    SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-    compiler->temporaries = temporaries;
-    compiler->saveds = saveds;
     FUNCTION_CHECK_DST(dst, dstw);
-    compiler->temporaries = -1;
-    compiler->saveds = -1;
 #endif
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose)) {
         fprintf(compiler->verbose, "  fast_enter ");
         sljit_verbose_param(dst, dstw);
-        fprintf(compiler->verbose, " args=%d temporaries=%d saveds=%d local_size=%d\n", args, temporaries, saveds, local_size);
+        fprintf(compiler->verbose, "\n");
     }
 #endif
 }
@@ -1113,6 +1150,25 @@
 #endif
 }


+static SLJIT_INLINE void check_sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset)
+{
+    SLJIT_UNUSED_ARG(compiler);
+    SLJIT_UNUSED_ARG(dst);
+    SLJIT_UNUSED_ARG(dstw);
+    SLJIT_UNUSED_ARG(offset);
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+    if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+        fprintf(compiler->verbose, "  local_base ");
+        sljit_verbose_param(dst, dstw);
+        fprintf(compiler->verbose, ", #%"SLJIT_PRINT_D"d\n", offset);
+    }
+#endif
+}
+
 static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
 {
     /* If debug and verbose are disabled, all arguments are unused. */
@@ -1293,6 +1349,22 @@


#endif

+#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+
+SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset)
+{
+    CHECK_ERROR();
+    check_sljit_get_local_base(compiler, dst, dstw, offset);
+
+    ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->skip_checks = 1;
+#endif
+    return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
+}
+
+#endif
+
 #else /* SLJIT_CONFIG_UNSUPPORTED */


 /* Empty function bodies for those machines, which are not (yet) supported. */
@@ -1567,6 +1639,16 @@
     return SLJIT_ERR_UNSUPPORTED;
 }


+SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset)
+{
+    SLJIT_UNUSED_ARG(compiler);
+    SLJIT_UNUSED_ARG(dst);
+    SLJIT_UNUSED_ARG(dstw);
+    SLJIT_UNUSED_ARG(offset);
+    SLJIT_ASSERT_STOP();
+    return SLJIT_ERR_UNSUPPORTED;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w initval)
 {
     SLJIT_UNUSED_ARG(compiler);


Modified: code/trunk/sljit/sljitLir.h
===================================================================
--- code/trunk/sljit/sljitLir.h    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitLir.h    2012-04-03 15:32:36 UTC (rev 955)
@@ -117,9 +117,10 @@
 #define SLJIT_SAVED_EREG1    9
 #define SLJIT_SAVED_EREG2    10


-/* Read-only register (cannot be the destination of an operation). */
-/* Note: SLJIT_MEM2( ... , SLJIT_LOCALS_REG) is not supported (x86 limitation). */
-/* Note: SLJIT_LOCALS_REG is not necessary the real stack pointer. See sljit_emit_enter. */
+/* Read-only register (cannot be the destination of an operation).
+   Only SLJIT_MEM1(SLJIT_LOCALS_REG) addressing mode is allowed since
+   several ABIs have certain limitations on the stack layout. However
+   sljit_get_local_base() can be used to obtain the offset of a value. */
 #define SLJIT_LOCALS_REG    11


/* Number of registers. */
@@ -211,16 +212,14 @@

 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     int args;
+    int locals_offset;
     int temporaries_start;
     int saveds_start;
 #endif


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     int mode32;
-#ifdef _WIN64
-    int has_locals;
 #endif
-#endif


 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     int flags_saved;
@@ -250,14 +249,12 @@
 #endif


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-    int has_locals;
     sljit_w imm;
     int cache_arg;
     sljit_w cache_argw;
 #endif


 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-    int has_locals;
     int delay_slot;
     int cache_arg;
     sljit_w cache_argw;
@@ -267,6 +264,11 @@
     FILE* verbose;
 #endif


+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    /* Local size passed to the functions. */
+    int logical_local_size;
+#endif
+
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
     int skip_checks;
 #endif
@@ -382,7 +384,7 @@
 /* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested,
    since many architectures do clever branch prediction on call / return instruction pairs. */


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size);
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw);
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw);

 /*
@@ -748,6 +750,10 @@
    Note: sljit_emit_cond_value does nothing, if dst is SLJIT_UNUSED (regardless of op). */
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type);


+/* Copies the base address of SLJIT_MEM1(SLJIT_LOCALS_REG)+offset to dst.
+   Flags: - (never set any flags) */
+SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset);
+
 /* The constant can be changed runtime (see: sljit_set_const)
    Flags: - (never set any flags) */
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value);
@@ -768,7 +774,7 @@
 /* --------------------------------------------------------------------- */


 #define SLJIT_MAJOR_VERSION    0
-#define SLJIT_MINOR_VERSION    87
+#define SLJIT_MINOR_VERSION    88


 /* Get the human readable name of the platform.
    Can be useful for debugging on platforms like ARM, where ARM and


Modified: code/trunk/sljit/sljitNativeARM_Thumb2.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -515,7 +515,7 @@
        arg1 must be register, TMP_REG1, imm
        arg2 must be register, TMP_REG2, imm */
     int reg;
-    sljit_uw imm;
+    sljit_uw imm, negated_imm;


     if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
         /* Both are immediates. */
@@ -542,14 +542,25 @@
             /* No form with immediate operand. */
             break;
         case SLJIT_ADD:
+            negated_imm = (sljit_uw)-(sljit_w)imm;
             if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
                 if (imm <= 0x7)
                     return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
-                if (reg == dst && imm <= 0xff)
-                    return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
+                if (negated_imm <= 0x7)
+                    return push_inst16(compiler, SUBSI3 | IMM3(negated_imm) | RD3(dst) | RN3(reg));
+                if (reg == dst) {
+                    if (imm <= 0xff)
+                        return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
+                    if (negated_imm <= 0xff)
+                        return push_inst16(compiler, SUBSI8 | IMM8(negated_imm) | RDN3(dst));
+                }
             }
-            if (imm <= 0xfff && !(flags & SET_FLAGS))
-                return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
+            if (!(flags & SET_FLAGS)) {
+                if (imm <= 0xfff)
+                    return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
+                if (negated_imm <= 0xfff)
+                    return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(negated_imm));
+            }
             imm = get_imm(imm);
             if (imm != INVALID_IMM)
                 return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
@@ -561,18 +572,27 @@
             break;
         case SLJIT_SUB:
             if (flags & ARG2_IMM) {
+                negated_imm = (sljit_uw)-(sljit_w)imm;
                 if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
                     if (imm <= 0x7)
                         return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
-                    if (imm <= 0xff) {
-                        if (reg == dst)
+                    if (negated_imm <= 0x7)
+                        return push_inst16(compiler, ADDSI3 | IMM3(negated_imm) | RD3(dst) | RN3(reg));
+                    if (reg == dst) {
+                        if (imm <= 0xff)
                             return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
-                        if (flags & UNUSED_RETURN)
-                            return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
+                        if (negated_imm <= 0xff)
+                            return push_inst16(compiler, ADDSI8 | IMM8(negated_imm) | RDN3(dst));
                     }
+                    if (imm <= 0xff && (flags & UNUSED_RETURN))
+                        return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
                 }
-                if (imm <= 0xfff && !(flags & SET_FLAGS))
-                    return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
+                if (!(flags & SET_FLAGS)) {
+                    if (imm <= 0xfff)
+                        return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
+                    if (negated_imm <= 0xfff)
+                        return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(negated_imm));
+                }
                 imm = get_imm(imm);
                 if (imm != INVALID_IMM)
                     return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
@@ -1111,6 +1131,9 @@


     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


     push = (1 << 4);
     if (saveds >= 5)
@@ -1161,6 +1184,9 @@


     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


     size = (3 + saveds) * sizeof(sljit_uw);
     local_size += size;
@@ -1175,6 +1201,7 @@


     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


@@ -1274,6 +1301,8 @@

     CHECK_ERROR();
     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
@@ -1402,6 +1431,9 @@


     CHECK_ERROR();
     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
@@ -1645,22 +1677,12 @@
 /*  Other instructions                                                   */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
 {
-    int size;
-
     CHECK_ERROR();
-    check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
+    check_sljit_emit_fast_enter(compiler, dst, dstw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


-    compiler->temporaries = temporaries;
-    compiler->saveds = saveds;
-
-    size = (3 + saveds) * sizeof(sljit_uw);
-    local_size += size;
-    local_size = (local_size + 7) & ~7;
-    local_size -= size;
-    compiler->local_size = local_size;
-
     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
         return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));
     else if (dst & SLJIT_MEM) {
@@ -1679,6 +1701,7 @@
 {
     CHECK_ERROR();
     check_sljit_emit_fast_return(compiler, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
         FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
@@ -1810,6 +1833,7 @@


     CHECK_ERROR();
     check_sljit_emit_ijump(compiler, type, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     /* In ARM, we don't need to touch the arguments. */
     if (src & SLJIT_IMM) {
@@ -1840,6 +1864,7 @@


     CHECK_ERROR();
     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;
@@ -1887,6 +1912,7 @@


     CHECK_ERROR_PTR();
     check_sljit_emit_const(compiler, dst, dstw, init_value);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
     PTR_FAIL_IF(!const_);


Modified: code/trunk/sljit/sljitNativeARM_v5.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_v5.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitNativeARM_v5.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -830,6 +830,9 @@


     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


     /* Push saved registers, temporary registers
        stmdb sp!, {..., lr} */
@@ -880,6 +883,9 @@


     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


     size = (1 + saveds) * sizeof(sljit_uw);
     if (temporaries >= 4)
@@ -896,6 +902,7 @@


     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


@@ -1648,7 +1655,8 @@
         src2 = src1;
         src2w = src1w;
     }
-    else {
+    else do { /* do { } while(0) is used because of breaks. */
+        src1_r = 0;
         if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
             /* The second check will generate a hit. */
             src2_r = get_immediate(src1w);
@@ -1656,6 +1664,7 @@
                 flags |= ARGS_SWAPPED;
                 src1 = src2;
                 src1w = src2w;
+                break;
             }
             if (inp_flags & ALLOW_INV_IMM) {
                 src2_r = get_immediate(~src1w);
@@ -1663,16 +1672,26 @@
                     flags |= ARGS_SWAPPED | INV_IMM;
                     src1 = src2;
                     src1w = src2w;
+                    break;
                 }
             }
+            if (GET_OPCODE(op) == SLJIT_ADD) {
+                src2_r = get_immediate(-src1w);
+                if (src2_r) {
+                    /* Note: ARGS_SWAPPED is intentionally not applied! */
+                    src1 = src2;
+                    src1w = src2w;
+                    op = SLJIT_SUB | GET_ALL_FLAGS(op);
+                    break;
+                }
+            }
         }


-        src1_r = 0;
         if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
             FAIL_IF(compiler->error);
             src1_r = TMP_REG1;
         }
-    }
+    } while (0);


     /* Source 2. */
     if (src2_r == 0) {
@@ -1694,6 +1713,22 @@
                         break;
                     }
                 }
+                if (GET_OPCODE(op) == SLJIT_ADD) {
+                    src2_r = get_immediate(-src2w);
+                    if (src2_r) {
+                        op = SLJIT_SUB | GET_ALL_FLAGS(op);
+                        flags &= ~ARGS_SWAPPED;
+                        break;
+                    }
+                }
+                if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) {
+                    src2_r = get_immediate(-src2w);
+                    if (src2_r) {
+                        op = SLJIT_ADD | GET_ALL_FLAGS(op);
+                        flags &= ~ARGS_SWAPPED;
+                        break;
+                    }
+                }
             }


             /* src2_r is 0. */
@@ -1825,6 +1860,8 @@
 {
     CHECK_ERROR();
     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     switch (GET_OPCODE(op)) {
     case SLJIT_MOV:
@@ -1868,7 +1905,7 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
         compiler->skip_checks = 1;
 #endif
-        return sljit_emit_op2(compiler, SLJIT_SUB | GET_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
+        return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);


     case SLJIT_CLZ:
         return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
@@ -1884,6 +1921,9 @@
 {
     CHECK_ERROR();
     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     switch (GET_OPCODE(op)) {
     case SLJIT_ADD:
@@ -2141,24 +2181,12 @@
 /*  Other instructions                                                   */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
 {
-    int size;
-
     CHECK_ERROR();
-    check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
+    check_sljit_emit_fast_enter(compiler, dst, dstw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


-    compiler->temporaries = temporaries;
-    compiler->saveds = saveds;
-
-    size = (1 + saveds) * sizeof(sljit_uw);
-    if (temporaries >= 4)
-        size += (temporaries - 3) * sizeof(sljit_uw);
-    local_size += size;
-    local_size = (local_size + 7) & ~7;
-    local_size -= size;
-    compiler->local_size = local_size;
-
     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
         return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));
     else if (dst & SLJIT_MEM) {
@@ -2177,6 +2205,7 @@
 {
     CHECK_ERROR();
     check_sljit_emit_fast_return(compiler, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
         EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)));
@@ -2316,6 +2345,7 @@


     CHECK_ERROR();
     check_sljit_emit_ijump(compiler, type, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     /* In ARM, we don't need to touch the arguments. */
     if (src & SLJIT_IMM) {
@@ -2355,6 +2385,7 @@


     CHECK_ERROR();
     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;
@@ -2393,6 +2424,7 @@


     CHECK_ERROR_PTR();
     check_sljit_emit_const(compiler, dst, dstw, init_value);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
     PTR_FAIL_IF(!const_);


Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -39,7 +39,6 @@
 #define TMP_REG1    (SLJIT_NO_REGISTERS + 1)
 #define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
 #define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
-#define REAL_STACK_PTR    (SLJIT_NO_REGISTERS + 4)


 /* For position independent code, t9 must contain the function address. */
 #define PIC_ADDR_REG        TMP_REG2
@@ -174,7 +173,7 @@
 #define UIMM_MAX    (0xffff)


 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
-    0, 2, 5, 6, 3, 8, 17, 18, 19, 20, 21, 16, 4, 25, 9, 29
+    0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9
 };

 /* dest_reg is the absolute name of the register
@@ -464,42 +463,39 @@

     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


-    compiler->has_locals = local_size > 0;
-    local_size += (saveds + 2 + 4) * sizeof(sljit_w);
+    local_size += (saveds + 1 + 4) * sizeof(sljit_w);
     local_size = (local_size + 15) & ~0xf;
     compiler->local_size = local_size;


     if (local_size <= SIMM_MAX) {
         /* Frequent case. */
-        FAIL_IF(push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(REAL_STACK_PTR) | IMM(-local_size), DR(REAL_STACK_PTR)));
-        base = S(REAL_STACK_PTR);
+        FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG)));
+        base = S(SLJIT_LOCALS_REG);
     }
     else {
         FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
-        FAIL_IF(push_inst(compiler, ADDU_W | S(REAL_STACK_PTR) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
-        FAIL_IF(push_inst(compiler, SUBU_W | S(REAL_STACK_PTR) | T(TMP_REG1) | D(REAL_STACK_PTR), DR(REAL_STACK_PTR)));
+        FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+        FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG)));
         base = S(TMP_REG2);
         local_size = 0;
     }


     FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (int)sizeof(sljit_w)), MOVABLE_INS));
-    if (compiler->has_locals)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), MOVABLE_INS));
     if (saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (int)sizeof(sljit_w)), MOVABLE_INS));
     if (saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (int)sizeof(sljit_w)), MOVABLE_INS));
     if (saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (int)sizeof(sljit_w)), MOVABLE_INS));
     if (saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (int)sizeof(sljit_w)), MOVABLE_INS));
     if (saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (int)sizeof(sljit_w)), MOVABLE_INS));


-    if (compiler->has_locals)
-        FAIL_IF(push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(SLJIT_LOCALS_REG) | IMM(4 * sizeof(sljit_w)), DR(SLJIT_LOCALS_REG)));
-
     if (args >= 1)
         FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1)));
     if (args >= 2)
@@ -517,9 +513,11 @@


     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


-    compiler->has_locals = local_size > 0;
-    local_size += (saveds + 2 + 4) * sizeof(sljit_w);
+    local_size += (saveds + 1 + 4) * sizeof(sljit_w);
     compiler->local_size = (local_size + 15) & ~0xf;
 }


@@ -530,38 +528,37 @@

     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


     local_size = compiler->local_size;
     if (local_size <= SIMM_MAX)
-        base = S(REAL_STACK_PTR);
+        base = S(SLJIT_LOCALS_REG);
     else {
         FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
-        FAIL_IF(push_inst(compiler, ADDU_W | S(REAL_STACK_PTR) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
+        FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
         base = S(TMP_REG1);
         local_size = 0;
     }


     FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (int)sizeof(sljit_w)), RETURN_ADDR_REG));
     if (compiler->saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG2)));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG2)));
     if (compiler->saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG1)));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG1)));
     if (compiler->saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG3)));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG3)));
     if (compiler->saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG2)));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG2)));
     if (compiler->saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG1)));
-    if (compiler->has_locals)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), DR(SLJIT_LOCALS_REG)));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG1)));


     FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
     if (compiler->local_size <= SIMM_MAX)
-        return push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(REAL_STACK_PTR) | IMM(compiler->local_size), UNMOVABLE_INS);
+        return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS);
     else
-        return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(REAL_STACK_PTR), UNMOVABLE_INS);
+        return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS);
 }


 #undef STACK_STORE
@@ -956,6 +953,8 @@

     CHECK_ERROR();
     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);


@@ -1029,6 +1028,9 @@

     CHECK_ERROR();
     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     switch (GET_OPCODE(op)) {
     case SLJIT_ADD:
@@ -1260,18 +1262,12 @@
 /*  Other instructions                                                   */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
 {
     CHECK_ERROR();
-    check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
+    check_sljit_emit_fast_enter(compiler, dst, dstw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


-    compiler->temporaries = temporaries;
-    compiler->saveds = saveds;
-
-    compiler->has_locals = local_size > 0;
-    local_size += (saveds + 2 + 4) * sizeof(sljit_w);
-    compiler->local_size = (local_size + 15) & ~0xf;
-
     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
         return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
     else if (dst & SLJIT_MEM)
@@ -1283,6 +1279,7 @@
 {
     CHECK_ERROR();
     check_sljit_emit_fast_return(compiler, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
         FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG));
@@ -1468,6 +1465,8 @@


     CHECK_ERROR_PTR();
     check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     compiler->cache_arg = 0;
     compiler->cache_argw = 0;
@@ -1671,6 +1670,7 @@


     CHECK_ERROR();
     check_sljit_emit_ijump(compiler, type, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
         if (DR(src) != 4)
@@ -1727,6 +1727,7 @@


     CHECK_ERROR();
     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;
@@ -1814,6 +1815,7 @@


     CHECK_ERROR_PTR();
     check_sljit_emit_const(compiler, dst, dstw, init_value);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
     PTR_FAIL_IF(!const_);


Modified: code/trunk/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_common.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitNativePPC_common.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -49,7 +49,6 @@
 #define TMP_REG2    (SLJIT_NO_REGISTERS + 2)
 #define TMP_REG3    (SLJIT_NO_REGISTERS + 3)
 #define ZERO_REG    (SLJIT_NO_REGISTERS + 4)
-#define REAL_STACK_PTR    (SLJIT_NO_REGISTERS + 5)


 #define TMP_FREG1    (SLJIT_FLOAT_REG4 + 1)
 #define TMP_FREG2    (SLJIT_FLOAT_REG4 + 2)
@@ -168,10 +167,8 @@
 #define SIMM_MIN    (-0x8000)
 #define UIMM_MAX    (0xffff)


-/* SLJIT_LOCALS_REG is not the real stack register, since it must
- point to the head of the stack chain. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
-    0, 3, 4, 5, 6, 7, 29, 28, 27, 26, 25, 31, 8, 9, 10, 30, 1
+    0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31
 };

 static int push_inst(struct sljit_compiler *compiler, sljit_ins ins)
@@ -440,23 +437,23 @@

     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
-    compiler->has_locals = local_size > 0;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


     FAIL_IF(push_inst(compiler, MFLR | D(0)));
-    if (compiler->has_locals)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(-(int)(sizeof(sljit_w))) ));
-    FAIL_IF(push_inst(compiler, STACK_STORE | S(ZERO_REG) | A(REAL_STACK_PTR) | IMM(-2 * (int)(sizeof(sljit_w))) ));
+    FAIL_IF(push_inst(compiler, STACK_STORE | S(ZERO_REG) | A(SLJIT_LOCALS_REG) | IMM(-(int)(sizeof(sljit_w))) ));
     if (saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (int)(sizeof(sljit_w))) ));
     if (saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(REAL_STACK_PTR) | IMM(-4 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (int)(sizeof(sljit_w))) ));
     if (saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (int)(sizeof(sljit_w))) ));
     if (saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (int)(sizeof(sljit_w))) ));
     if (saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) ));
-    FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(REAL_STACK_PTR) | IMM(sizeof(sljit_w)) ));
+        FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (int)(sizeof(sljit_w))) ));
+    FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_w)) ));


     FAIL_IF(push_inst(compiler, ADDI | D(ZERO_REG) | A(0) | 0));
     if (args >= 1)
@@ -467,30 +464,26 @@
         FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_TEMPORARY_REG3)));


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-    compiler->local_size = (2 + saveds + 2) * sizeof(sljit_w) + local_size;
+    compiler->local_size = (1 + saveds + 2) * sizeof(sljit_w) + local_size;
 #else
-    compiler->local_size = (2 + saveds + 7 + 8) * sizeof(sljit_w) + local_size;
+    compiler->local_size = (1 + saveds + 7 + 8) * sizeof(sljit_w) + local_size;
 #endif
     compiler->local_size = (compiler->local_size + 15) & ~0xf;


 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
     if (compiler->local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, STWU | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | IMM(-compiler->local_size)));
+        FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
     else {
         FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
-        FAIL_IF(push_inst(compiler, STWUX | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | B(0)));
+        FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
     }
-    if (compiler->has_locals)
-        FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(2 * sizeof(sljit_w))));
 #else
     if (compiler->local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, STDU | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | IMM(-compiler->local_size)));
+        FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
     else {
         FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
-        FAIL_IF(push_inst(compiler, STDUX | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | B(0)));
+        FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
     }
-    if (compiler->has_locals)
-        FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM((7 + 8) * sizeof(sljit_w))));
 #endif


     return SLJIT_SUCCESS;
@@ -503,12 +496,14 @@


     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


-    compiler->has_locals = local_size > 0;
 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-    compiler->local_size = (2 + saveds + 2) * sizeof(sljit_w) + local_size;
+    compiler->local_size = (1 + saveds + 2) * sizeof(sljit_w) + local_size;
 #else
-    compiler->local_size = (2 + saveds + 7 + 8) * sizeof(sljit_w) + local_size;
+    compiler->local_size = (1 + saveds + 7 + 8) * sizeof(sljit_w) + local_size;
 #endif
     compiler->local_size = (compiler->local_size + 15) & ~0xf;
 }
@@ -517,30 +512,29 @@
 {
     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));


     if (compiler->local_size <= SIMM_MAX)
-        FAIL_IF(push_inst(compiler, ADDI | D(REAL_STACK_PTR) | A(REAL_STACK_PTR) | IMM(compiler->local_size)));
+        FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size)));
     else {
         FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
-        FAIL_IF(push_inst(compiler, ADD | D(REAL_STACK_PTR) | A(REAL_STACK_PTR) | B(0)));
+        FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
     }


-    FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(REAL_STACK_PTR) | IMM(sizeof(sljit_w))));
+    FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_w))));
     if (compiler->saveds >= 5)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (int)(sizeof(sljit_w))) ));
     if (compiler->saveds >= 4)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (int)(sizeof(sljit_w))) ));
     if (compiler->saveds >= 3)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (int)(sizeof(sljit_w))) ));
     if (compiler->saveds >= 2)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(REAL_STACK_PTR) | IMM(-4 * (int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (int)(sizeof(sljit_w))) ));
     if (compiler->saveds >= 1)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) ));
-    FAIL_IF(push_inst(compiler, STACK_LOAD | D(ZERO_REG) | A(REAL_STACK_PTR) | IMM(-2 * (int)(sizeof(sljit_w))) ));
-    if (compiler->has_locals)
-        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(-(int)(sizeof(sljit_w))) ));
+        FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (int)(sizeof(sljit_w))) ));
+    FAIL_IF(push_inst(compiler, STACK_LOAD | D(ZERO_REG) | A(SLJIT_LOCALS_REG) | IMM(-(int)(sizeof(sljit_w))) ));


     FAIL_IF(push_inst(compiler, MTLR | S(0)));
     FAIL_IF(push_inst(compiler, BLR));
@@ -1077,6 +1071,8 @@


     CHECK_ERROR();
     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     if ((src & SLJIT_IMM) && srcw == 0)
         src = ZERO_REG;
@@ -1193,6 +1189,9 @@


     CHECK_ERROR();
     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


     if ((src1 & SLJIT_IMM) && src1w == 0)
         src1 = ZERO_REG;
@@ -1550,22 +1549,12 @@
 /*  Other instructions                                                   */
 /* --------------------------------------------------------------------- */


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
 {
     CHECK_ERROR();
-    check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
+    check_sljit_emit_fast_enter(compiler, dst, dstw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


-    compiler->temporaries = temporaries;
-    compiler->saveds = saveds;
-
-    compiler->has_locals = local_size > 0;
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-    compiler->local_size = (2 + saveds + 2) * sizeof(sljit_w) + local_size;
-#else
-    compiler->local_size = (2 + saveds + 7 + 8) * sizeof(sljit_w) + local_size;
-#endif
-    compiler->local_size = (compiler->local_size + 15) & ~0xf;
-
     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
         return push_inst(compiler, MFLR | D(dst));
     else if (dst & SLJIT_MEM) {
@@ -1580,6 +1569,7 @@
 {
     CHECK_ERROR();
     check_sljit_emit_fast_return(compiler, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
         FAIL_IF(push_inst(compiler, MTLR | S(src)));
@@ -1712,6 +1702,7 @@


     CHECK_ERROR();
     check_sljit_emit_ijump(compiler, type, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     bo_bi_flags = get_bo_bi_flags(compiler, type);
     FAIL_IF(!bo_bi_flags);
@@ -1752,6 +1743,7 @@


     CHECK_ERROR();
     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;
@@ -1857,6 +1849,7 @@


     CHECK_ERROR_PTR();
     check_sljit_emit_const(compiler, dst, dstw, init_value);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
     PTR_FAIL_IF(!const_);


Modified: code/trunk/sljit/sljitNativeX86_32.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_32.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitNativeX86_32.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -66,6 +66,7 @@
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
 {
     int size;
+    int locals_offset;
     sljit_ub *buf;


     CHECK_ERROR();
@@ -75,6 +76,9 @@
     compiler->saveds = saveds;
     compiler->args = args;
     compiler->flags_saved = 0;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
     size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
@@ -132,13 +136,15 @@
     }
 #endif


-    local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1);
-    compiler->temporaries_start = local_size;
+    locals_offset = 2 * sizeof(sljit_uw);
+    compiler->temporaries_start = locals_offset;
     if (temporaries > 3)
-        local_size += (temporaries - 3) * sizeof(sljit_uw);
-    compiler->saveds_start = local_size;
+        locals_offset += (temporaries - 3) * sizeof(sljit_uw);
+    compiler->saveds_start = locals_offset;
     if (saveds > 3)
-        local_size += (saveds - 3) * sizeof(sljit_uw);
+        locals_offset += (saveds - 3) * sizeof(sljit_uw);
+    compiler->locals_offset = locals_offset;
+    local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));


 #ifdef _WIN32
     if (local_size > 1024) {
@@ -157,19 +163,27 @@


 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
 {
+    int locals_offset;
+
     CHECK_ERROR_VOID();
     check_sljit_set_context(compiler, args, temporaries, saveds, local_size);


     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
     compiler->args = args;
-    compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1);
-    compiler->temporaries_start = compiler->local_size;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif
+
+    locals_offset = 2 * sizeof(sljit_uw);
+    compiler->temporaries_start = locals_offset;
     if (temporaries > 3)
-        compiler->local_size += (temporaries - 3) * sizeof(sljit_uw);
-    compiler->saveds_start = compiler->local_size;
+        locals_offset += (temporaries - 3) * sizeof(sljit_uw);
+    compiler->saveds_start = locals_offset;
     if (saveds > 3)
-        compiler->local_size += (saveds - 3) * sizeof(sljit_uw);
+        locals_offset += (saveds - 3) * sizeof(sljit_uw);
+    compiler->locals_offset = locals_offset;
+    compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
 }


 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw)
@@ -180,6 +194,7 @@
     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
     SLJIT_ASSERT(compiler->args >= 0);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     compiler->flags_saved = 0;
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
@@ -431,24 +446,14 @@
     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
 {
     sljit_ub *buf;


     CHECK_ERROR();
-    check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
+    check_sljit_emit_fast_enter(compiler, dst, dstw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


-    compiler->temporaries = temporaries;
-    compiler->saveds = saveds;
-    compiler->args = args;
-    compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1);
-    compiler->temporaries_start = compiler->local_size;
-    if (temporaries > 3)
-        compiler->local_size += (temporaries - 3) * sizeof(sljit_uw);
-    compiler->saveds_start = compiler->local_size;
-    if (saveds > 3)
-        compiler->local_size += (saveds - 3) * sizeof(sljit_uw);
-
     CHECK_EXTRA_REGS(dst, dstw, (void)0);


     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
@@ -481,6 +486,7 @@


     CHECK_ERROR();
     check_sljit_emit_fast_return(compiler, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     CHECK_EXTRA_REGS(src, srcw, (void)0);



Modified: code/trunk/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_64.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitNativeX86_64.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -97,6 +97,9 @@
     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
     compiler->flags_saved = 0;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif


     size = saveds;
     /* Including the return address saved by the call instruction. */
@@ -105,12 +108,6 @@
     if (saveds >= 2)
         size += saveds - 1;
 #else
-    /* Saving the virtual stack pointer. */
-    compiler->has_locals = local_size > 0;
-    if (local_size > 0) {
-        size += 2;
-        pushed_size += sizeof(sljit_w);
-    }
     if (saveds >= 4)
         size += saveds - 3;
     if (temporaries >= 5) {
@@ -162,11 +159,6 @@
             *buf++ = REX_B;
             PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
         }
-        if (local_size > 0) {
-            SLJIT_COMPILE_ASSERT(reg_map[SLJIT_LOCALS_REG] >= 8, locals_reg_is_hireg);
-            *buf++ = REX_B;
-            PUSH_REG(reg_lmap[SLJIT_LOCALS_REG]);
-        }
 #endif


 #ifndef _WIN64
@@ -229,6 +221,7 @@
         FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack)));
     }
 #else
+    local_size += sizeof(sljit_w);
     compiler->local_size = local_size;
     if (local_size > 0) {
 #endif
@@ -256,19 +249,6 @@
     }
 #endif


-#ifdef _WIN64
-    if (compiler->has_locals) {
-        buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
-        FAIL_IF(!buf);
-        INC_SIZE(5);
-        *buf++ = REX_W | REX_R;
-        *buf++ = 0x8d;
-        *buf++ = 0x40 | (reg_lmap[SLJIT_LOCALS_REG] << 3) | 0x4;
-        *buf++ = 0x24;
-        *buf = 4 * sizeof(sljit_w);
-    }
-#endif
-
     return SLJIT_SUCCESS;
 }


@@ -281,18 +261,21 @@

     compiler->temporaries = temporaries;
     compiler->saveds = saveds;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+    compiler->logical_local_size = local_size;
+#endif
+
     /* Including the return address saved by the call instruction. */
     pushed_size = (saveds + 1) * sizeof(sljit_w);
 #ifdef _WIN64
-    compiler->has_locals = local_size > 0;
-    if (local_size > 0)
-        pushed_size += sizeof(sljit_w);
     if (temporaries >= 5)
         pushed_size += sizeof(sljit_w);
 #endif
     compiler->local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
 #ifdef _WIN64
     compiler->local_size += 4 * sizeof(sljit_w);
+#else
+    compiler->local_size += sizeof(sljit_w);
 #endif
 }


@@ -303,6 +286,7 @@

     CHECK_ERROR();
     check_sljit_emit_return(compiler, op, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     compiler->flags_saved = 0;
     FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
@@ -333,8 +317,6 @@
     if (compiler->saveds >= 2)
         size += compiler->saveds - 1;
 #else
-    if (compiler->has_locals)
-        size += 2;
     if (compiler->saveds >= 4)
         size += compiler->saveds - 3;
     if (compiler->temporaries >= 5)
@@ -346,10 +328,6 @@
     INC_SIZE(size);


 #ifdef _WIN64
-    if (compiler->has_locals) {
-        *buf++ = REX_B;
-        POP_REG(reg_lmap[SLJIT_LOCALS_REG]);
-    }
     if (compiler->temporaries >= 5) {
         *buf++ = REX_B;
         POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
@@ -475,10 +453,8 @@
             }
         }


-#ifndef _WIN64
-        if ((b & 0xf) == SLJIT_LOCALS_REG && (b & 0xf0) == 0)
+        if ((b & 0xf) == SLJIT_LOCALS_REG && !(b & 0xf0))
             b |= SLJIT_LOCALS_REG << 4;
-#endif


         if ((b & 0xf0) != SLJIT_UNUSED) {
             inst_size += 1; /* SIB byte. */
@@ -580,9 +556,6 @@
         *buf_ptr++ |= 0xc0 + reg_lmap[b];
 #endif
     else if ((b & 0x0f) != SLJIT_UNUSED) {
-#ifdef _WIN64
-        SLJIT_ASSERT((b & 0xf0) != (SLJIT_LOCALS_REG << 4));
-#endif
         if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) {
             if (immb != 0) {
                 if (immb <= 127 && immb >= -128)
@@ -671,20 +644,14 @@
     return SLJIT_SUCCESS;
 }


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw)
 {
     sljit_ub *buf;


     CHECK_ERROR();
-    check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
+    check_sljit_emit_fast_enter(compiler, dst, dstw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


-    compiler->temporaries = temporaries;
-    compiler->saveds = saveds;
-    compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1);
-#ifdef _WIN64
-    compiler->local_size += 4 * sizeof(sljit_w);
-#endif
-
     /* For UNUSED dst. Uncommon, but possible. */
     if (dst == SLJIT_UNUSED)
         dst = TMP_REGISTER;
@@ -707,10 +674,8 @@
         }
     }
     else if (dst & SLJIT_MEM) {
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
         /* REX_W is not necessary (src is not immediate). */
         compiler->mode32 = 1;
-#endif
         buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
         FAIL_IF(!buf);
         *buf++ = 0x8f;
@@ -724,9 +689,8 @@


     CHECK_ERROR();
     check_sljit_emit_fast_return(compiler, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


-    CHECK_EXTRA_REGS(src, srcw, (void)0);
-
     if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
         FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, srcw));
         src = TMP_REGISTER;
@@ -750,10 +714,8 @@
         }
     }
     else if (src & SLJIT_MEM) {
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
         /* REX_W is not necessary (src is not immediate). */
         compiler->mode32 = 1;
-#endif
         buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
         FAIL_IF(!buf);
         *buf++ = 0xff;


Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2012-03-31 18:09:26 UTC (rev 954)
+++ code/trunk/sljit/sljitNativeX86_common.c    2012-04-03 15:32:36 UTC (rev 955)
@@ -104,11 +104,11 @@
 #else
 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
-  0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
+  0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
 };
 /* low-map. reg_map & 0x7. */
 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
-  0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  4,  7, 2,  0, 1
+  0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
 };
 #endif


@@ -415,18 +415,17 @@
     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
     FAIL_IF(!buf);
     INC_SIZE(5);
-    *buf++ = 0x9c; /* pushfd */
 #else
     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
     FAIL_IF(!buf);
     INC_SIZE(6);
-    *buf++ = 0x9c; /* pushfq */
-    *buf++ = 0x48;
+    *buf++ = REX_W;
 #endif
     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
     *buf++ = 0x64;
     *buf++ = 0x24;
-    *buf++ = sizeof(sljit_w);
+    *buf++ = (sljit_ub)sizeof(sljit_w);
+    *buf++ = 0x9c; /* pushfd / pushfq */
     compiler->flags_saved = 1;
     return SLJIT_SUCCESS;
 }
@@ -439,17 +438,18 @@
     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
     FAIL_IF(!buf);
     INC_SIZE(5);
+    *buf++ = 0x9d; /* popfd */
 #else
     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
     FAIL_IF(!buf);
     INC_SIZE(6);
-    *buf++ = 0x48;
+    *buf++ = 0x9d; /* popfq */
+    *buf++ = REX_W;
 #endif
     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
     *buf++ = 0x64;
     *buf++ = 0x24;
     *buf++ = (sljit_ub)-(int)sizeof(sljit_w);
-    *buf++ = 0x9d; /* popfd / popfq */
     compiler->flags_saved = keep_flags;
     return SLJIT_SUCCESS;
 }
@@ -1050,12 +1050,14 @@


     CHECK_ERROR();
     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


+    CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
+    CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     compiler->mode32 = op & SLJIT_INT_OP;
 #endif
-    CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
-    CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);


     if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
         op = GET_OPCODE(op);
@@ -1558,14 +1560,11 @@
     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;


     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
-        if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
-            /* It is not possible to be both SLJIT_LOCALS_REG. */
-            if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
-                code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
-                FAIL_IF(!code);
-                *code = 0x8d;
-                done = 1;
-            }
+        if ((src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) || src2 == TMP_REGISTER) {
+            code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
+            FAIL_IF(!code);
+            *code = 0x8d;
+            done = 1;
         }
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
         if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
@@ -1831,8 +1830,8 @@
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
         EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
 #else
-        /* [esp - 4] is reserved for eflags. */
-        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
+        /* [esp+0] contains the flags. */
+        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w), SLJIT_PREF_SHIFT_REG, 0);
 #endif
         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
         code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
@@ -1841,8 +1840,7 @@
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
         EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
 #else
-        /* [esp - 4] is reserved for eflags. */
-        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
+        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w));
 #endif
         EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
     }
@@ -1892,13 +1890,16 @@
 {
     CHECK_ERROR();
     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+    ADJUST_LOCAL_OFFSET(src1, src1w);
+    ADJUST_LOCAL_OFFSET(src2, src2w);


-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    compiler->mode32 = op & SLJIT_INT_OP;
-#endif
     CHECK_EXTRA_REGS(dst, dstw, (void)0);
     CHECK_EXTRA_REGS(src1, src1w, (void)0);
     CHECK_EXTRA_REGS(src2, src2w, (void)0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    compiler->mode32 = op & SLJIT_INT_OP;
+#endif


     if (GET_OPCODE(op) >= SLJIT_MUL) {
         if (SLJIT_UNLIKELY(GET_FLAGS(op)))
@@ -1912,7 +1913,7 @@
         if (!GET_FLAGS(op)) {
             if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
                 return compiler->error;
-        } 
+        }
         else
             compiler->flags_saved = 0;
         if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
@@ -2008,10 +2009,6 @@
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */


-#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
-static int sse2_available = 0;
-#endif
-
#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
@@ -2020,17 +2017,25 @@

 static void init_compiler()
 {
-#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
-    int features = 0;
-#endif
-
     sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
     sse2_buffer[0] = 0;
     sse2_buffer[1] = 0x80000000;
     sse2_buffer[4] = 0xffffffff;
     sse2_buffer[5] = 0x7fffffff;
+}


-#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
+{
+#if (defined SLJIT_SSE2 && SLJIT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+    static int sse2_available = -1;
+    int features;
+
+    if (sse2_available != -1)
+        return sse2_available;
+
 #ifdef __GNUC__
     /* AT&T syntax. */
     asm (
@@ -2053,18 +2058,16 @@
         mov features, edx
     }
 #else
-    #error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
+    #error "SLJIT_DETECT_SSE2 is not implemented for this C compiler"
 #endif
     sse2_available = (features >> 26) & 0x1;
+    return sse2_available;
+#else
+    return 1;
 #endif
-}
-
+#else
+    return 0;
 #endif
-
-SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
-{
-    /* Always available. */
-    return 1;
 }


 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
@@ -2105,11 +2108,7 @@
     return emit_sse2(compiler, 0x11, src, dst, dstw);
 }


-#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
-#else
-static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
-#endif
     int dst, sljit_w dstw,
     int src, sljit_w srcw)
 {
@@ -2167,11 +2166,7 @@
     return SLJIT_SUCCESS;
 }


-#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
-#else
-static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
-#endif
     int dst, sljit_w dstw,
     int src1, sljit_w src1w,
     int src2, sljit_w src2w)
@@ -2229,232 +2224,31 @@
     return SLJIT_SUCCESS;
 }


-#endif
+#else

-#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
-
-static int emit_fld(struct sljit_compiler *compiler,
-    int src, sljit_w srcw)
-{
-    sljit_ub *buf;
-
-    if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
-        buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
-        FAIL_IF(!buf);
-        INC_SIZE(2);
-        *buf++ = 0xd9;
-        *buf = 0xc0 + src - 1;
-        return SLJIT_SUCCESS;
-    }
-
-    buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
-    FAIL_IF(!buf);
-    *buf = 0xdd;
-    return SLJIT_SUCCESS;
-}
-
-static int emit_fop(struct sljit_compiler *compiler,
-    sljit_ub st_arg, sljit_ub st_arg2,
-    sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
-    int src, sljit_w srcw)
-{
-    sljit_ub *buf;
-
-    if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
-        buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
-        FAIL_IF(!buf);
-        INC_SIZE(2);
-        *buf++ = st_arg;
-        *buf = st_arg2 + src;
-        return SLJIT_SUCCESS;
-    }
-
-    buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
-    FAIL_IF(!buf);
-    *buf++ = m64fp_arg;
-    *buf |= m64fp_arg2;
-    return SLJIT_SUCCESS;
-}
-
-static int emit_fop_regs(struct sljit_compiler *compiler,
-    sljit_ub st_arg, sljit_ub st_arg2,
-    int src)
-{
-    sljit_ub *buf;
-
-    buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
-    FAIL_IF(!buf);
-    INC_SIZE(2);
-    *buf++ = st_arg;
-    *buf = st_arg2 + src;
-    return SLJIT_SUCCESS;
-}
-
-#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
-#else
-static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
-#endif
     int dst, sljit_w dstw,
     int src, sljit_w srcw)
 {
-#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    sljit_ub *buf;
-#endif
-
     CHECK_ERROR();
+    /* Should cause an assertion fail. */
     check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    compiler->mode32 = 1;
-#endif
-
-    if (GET_OPCODE(op) == SLJIT_FCMP) {
-        compiler->flags_saved = 0;
-#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-        FAIL_IF(emit_fld(compiler, dst, dstw));
-        FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));
-
-        /* Copy flags. */
-        EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
-        buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
-        FAIL_IF(!buf);
-        INC_SIZE(3);
-        *buf++ = 0xdf;
-        *buf++ = 0xe0;
-        /* Note: lahf is not supported on all x86-64 architectures. */
-        *buf++ = 0x9e;
-        EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
-#else
-        if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
-            FAIL_IF(emit_fld(compiler, dst, dstw));
-            FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
-        } else {
-            FAIL_IF(emit_fld(compiler, src, srcw));
-            FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
-            FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
-            FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
-        }
-#endif
-        return SLJIT_SUCCESS;
-    }
-
-    FAIL_IF(emit_fld(compiler, src, srcw));
-
-    switch (op) {
-    case SLJIT_FNEG:
-        FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
-        break;
-    case SLJIT_FABS:
-        FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
-        break;
-    }
-
-    FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
-
-    return SLJIT_SUCCESS;
+    compiler->error = SLJIT_ERR_UNSUPPORTED;
+    return SLJIT_ERR_UNSUPPORTED;
 }


-#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
-#else
-static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
-#endif
     int dst, sljit_w dstw,
     int src1, sljit_w src1w,
     int src2, sljit_w src2w)
 {
     CHECK_ERROR();
+    /* Should cause an assertion fail. */
     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-    compiler->mode32 = 1;
-#endif
-
-    if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
-        FAIL_IF(emit_fld(compiler, src2, src2w));
-
-        switch (op) {
-        case SLJIT_FADD:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
-            break;
-        case SLJIT_FSUB:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
-            break;
-        case SLJIT_FMUL:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
-            break;
-        case SLJIT_FDIV:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
-            break;
-        }
-        return SLJIT_SUCCESS;
-    }
-
-    FAIL_IF(emit_fld(compiler, src1, src1w));
-
-    if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
-        switch (op) {
-        case SLJIT_FADD:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
-            break;
-        case SLJIT_FSUB:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
-            break;
-        case SLJIT_FMUL:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
-            break;
-        case SLJIT_FDIV:
-            FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
-            break;
-        }
-        return SLJIT_SUCCESS;
-    }
-
-    switch (op) {
-    case SLJIT_FADD:
-        FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
-        break;
-    case SLJIT_FSUB:
-        FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
-        break;
-    case SLJIT_FMUL:
-        FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
-        break;
-    case SLJIT_FDIV:
-        FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
-        break;
-    }
-
-    FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
-
-    return SLJIT_SUCCESS;
+    compiler->error = SLJIT_ERR_UNSUPPORTED;
+    return SLJIT_ERR_UNSUPPORTED;
 }
-#endif


-#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
-
-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
-    int dst, sljit_w dstw,
-    int src, sljit_w srcw)
-{
-    if (sse2_available)
-        return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
-    else
-        return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
-    int dst, sljit_w dstw,
-    int src1, sljit_w src1w,
-    int src2, sljit_w src2w)
-{
-    if (sse2_available)
-        return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
-    else
-        return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
-}
-
 #endif


/* --------------------------------------------------------------------- */
@@ -2534,8 +2328,10 @@

     CHECK_ERROR();
     check_sljit_emit_ijump(compiler, type, src, srcw);
+    ADJUST_LOCAL_OFFSET(src, srcw);


     CHECK_EXTRA_REGS(src, srcw, (void)0);
+
     if (SLJIT_UNLIKELY(compiler->flags_saved)) {
         if (type <= SLJIT_JUMP)
             FAIL_IF(emit_restore_flags(compiler, 0));
@@ -2549,23 +2345,11 @@
             EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
             src = TMP_REGISTER;
         }
-        if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
-            if (src & 0xf0) {
-                EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
-                src = TMP_REGISTER;
-            }
-            else
-                srcw += sizeof(sljit_w);
-        }
+        if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
+            srcw += sizeof(sljit_w);
 #else
-        if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
-            if (src & 0xf0) {
-                EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
-                src = TMP_REGISTER;
-            }
-            else
-                srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
-        }
+        if (src == SLJIT_MEM1(SLJIT_LOCALS_REG))
+            srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
 #endif
 #endif
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
@@ -2613,6 +2397,8 @@
 {
     sljit_ub *buf;
     sljit_ub cond_set = 0;
+    int dst_save = dst;
+    sljit_w dstw_save = dstw;
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
     int reg;
 #endif
@@ -2623,9 +2409,10 @@
     if (dst == SLJIT_UNUSED)
         return SLJIT_SUCCESS;


+    ADJUST_LOCAL_OFFSET(dst, dstw);
     CHECK_EXTRA_REGS(dst, dstw, (void)0);
     if (SLJIT_UNLIKELY(compiler->flags_saved))
-        FAIL_IF(emit_restore_flags(compiler, 0));
+        FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));


     switch (type) {
     case SLJIT_C_EQUAL:
@@ -2718,7 +2505,7 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
             compiler->skip_checks = 1;
 #endif
-            return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
+            return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
         }
     }
 #else
@@ -2790,13 +2577,42 @@
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
         compiler->skip_checks = 1;
 #endif
-        return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
+        return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
     }
 #endif


     return SLJIT_SUCCESS;
 }


+SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset)
+{
+    CHECK_ERROR();
+    check_sljit_get_local_base(compiler, dst, dstw, offset);
+    ADJUST_LOCAL_OFFSET(dst, dstw);
+
+    CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    compiler->mode32 = 0;
+#endif
+
+    ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    if (NOT_HALFWORD(offset)) {
+        FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+        SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
+        return compiler->error;
+#else
+        return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
+#endif
+    }
+#endif
+
+    return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
 {
     sljit_ub *buf;
@@ -2807,6 +2623,7 @@


     CHECK_ERROR_PTR();
     check_sljit_emit_const(compiler, dst, dstw, init_value);
+    ADJUST_LOCAL_OFFSET(dst, dstw);


     CHECK_EXTRA_REGS(dst, dstw, (void)0);