Revision: 885
http://www.exim.org/viewvc/pcre2?view=rev&revision=885
Author: zherczeg
Date: 2017-11-29 13:30:31 +0000 (Wed, 29 Nov 2017)
Log Message:
-----------
JIT compiler update.
Modified Paths:
--------------
code/trunk/src/pcre2_jit_compile.c
code/trunk/src/pcre2_jit_match.c
code/trunk/src/sljit/sljitConfig.h
code/trunk/src/sljit/sljitConfigInternal.h
code/trunk/src/sljit/sljitLir.c
code/trunk/src/sljit/sljitLir.h
code/trunk/src/sljit/sljitNativeARM_32.c
code/trunk/src/sljit/sljitNativeARM_64.c
code/trunk/src/sljit/sljitNativeARM_T2_32.c
code/trunk/src/sljit/sljitNativeMIPS_32.c
code/trunk/src/sljit/sljitNativeMIPS_64.c
code/trunk/src/sljit/sljitNativeMIPS_common.c
code/trunk/src/sljit/sljitNativePPC_64.c
code/trunk/src/sljit/sljitNativePPC_common.c
code/trunk/src/sljit/sljitNativeSPARC_32.c
code/trunk/src/sljit/sljitNativeSPARC_common.c
code/trunk/src/sljit/sljitNativeX86_32.c
code/trunk/src/sljit/sljitNativeX86_64.c
code/trunk/src/sljit/sljitNativeX86_common.c
code/trunk/src/sljit/sljitUtils.c
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/pcre2_jit_compile.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -228,7 +228,7 @@
type_then_trap = 1
};
-typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
+typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
@@ -2753,7 +2753,7 @@
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
}
-static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
+static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
while (current != NULL)
{
@@ -6057,7 +6057,7 @@
#if defined SUPPORT_UNICODE
-static PCRE2_SPTR SLJIT_CALL do_utf_caselesscmp(PCRE2_SPTR src1, jit_arguments *args, PCRE2_SPTR end1)
+static PCRE2_SPTR SLJIT_FUNC do_utf_caselesscmp(PCRE2_SPTR src1, jit_arguments *args, PCRE2_SPTR end1)
{
/* This function would be ineffective to do in JIT level. */
sljit_u32 c1, c2;
@@ -7598,8 +7598,9 @@
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), STR_PTR, 0);
- sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
else
@@ -7924,7 +7925,7 @@
return cc + 1 + LINK_SIZE;
}
-static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
+static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR begin;
PCRE2_SIZE *ovector;
@@ -8038,7 +8039,7 @@
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
-sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
+sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, callout_arg_size);
@@ -11283,7 +11284,7 @@
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
- sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
@@ -11957,7 +11958,7 @@
common->compiler = compiler;
/* Main pcre_jit_exec entry. */
-sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size);
+sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
@@ -12206,7 +12207,7 @@
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
-sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
+sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
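To summarise the pcre2_jit_compile.c hunks above: helpers called from generated code change their calling-convention attribute from SLJIT_CALL to SLJIT_FUNC, and every call site moves from the fixed-arity SLJIT_CALLn jump types to sljit_emit_icall with an explicit argument-type word. A minimal before/after sketch using do_search_mark from the hunks above:

    /* Before: arity encoded in the jump type, convention given by SLJIT_CALL. */
    static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg);
    sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));

    /* After: convention given by SLJIT_FUNC, return/argument types in arg_types. */
    static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg);
    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW),
        SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));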
Modified: code/trunk/src/pcre2_jit_match.c
===================================================================
--- code/trunk/src/pcre2_jit_match.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/pcre2_jit_match.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -118,7 +118,7 @@
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
index = 1;
-if (functions->executable_funcs == NULL || functions->executable_funcs[index] == NULL)
+if (functions == NULL || functions->executable_funcs[index] == NULL)
return PCRE2_ERROR_JIT_BADOPTION;
/* Sanity checks should be handled by pcre_exec. */
Modified: code/trunk/src/sljit/sljitConfig.h
===================================================================
--- code/trunk/src/sljit/sljitConfig.h 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitConfig.h 2017-11-29 13:30:31 UTC (rev 885)
@@ -108,8 +108,10 @@
/* Force cdecl calling convention even if a better calling
convention (e.g. fastcall) is supported by the C compiler.
- If this option is enabled, C functions without
- SLJIT_CALL can also be called from JIT code. */
+ If this option is disabled (this is the default), functions
+ called from JIT should be defined with SLJIT_FUNC attribute.
+ Standard C functions can still be called by using the
+ SLJIT_CALL_CDECL jump type. */
#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION
/* Disabled by default */
#define SLJIT_USE_CDECL_CALLING_CONVENTION 0
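In practice, with SLJIT_USE_CDECL_CALLING_CONVENTION left at its default of 0, a helper reached through SLJIT_CALL is declared with the SLJIT_FUNC attribute (fastcall on x86-32 with the compilers listed in sljitConfigInternal.h, empty otherwise), while an unmodified cdecl C function has to be reached through SLJIT_CALL_CDECL. A minimal sketch (my_helper is a made-up name, not part of this revision):

    /* Declared for the SLJIT_CALL convention. */
    static sljit_sw SLJIT_FUNC my_helper(sljit_sw a, sljit_sw b) { return a + b; }

    /* A plain C function keeps cdecl, so its call sites must use
       SLJIT_CALL_CDECL instead of SLJIT_CALL. */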
Modified: code/trunk/src/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/src/sljit/sljitConfigInternal.h 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitConfigInternal.h 2017-11-29 13:30:31 UTC (rev 885)
@@ -60,11 +60,13 @@
a single precision floating point array by index
SLJIT_F64_SHIFT : the shift required to apply when accessing
a double precision floating point array by index
+ SLJIT_PREF_SHIFT_REG : x86 systems prefer ecx for shifting by register;
+ the scratch register index of ecx is stored in this variable
SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET)
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
Other macros:
- SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT
+ SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
*/
@@ -471,44 +473,44 @@
/* Calling convention of functions generated by SLJIT or called from the generated code. */
/*****************************************************************************************/
-#ifndef SLJIT_CALL
+#ifndef SLJIT_FUNC
#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION)
/* Force cdecl. */
-#define SLJIT_CALL
+#define SLJIT_FUNC
#elif (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if defined(__GNUC__) && !defined(__APPLE__)
-#define SLJIT_CALL __attribute__ ((fastcall))
+#define SLJIT_FUNC __attribute__ ((fastcall))
#define SLJIT_X86_32_FASTCALL 1
#elif defined(_MSC_VER)
-#define SLJIT_CALL __fastcall
+#define SLJIT_FUNC __fastcall
#define SLJIT_X86_32_FASTCALL 1
#elif defined(__BORLANDC__)
-#define SLJIT_CALL __msfastcall
+#define SLJIT_FUNC __msfastcall
#define SLJIT_X86_32_FASTCALL 1
#else /* Unknown compiler. */
/* The cdecl attribute is the default. */
-#define SLJIT_CALL
+#define SLJIT_FUNC
#endif
#else /* Non x86-32 architectures. */
-#define SLJIT_CALL
+#define SLJIT_FUNC
#endif /* SLJIT_CONFIG_X86_32 */
-#endif /* !SLJIT_CALL */
+#endif /* !SLJIT_FUNC */
#ifndef SLJIT_INDIRECT_CALL
#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \
@@ -557,24 +559,20 @@
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
-#else
-/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */
-#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
-#endif /* SLJIT_X86_32_FASTCALL */
+#define SLJIT_PREF_SHIFT_REG SLJIT_R2
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#define SLJIT_NUMBER_OF_REGISTERS 13
#ifndef _WIN64
-#define SLJIT_NUMBER_OF_REGISTERS 13
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
#define SLJIT_LOCALS_OFFSET_BASE 0
-#else
-#define SLJIT_NUMBER_OF_REGISTERS 13
+#else /* _WIN64 */
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
-#endif /* _WIN64 */
+#endif /* !_WIN64 */
+#define SLJIT_PREF_SHIFT_REG SLJIT_R3
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
@@ -622,8 +620,9 @@
#define SLJIT_NUMBER_OF_REGISTERS 18
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
-/* Add +1 for double alignment. */
-#define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw))
+/* saved registers (16), return struct pointer (1), space for 6 argument words (6),
+ 4th double arg (2), double alignment (1). */
+#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * sizeof(sljit_sw))
#endif
#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
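For the SPARC-32 value above, the rewritten comment simply itemises the ABI frame head that the macro adds up:

    16 (register window save area) + 1 (struct return pointer)
      + 6 (outgoing argument words) + 2 (4th double argument) + 1 (double alignment)
      = 26 words, i.e. 104 bytes with a 4-byte sljit_sw.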
Modified: code/trunk/src/sljit/sljitLir.c
===================================================================
--- code/trunk/src/sljit/sljitLir.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitLir.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -97,8 +97,13 @@
#define GET_ALL_FLAGS(op) \
((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))
+#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
#define TYPE_CAST_NEEDED(op) \
+ (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S32))
+#else
+#define TYPE_CAST_NEEDED(op) \
(((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S16))
+#endif
#define BUF_SIZE 4096
@@ -118,6 +123,9 @@
/* When reg can be unused. */
#define SLOW_IS_REG(reg) ((reg) > 0 && (reg) <= REG_MASK)
+/* Mask for argument types. */
+#define SLJIT_DEF_MASK ((1 << SLJIT_DEF_SHIFT) - 1)
+
/* Jump flags. */
#define JUMP_LABEL 0x1
#define JUMP_ADDR 0x2
@@ -591,6 +599,19 @@
compiler->buf = prev;
}
+static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types)
+{
+ sljit_s32 arg_count = 0;
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ while (arg_types) {
+ arg_count++;
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return arg_count;
+}
+
static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
@@ -864,9 +885,13 @@
(char*)"greater", (char*)"less_equal",
(char*)"unordered", (char*)"ordered",
(char*)"jump", (char*)"fast_call",
- (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3"
+ (char*)"call", (char*)"call.cdecl"
};
+static char* call_arg_names[] = {
+ (char*)"void", (char*)"sw", (char*)"uw", (char*)"s32", (char*)"u32", (char*)"f32", (char*)"f64"
+};
+
#endif /* SLJIT_VERBOSE */
/* --------------------------------------------------------------------- */
@@ -897,53 +922,104 @@
}
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ sljit_s32 types, arg_count, curr_type;
+#endif
+
SLJIT_UNUSED_ARG(compiler);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT));
- CHECK_ARGUMENT(args >= 0 && args <= 3);
CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
- CHECK_ARGUMENT(args <= saveds);
CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+ CHECK_ARGUMENT((arg_types & SLJIT_DEF_MASK) == 0);
+
+ types = (arg_types >> SLJIT_DEF_SHIFT);
+ arg_count = 0;
+ while (types != 0 && arg_count < 3) {
+ curr_type = (types & SLJIT_DEF_MASK);
+ CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW);
+ arg_count++;
+ types >>= SLJIT_DEF_SHIFT;
+ }
+ CHECK_ARGUMENT(arg_count <= saveds && types == 0);
+
compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
- if (SLJIT_UNLIKELY(!!compiler->verbose))
- fprintf(compiler->verbose, " enter options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
- args, scratches, saveds, fscratches, fsaveds, local_size);
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " enter options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? "f64_align" : "");
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ while (arg_types) {
+ fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]);
+ arg_types >>= SLJIT_DEF_SHIFT;
+ if (arg_types)
+ fprintf(compiler->verbose, ",");
+ }
+
+ fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
+ scratches, saveds, fscratches, fsaveds, local_size);
+ }
#endif
CHECK_RETURN_OK;
}
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ sljit_s32 types, arg_count, curr_type;
+#endif
+
+ SLJIT_UNUSED_ARG(compiler);
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT));
- CHECK_ARGUMENT(args >= 0 && args <= 3);
CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
- CHECK_ARGUMENT(args <= saveds);
CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+
+ types = (arg_types >> SLJIT_DEF_SHIFT);
+ arg_count = 0;
+ while (types != 0 && arg_count < 3) {
+ curr_type = (types & SLJIT_DEF_MASK);
+ CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW);
+ arg_count++;
+ types >>= SLJIT_DEF_SHIFT;
+ }
+ CHECK_ARGUMENT(arg_count <= saveds && types == 0);
+
compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
- if (SLJIT_UNLIKELY(!!compiler->verbose))
- fprintf(compiler->verbose, " set_context options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
- args, scratches, saveds, fscratches, fsaveds, local_size);
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " set_context options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? "f64_align" : "");
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ while (arg_types) {
+ fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]);
+ arg_types >>= SLJIT_DEF_SHIFT;
+ if (arg_types)
+ fprintf(compiler->verbose, ",");
+ }
+
+ fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n",
+ scratches, saveds, fscratches, fsaveds, local_size);
+ }
#endif
CHECK_RETURN_OK;
}
@@ -1417,9 +1493,8 @@
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP)));
CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
- CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3);
+ CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL);
CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP));
- CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches);
if ((type & 0xff) < SLJIT_JUMP) {
if ((type & 0xff) <= SLJIT_NOT_ZERO)
@@ -1439,6 +1514,63 @@
CHECK_RETURN_OK;
}
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ sljit_s32 i, types, curr_type, scratches, fscratches;
+
+ CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP)));
+ CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL);
+
+ types = arg_types;
+ scratches = 0;
+ fscratches = 0;
+ for (i = 0; i < 5; i++) {
+ curr_type = (types & SLJIT_DEF_MASK);
+ CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64);
+ if (i > 0) {
+ if (curr_type == 0) {
+ break;
+ }
+ if (curr_type >= SLJIT_ARG_TYPE_F32)
+ fscratches++;
+ else
+ scratches++;
+ } else {
+ if (curr_type >= SLJIT_ARG_TYPE_F32) {
+ CHECK_ARGUMENT(compiler->fscratches > 0);
+ } else if (curr_type >= SLJIT_ARG_TYPE_SW) {
+ CHECK_ARGUMENT(compiler->scratches > 0);
+ }
+ }
+ types >>= SLJIT_DEF_SHIFT;
+ }
+ CHECK_ARGUMENT(compiler->scratches >= scratches);
+ CHECK_ARGUMENT(compiler->fscratches >= fscratches);
+ CHECK_ARGUMENT(types == 0);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " %s%s ret[%s", jump_names[type & 0xff],
+ !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]);
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ if (arg_types) {
+ fprintf(compiler->verbose, "], args[");
+ do {
+ fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]);
+ arg_types >>= SLJIT_DEF_SHIFT;
+ if (arg_types)
+ fprintf(compiler->verbose, ",");
+ } while (arg_types);
+ }
+ fprintf(compiler->verbose, "]\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -1488,12 +1620,9 @@
CHECK_RETURN_OK;
}
-static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 src, sljit_sw srcw)
{
-#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- compiler->last_flags = 0;
-#endif
-
if (SLJIT_UNLIKELY(compiler->skip_checks)) {
compiler->skip_checks = 0;
CHECK_RETURN_OK;
@@ -1500,8 +1629,7 @@
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
- CHECK_ARGUMENT(type <= SLJIT_CALL0 || (type - SLJIT_CALL0) <= compiler->scratches);
+ CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_FAST_CALL);
FUNCTION_CHECK_SRC(src, srcw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1514,6 +1642,66 @@
CHECK_RETURN_OK;
}
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ sljit_s32 i, types, curr_type, scratches, fscratches;
+
+ CHECK_ARGUMENT(type == SLJIT_CALL || type == SLJIT_CALL_CDECL);
+ FUNCTION_CHECK_SRC(src, srcw);
+
+ types = arg_types;
+ scratches = 0;
+ fscratches = 0;
+ for (i = 0; i < 5; i++) {
+ curr_type = (types & SLJIT_DEF_MASK);
+ CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64);
+ if (i > 0) {
+ if (curr_type == 0) {
+ break;
+ }
+ if (curr_type >= SLJIT_ARG_TYPE_F32)
+ fscratches++;
+ else
+ scratches++;
+ } else {
+ if (curr_type >= SLJIT_ARG_TYPE_F32) {
+ CHECK_ARGUMENT(compiler->fscratches > 0);
+ } else if (curr_type >= SLJIT_ARG_TYPE_SW) {
+ CHECK_ARGUMENT(compiler->scratches > 0);
+ }
+ }
+ types >>= SLJIT_DEF_SHIFT;
+ }
+ CHECK_ARGUMENT(compiler->scratches >= scratches);
+ CHECK_ARGUMENT(compiler->fscratches >= fscratches);
+ CHECK_ARGUMENT(types == 0);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff],
+ !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]);
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ if (arg_types) {
+ fprintf(compiler->verbose, "], args[");
+ do {
+ fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]);
+ arg_types >>= SLJIT_DEF_SHIFT;
+ if (arg_types)
+ fprintf(compiler->verbose, ",");
+ } while (arg_types);
+ }
+ fprintf(compiler->verbose, "], ");
+ sljit_verbose_param(compiler, src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
@@ -1943,12 +2131,12 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(options);
- SLJIT_UNUSED_ARG(args);
+ SLJIT_UNUSED_ARG(arg_types);
SLJIT_UNUSED_ARG(scratches);
SLJIT_UNUSED_ARG(saveds);
SLJIT_UNUSED_ARG(fscratches);
@@ -1959,12 +2147,12 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(options);
- SLJIT_UNUSED_ARG(args);
+ SLJIT_UNUSED_ARG(arg_types);
SLJIT_UNUSED_ARG(scratches);
SLJIT_UNUSED_ARG(saveds);
SLJIT_UNUSED_ARG(fscratches);
@@ -2109,6 +2297,16 @@
return NULL;
}
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(arg_types);
+ SLJIT_UNREACHABLE();
+ return NULL;
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -2161,6 +2359,19 @@
return SLJIT_ERR_UNSUPPORTED;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(arg_types);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(srcw);
+ SLJIT_UNREACHABLE();
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
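As a worked example of the argument-type word that get_arg_count() and the new checks decode: with SLJIT_DEF_SHIFT being 4, each type occupies one nibble and the lowest nibble holds the return type.

    /* SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW)
       == 1 | (1 << 4) | (1 << 8) == 0x111 */
    /* get_arg_count() shifts out the return-type nibble first, then counts
       the remaining non-zero nibbles (0x11 -> 0x1 -> 0), returning 2. */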
Modified: code/trunk/src/sljit/sljitLir.h
===================================================================
--- code/trunk/src/sljit/sljitLir.h 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitLir.h 2017-11-29 13:30:31 UTC (rev 885)
@@ -213,14 +213,6 @@
#define SLJIT_RETURN_REG SLJIT_R0
-/* x86 prefers specific registers for special purposes. In case of shift
- by register it supports only SLJIT_R2 for shift argument
- (which is the src2 argument of sljit_emit_op2). If another register is
- used, sljit must exchange data between registers which cause a minor
- slowdown. Other architectures has no such limitation. */
-
-#define SLJIT_PREF_SHIFT_REG SLJIT_R2
-
/* --------------------------------------------------------------------- */
/* Floating point registers */
/* --------------------------------------------------------------------- */
@@ -258,6 +250,79 @@
#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1)
/* --------------------------------------------------------------------- */
+/* Argument type definitions */
+/* --------------------------------------------------------------------- */
+
+/* Argument type definitions.
+ Used by SLJIT_[DEF_]ARGx and SLJIT_[DEF]_RET macros. */
+
+#define SLJIT_ARG_TYPE_VOID 0
+#define SLJIT_ARG_TYPE_SW 1
+#define SLJIT_ARG_TYPE_UW 2
+#define SLJIT_ARG_TYPE_S32 3
+#define SLJIT_ARG_TYPE_U32 4
+#define SLJIT_ARG_TYPE_F32 5
+#define SLJIT_ARG_TYPE_F64 6
+
+/* The following argument type definitions are used by sljit_emit_enter,
+ sljit_set_context, sljit_emit_call, and sljit_emit_icall functions.
+ The following return type definitions are used by sljit_emit_call
+ and sljit_emit_icall functions.
+
+ When a function is called, the first integer argument must be placed
+ in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first
+ floating point argument must be placed in SLJIT_FR0, the second in
+ SLJIT_FR1, and so on.
+
+ Example function definition:
+ sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a,
+ sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d);
+
+ Argument type definition:
+ SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32)
+ | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64)
+ | SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_F32)
+
+ Short form of argument type definition:
+ SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64)
+ | SLJIT_ARG3(U32) | SLJIT_ARG4(F32)
+
+ Argument passing:
+ arg_a must be placed in SLJIT_R0
+ arg_c must be placed in SLJIT_R1
+ arg_b must be placed in SLJIT_FR0
+ arg_d must be placed in SLJIT_FR1
+
+Note:
+ The SLJIT_ARG_TYPE_VOID type is only supported by
+ SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the
+ default value when SLJIT_DEF_RET is not specified. */
+#define SLJIT_DEF_SHIFT 4
+#define SLJIT_DEF_RET(type) (type)
+#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT)
+#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT))
+#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT))
+#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT))
+
+/* Short form of the macros above.
+
+ For example the following definition:
+ SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32)
+
+ can be shortened to:
+ SLJIT_RET(SW) | SLJIT_ARG1(F32)
+
+Note:
+ The VOID type is only supported by SLJIT_RET, and
+ VOID is also the default value when SLJIT_RET is
+ not specified. */
+#define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type)
+#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type)
+#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type)
+#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type)
+#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type)
+
+/* --------------------------------------------------------------------- */
/* Main structures and functions */
/* --------------------------------------------------------------------- */
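Connecting these macros to the pcre2_jit_compile.c hunks earlier in this revision: do_utf_caselesscmp takes three pointer arguments and returns a pointer, and pointers travel as machine words, so its call site describes it as

    SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW)

with the three arguments placed in SLJIT_R0, SLJIT_R1 and SLJIT_R2 before the call.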
@@ -331,6 +396,7 @@
sljit_s32 args;
sljit_s32 locals_offset;
sljit_s32 saveds_offset;
+ sljit_s32 stack_tmp_size;
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -505,8 +571,6 @@
#define SLJIT_HAS_CLZ 3
/* [Emulated] Conditional move is supported. */
#define SLJIT_HAS_CMOV 4
-/* [Limitation] [Emulated] Shifting with register is limited to SLJIT_PREF_SHIFT_REG. */
-#define SLJIT_HAS_PREF_SHIFT_REG 5
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* [Not emulated] SSE2 support is available on x86. */
@@ -519,10 +583,10 @@
error, they return with SLJIT_SUCCESS. */
/*
- The executable code is a function call from the viewpoint of the C
+ The executable code is a function from the viewpoint of the C
language. The function calls must obey to the ABI (Application
Binary Interface) of the platform, which specify the purpose of
- all machine registers and stack handling among other things. The
+ machine registers and stack handling among other things. The
sljit_emit_enter function emits the necessary instructions for
setting up a new context for the executable code and moves function
arguments to the saved registers. Furthermore the options argument
@@ -529,17 +593,18 @@
can be used to pass configuration options to the compiler. The
available options are listed before sljit_emit_enter.
- The number of sljit_sw arguments passed to the generated function
- are specified in the "args" parameter. The number of arguments must
- be less than or equal to 3. The first argument goes to SLJIT_S0,
- the second goes to SLJIT_S1 and so on. The register set used by
- the function must be declared as well. The number of scratch and
- saved registers used by the function must be passed to sljit_emit_enter.
- Only R registers between R0 and "scratches" argument can be used
- later. E.g. if "scratches" is set to 2, the register set will be
- limited to R0 and R1. The S registers and the floating point
+ The function argument list is the combination of SLJIT_ARGx
+ (SLJIT_DEF_ARGx) macros. Currently at most 3 SW / UW
+ (SLJIT_ARG_TYPE_SW / SLJIT_ARG_TYPE_UW) arguments are supported.
+ The first argument goes to SLJIT_S0, the second goes to SLJIT_S1
+ and so on. The register set used by the function must be declared
+ as well. The number of scratch and saved registers used by the
+ function must be passed to sljit_emit_enter. Only R registers
+ between R0 and "scratches" argument can be used later. E.g. if
+ "scratches" is set to 2, the scratch register set will be limited
+ to SLJIT_R0 and SLJIT_R1. The S registers and the floating point
registers ("fscratches" and "fsaveds") are specified in a similar
- way. The sljit_emit_enter is also capable of allocating a stack
+ manner. The sljit_emit_enter is also capable of allocating a stack
space for local variables. The "local_size" argument contains the
size in bytes of this local area and its starting address is stored
in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and
@@ -566,7 +631,7 @@
#define SLJIT_MAX_LOCAL_SIZE 65536
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size);
/* The machine code has a context (which contains the local stack space size,
@@ -580,7 +645,7 @@
the previous context. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size);
/* Return from machine code. The op argument can be SLJIT_UNUSED which means the
@@ -1136,25 +1201,32 @@
/* Unconditional jump types. */
#define SLJIT_JUMP 24
+ /* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */
#define SLJIT_FAST_CALL 25
-#define SLJIT_CALL0 26
-#define SLJIT_CALL1 27
-#define SLJIT_CALL2 28
-#define SLJIT_CALL3 29
+ /* Called function must be declared with the SLJIT_FUNC attribute. */
+#define SLJIT_CALL 26
+ /* Called function must be declared with the cdecl attribute.
+ This is the default attribute for C functions. */
+#define SLJIT_CALL_CDECL 27
-/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */
-
/* The target can be changed during runtime (see: sljit_set_jump_addr). */
#define SLJIT_REWRITABLE_JUMP 0x1000
/* Emit a jump instruction. The destination is not set, only the type of the jump.
- type must be between SLJIT_EQUAL and SLJIT_CALL3
+ type must be between SLJIT_EQUAL and SLJIT_FAST_CALL
type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
- Flags: does not modify flags for conditional and unconditional
- jumps but destroy all flags for calls. */
+ Flags: does not modify flags. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type);
+/* Emit a C compiler (ABI) compatible function call.
+ type must be SLJIT_CALL or SLJIT_CALL_CDECL
+ type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+ arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros
+
+ Flags: destroy all flags. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types);
+
/* Basic arithmetic comparison. In most architectures it is implemented as
an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting
appropriate flags) followed by a sljit_emit_jump. However some
@@ -1162,6 +1234,7 @@
It is suggested to use this comparison form when appropriate.
type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL
type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+
Flags: may destroy flags. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 src1, sljit_sw src1w,
@@ -1186,15 +1259,23 @@
/* Set the destination address of the jump to this label. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
-/* Call function or jump anywhere. Both direct and indirect form
- type must be between SLJIT_JUMP and SLJIT_CALL3
- Direct form: set src to SLJIT_IMM() and srcw to the address
- Indirect form: any other valid addressing mode
+/* Emit an indirect jump or fast call. Both direct and indirect form
+ Direct form: set src to SLJIT_IMM() and srcw to the address
+ Indirect form: any other valid addressing mode
+ type must be between SLJIT_JUMP and SLJIT_FAST_CALL
- Flags: does not modify flags for unconditional jumps but
- destroy all flags for calls. */
+ Flags: does not modify flags. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw);
+/* Emit a C compiler (ABI) compatible function call.
+ Direct form: set src to SLJIT_IMM() and srcw to the address
+ Indirect form: any other valid addressing mode
+ type must be SLJIT_CALL or SLJIT_CALL_CDECL
+ arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros
+
+ Flags: destroy all flags. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw);
+
/* Perform the operation using the conditional flags as the second argument.
Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_F64. The value
represented by the type is 1, if the condition represented by the type
@@ -1270,8 +1351,8 @@
#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
/* This global lock is useful to compile common functions. */
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void);
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void);
#endif
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
@@ -1312,8 +1393,8 @@
Note: limit contains the starting stack size in bytes.
Note: the top field is initialized to base.
Note: see sljit_create_compiler for the explanation of allocator_data. */
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data);
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data);
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data);
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data);
/* Can be used to increase (allocate) or decrease (free) the memory area.
Returns with a non-zero value if unsuccessful. If new_limit is greater than
@@ -1321,7 +1402,7 @@
since the growth ratio can be added to the current limit, and sljit_stack_resize
will do all the necessary checks. The fields of the stack are not changed if
sljit_stack_resize fails. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit);
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit);
#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */
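The stack utility functions switch to SLJIT_FUNC for the same reason as the pcre2 helpers: they are called from generated code. The stack-growth path in pcre2_jit_compile.c (earlier in this revision) now emits, for instance:

    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW),
        SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));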
Modified: code/trunk/src/sljit/sljitNativeARM_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_32.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeARM_32.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -24,12 +24,18 @@
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifdef __SOFTFP__
+#define ARM_ABI_INFO " ABI:softfp"
+#else
+#define ARM_ABI_INFO " ABI:hardfp"
+#endif
+
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
- return "ARMv7" SLJIT_CPUINFO;
+ return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- return "ARMv5" SLJIT_CPUINFO;
+ return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
@@ -40,8 +46,8 @@
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_FREG1 (0)
-#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
/* In ARM instruction words.
Cache lines are usually 32 byte aligned. */
@@ -55,9 +61,13 @@
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 14, 12, 15
+ 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 1, 2, 3, 4, 5, 6, 7
+};
+
#define RM(rm) (reg_map[rm])
#define RD(rd) (reg_map[rd] << 12)
#define RN(rn) (reg_map[rn] << 16)
@@ -72,32 +82,31 @@
#define CONDITIONAL 0xe0000000
#define PUSH_POOL 0xff000000
-/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
-#define ADC_DP 0x5
-#define ADD_DP 0x4
-#define AND_DP 0x0
+#define ADC 0xe0a00000
+#define ADD 0xe0800000
+#define AND 0xe0000000
#define B 0xea000000
-#define BIC_DP 0xe
+#define BIC 0xe1c00000
#define BL 0xeb000000
#define BLX 0xe12fff30
#define BX 0xe12fff10
#define CLZ 0xe16f0f10
-#define CMN_DP 0xb
-#define CMP_DP 0xa
+#define CMN 0xe1600000
+#define CMP 0xe1400000
#define BKPT 0xe1200070
-#define EOR_DP 0x1
-#define MOV_DP 0xd
+#define EOR 0xe0200000
+#define MOV 0xe1a00000
#define MUL 0xe0000090
-#define MVN_DP 0xf
+#define MVN 0xe1e00000
#define NOP 0xe1a00000
-#define ORR_DP 0xc
+#define ORR 0xe1800000
#define PUSH 0xe92d0000
#define POP 0xe8bd0000
-#define RSB_DP 0x3
-#define RSC_DP 0x7
-#define SBC_DP 0x6
+#define RSB 0xe0600000
+#define RSC 0xe0e00000
+#define SBC 0xe0c00000
#define SMULL 0xe0c00090
-#define SUB_DP 0x2
+#define SUB 0xe0400000
#define UMULL 0xe0800090
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
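With the data-processing mnemonics now defined as complete instruction words (condition AL and opcode bits already set), an instruction is assembled by OR-ing the register fields straight into the constant, which makes the EMIT_DATA_PROCESS_INS wrapper removed further down unnecessary. For example:

    /* Old: push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, src1, RM(src2))); */
    push_inst(compiler, ADD | RD(dst) | RN(src1) | RM(src2));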
@@ -108,6 +117,7 @@
#define VDIV_F32 0xee800a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
+#define VMOV2 0xec400a10
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
@@ -260,7 +270,9 @@
{
/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
- return push_inst(compiler, BLX | RM(TMP_REG2));
+ SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
+
+ return push_inst(compiler, BLX | RM(TMP_REG1));
}
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
@@ -889,10 +901,6 @@
#define TYPE2_TRANSFER_IMM(imm) \
(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
-/* Condition: AL. */
-#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
- (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
-
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
@@ -899,15 +907,15 @@
sljit_s32 src2, sljit_sw src2w);
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
- sljit_s32 size, i, tmp;
+ sljit_s32 args, size, i, tmp;
sljit_uw push;
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
/* Push saved registers, temporary registers
stmdb sp!, {..., lr} */
@@ -929,25 +937,27 @@
if (local_size > 0)
FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
+ args = get_arg_count(arg_types);
+
if (args >= 1)
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0))));
+ FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0)));
if (args >= 2)
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1))));
+ FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S1) | RM(SLJIT_R1)));
if (args >= 3)
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2))));
+ FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2)));
return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 size;
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
compiler->local_size = ((size + local_size + 7) & ~7) - size;
@@ -1009,12 +1019,12 @@
SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
\
if (compiler->shift_imm != 0) \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
- dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | RM(src2))); \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); \
+ return push_inst(compiler, MOV | (flags & SET_FLAGS) | \
+ RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \
+ return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \
} \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
- dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)));
+ return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \
+ (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1));
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
@@ -1024,10 +1034,9 @@
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
if (dst != src2) {
if (src2 & SRC2_IMM) {
- return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
- dst, SLJIT_UNUSED, src2));
+ return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
}
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(src2)));
+ return push_inst(compiler, MOV | RD(dst) | RM(src2));
}
return SLJIT_SUCCESS;
@@ -1037,9 +1046,9 @@
if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (op == SLJIT_MOV_U8)
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | RM(src2))));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)));
+ return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff);
+ FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2)));
+ return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst));
#else
return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
#endif
@@ -1046,8 +1055,7 @@
}
else if (dst != src2) {
SLJIT_ASSERT(src2 & SRC2_IMM);
- return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
- dst, SLJIT_UNUSED, src2));
+ return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
}
return SLJIT_SUCCESS;
@@ -1056,8 +1064,8 @@
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | RM(src2))));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)));
+ FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2)));
+ return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst));
#else
return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
#endif
@@ -1064,17 +1072,15 @@
}
else if (dst != src2) {
SLJIT_ASSERT(src2 & SRC2_IMM);
- return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
- dst, SLJIT_UNUSED, src2));
+ return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
}
return SLJIT_SUCCESS;
case SLJIT_NOT:
if (src2 & SRC2_IMM) {
- return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MOV_DP : MVN_DP, flags & SET_FLAGS,
- dst, SLJIT_UNUSED, src2));
+ return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2);
}
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2)));
+ return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2));
case SLJIT_CLZ:
SLJIT_ASSERT(!(flags & INV_IMM));
@@ -1085,28 +1091,24 @@
case SLJIT_ADD:
SLJIT_ASSERT(!(flags & INV_IMM));
if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMN_DP, SET_FLAGS,
- SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS,
- dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_ADDC:
SLJIT_ASSERT(!(flags & INV_IMM));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADC_DP, flags & SET_FLAGS,
- dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_SUB:
SLJIT_ASSERT(!(flags & INV_IMM));
if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS,
- SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS,
- dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
+ | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_SUBC:
SLJIT_ASSERT(!(flags & INV_IMM));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SBC_DP : RSC_DP, flags & SET_FLAGS,
- dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
+ | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & INV_IMM));
@@ -1118,19 +1120,19 @@
FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1]));
/* cmp TMP_REG1, dst asr #31. */
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG1, RM(dst) | 0xfc0));
+ return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0);
case SLJIT_AND:
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & INV_IMM) ? AND_DP : BIC_DP, flags & SET_FLAGS,
- dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
+ | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_OR:
SLJIT_ASSERT(!(flags & INV_IMM));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(ORR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_XOR:
SLJIT_ASSERT(!(flags & INV_IMM));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(EOR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_SHL:
EMIT_SHIFT_INS_AND_RETURN(0);
@@ -1293,8 +1295,8 @@
return 0;
}
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
+ FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1));
+ FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2));
return 1;
}
#endif
@@ -1311,11 +1313,11 @@
/* Create imm by 1 inst. */
tmp = get_imm(imm);
if (tmp)
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
+ return push_inst(compiler, MOV | RD(reg) | tmp);
tmp = get_imm(~imm);
if (tmp)
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
+ return push_inst(compiler, MVN | RD(reg) | tmp);
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
/* Create imm by 2 inst. */
@@ -1365,7 +1367,7 @@
if (argw != 0 && !is_type1_transfer) {
SLJIT_ASSERT(!(flags & WRITE_BACK));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_reg, arg, RM(offset_reg) | (argw << 7))));
+ FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7)));
return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
}
@@ -1381,7 +1383,7 @@
imm = get_imm(argw & ~0xfff);
if (imm) {
offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
+ FAIL_IF(push_inst(compiler, ADD | RD(offset_reg) | RN(arg) | imm));
argw = argw & 0xfff;
arg = offset_reg;
}
@@ -1390,7 +1392,7 @@
imm = get_imm(-argw & ~0xfff);
if (imm) {
offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
+ FAIL_IF(push_inst(compiler, SUB | RD(offset_reg) | RN(arg) | imm));
argw = -(-argw & 0xfff);
arg = offset_reg;
}
@@ -1408,7 +1410,7 @@
imm = get_imm(argw & ~0xff);
if (imm) {
offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
+ FAIL_IF(push_inst(compiler, ADD | RD(offset_reg) | RN(arg) | imm));
argw = argw & 0xff;
arg = offset_reg;
}
@@ -1417,7 +1419,7 @@
imm = get_imm(-argw & ~0xff);
if (imm) {
offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
+ FAIL_IF(push_inst(compiler, SUB | RD(offset_reg) | RN(arg) | imm));
argw = -(-argw & 0xff);
arg = offset_reg;
}
@@ -1785,7 +1787,7 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return reg << 1;
+ return (freg_map[reg] << 1);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -1804,9 +1806,9 @@
#define FPU_LOAD (1 << 20)
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
- ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs))
+ ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs))
#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
- ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16))
+ ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16))
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
@@ -1817,7 +1819,7 @@
arg &= ~SLJIT_MEM;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+ FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
arg = TMP_REG2;
argw = 0;
}
@@ -1831,13 +1833,13 @@
imm = get_imm(argw & ~0x3fc);
if (imm) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
+ FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
}
imm = get_imm(-argw & ~0x3fc);
if (imm) {
argw = -argw;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
+ FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
}
}
@@ -1844,7 +1846,7 @@
if (arg) {
FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(TMP_REG2))));
+ FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2)));
}
else
FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
@@ -1866,7 +1868,7 @@
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0)));
if (FAST_IS_REG(dst))
- return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
+ return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (freg_map[TMP_FREG1] << 16));
/* Store the integer value from a VFP register. */
return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
@@ -1881,7 +1883,7 @@
op ^= SLJIT_F32_OP;
if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
+ FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16)));
else if (src & SLJIT_MEM) {
/* Load the integer value into a VFP register. */
FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
@@ -1888,7 +1890,7 @@
}
else {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
- FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
+ FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16)));
}
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0)));
@@ -2018,7 +2020,6 @@
#undef FPU_LOAD
#undef EMIT_FPU_DATA_TRANSFER
-#undef EMIT_FPU_OPERATION
/* --------------------------------------------------------------------- */
/* Other instructions */
@@ -2030,13 +2031,13 @@
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- SLJIT_ASSERT(reg_map[TMP_REG1] == 14);
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
if (FAST_IS_REG(dst))
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1)));
+ return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
/* Memory. */
- return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
@@ -2045,16 +2046,16 @@
CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
- SLJIT_ASSERT(reg_map[TMP_REG1] == 14);
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, 0, RM(src))));
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
else if (src & SLJIT_MEM)
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG2));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
- return push_inst(compiler, BX | RM(TMP_REG1));
+ return push_inst(compiler, BX | RM(TMP_REG2));
}
/* --------------------------------------------------------------------- */
@@ -2111,7 +2112,7 @@
return 0x70000000;
default:
- SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
+ SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
return 0xe0000000;
}
}
@@ -2144,12 +2145,13 @@
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
type &= 0xff;
- /* In ARM, we don't need to touch the arguments. */
+ SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
+
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (type >= SLJIT_FAST_CALL)
PTR_FAIL_IF(prepare_blx(compiler));
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
- type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
+ type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
if (jump->flags & SLJIT_REWRITABLE_JUMP) {
jump->addr = compiler->size;
@@ -2166,13 +2168,248 @@
#else
if (type >= SLJIT_FAST_CALL)
jump->flags |= IS_BL;
- PTR_FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
- PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)) & ~COND_MASK) | get_cc(type)));
+ PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
+ PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
jump->addr = compiler->size;
#endif
return jump;
}
+#ifdef __SOFTFP__
+
+static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
+{
+ sljit_s32 stack_offset = 0;
+ sljit_s32 arg_count = 0;
+ sljit_s32 word_arg_offset = 0;
+ sljit_s32 float_arg_count = 0;
+ sljit_s32 types = 0;
+ sljit_s32 src_offset = 4 * sizeof(sljit_sw);
+ sljit_u8 offsets[4];
+
+ if (src && FAST_IS_REG(*src))
+ src_offset = reg_map[*src] * sizeof(sljit_sw);
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ offsets[arg_count] = (sljit_u8)stack_offset;
+ stack_offset += sizeof(sljit_f32);
+ arg_count++;
+ float_arg_count++;
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (stack_offset & 0x7)
+ stack_offset += sizeof(sljit_sw);
+ offsets[arg_count] = (sljit_u8)stack_offset;
+ stack_offset += sizeof(sljit_f64);
+ arg_count++;
+ float_arg_count++;
+ break;
+ default:
+ offsets[arg_count] = (sljit_u8)stack_offset;
+ stack_offset += sizeof(sljit_sw);
+ arg_count++;
+ word_arg_offset += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ if (stack_offset > 16)
+ FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7)));
+
+ /* Process arguments in reverse order. */
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ arg_count--;
+ float_arg_count--;
+ stack_offset = offsets[arg_count];
+
+ if (stack_offset < 16) {
+ if (src_offset == stack_offset) {
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
+ *src = TMP_REG1;
+ }
+ FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10)));
+ } else
+ FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ arg_count--;
+ float_arg_count--;
+ stack_offset = offsets[arg_count];
+
+ SLJIT_ASSERT((stack_offset & 0x7) == 0);
+
+ if (stack_offset < 16) {
+ if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) {
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
+ *src = TMP_REG1;
+ }
+ FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count));
+ } else
+ FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
+ break;
+ default:
+ arg_count--;
+ word_arg_offset -= sizeof(sljit_sw);
+ stack_offset = offsets[arg_count];
+
+ SLJIT_ASSERT(stack_offset >= word_arg_offset);
+
+ if (stack_offset != word_arg_offset) {
+ if (stack_offset < 16) {
+ if (src_offset == stack_offset) {
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
+ *src = TMP_REG1;
+ }
+ else if (src_offset == word_arg_offset) {
+ *src = 1 + (stack_offset >> 2);
+ src_offset = stack_offset;
+ }
+ FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2)));
+ } else
+ FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_DATA] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16)));
+ }
+ break;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
+{
+ sljit_s32 stack_size = 0;
+
+ if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32)
+ FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12)));
+ if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64)
+ FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ stack_size += sizeof(sljit_f32);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (stack_size & 0x7)
+ stack_size += sizeof(sljit_sw);
+ stack_size += sizeof(sljit_f64);
+ break;
+ default:
+ stack_size += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ if (stack_size <= 16)
+ return SLJIT_SUCCESS;
+
+ return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7));
+}
+
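/* Illustrative sketch, not part of the patch: the EABI softfp layout that
   softfloat_call_with_args() computes into offsets[].  The first 16 bytes of
   the outgoing argument area correspond to r0-r3 and anything beyond that is
   stored relative to SP; doubles are aligned to 8 bytes, exactly like the
   stack_offset fixup above.  Standalone C, compile and run to see the
   placement for an example signature. */
#include <stdio.h>

enum arg_kind { ARG_WORD, ARG_F32, ARG_F64 };

static void layout(const enum arg_kind *args, int count)
{
	int offset = 0;

	for (int i = 0; i < count; i++) {
		int size = (args[i] == ARG_F64) ? 8 : 4;

		if (args[i] == ARG_F64 && (offset & 0x7))
			offset += 4; /* 8 byte alignment of doubles */

		if (offset < 16) {
			if (size == 8)
				printf("arg %d -> r%d:r%d\n", i, offset >> 2, (offset >> 2) + 1);
			else
				printf("arg %d -> r%d\n", i, offset >> 2);
		} else
			printf("arg %d -> [sp + %d]\n", i, offset - 16);

		offset += size;
	}
}

int main(void)
{
	/* word, double, float, word: r0, r2:r3, [sp + 0], [sp + 4] */
	enum arg_kind example[] = { ARG_WORD, ARG_F64, ARG_F32, ARG_WORD };
	layout(example, 4);
	return 0;
}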
+#else /* !__SOFTFP__ */
+
+static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
+{
+ sljit_u32 remap = 0;
+ sljit_u32 offset = 0;
+ sljit_u32 new_offset, mask;
+
+ /* Remove return value. */
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) {
+ new_offset = 0;
+ mask = 1;
+
+ while (remap & mask) {
+ new_offset++;
+ mask <<= 1;
+ }
+ remap |= mask;
+
+ if (offset != new_offset)
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
+ 0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 0x400000 : 0)));
+
+ offset += 2;
+ }
+ else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) {
+ new_offset = 0;
+ mask = 3;
+
+ while (remap & mask) {
+ new_offset += 2;
+ mask <<= 2;
+ }
+ remap |= mask;
+
+ if (offset != new_offset)
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0)));
+
+ offset += 2;
+ }
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+#endif /* __SOFTFP__ */
+
+#undef EMIT_FPU_OPERATION
+
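/* Illustrative sketch, not part of the patch: the AAPCS VFP back-filling that
   hardfloat_call_with_args() reproduces with the VMOV_F32 shuffles above.  As
   the source operands of those shuffles suggest, sljit appears to keep the
   i-th floating point argument in d(i) (one d register per argument); the
   hardfp ABI instead packs f32 arguments into the lowest free s register and
   f64 arguments into the lowest free even s pair (a d register). */
#include <stdio.h>

int main(void)
{
	/* 4 = f32 argument, 8 = f64 argument (example signature: f32, f64, f32). */
	int args[] = { 4, 8, 4 };
	unsigned int remap = 0; /* bit i set: s(i) is already taken */

	for (int i = 0; i < 3; i++) {
		int s = 0;

		if (args[i] == 4) {
			while (remap & (1u << s))
				s++;
			remap |= 1u << s;
			printf("f32 arg %d -> s%d\n", i, s);
		} else {
			while (remap & (3u << s))
				s += 2; /* d registers are even s pairs */
			remap |= 3u << s;
			printf("f64 arg %d -> d%d\n", i, s >> 1);
		}
	}
	return 0;
}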
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+#ifdef __SOFTFP__
+ struct sljit_jump *jump;
+#endif
+
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+#ifdef __SOFTFP__
+ PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ jump = sljit_emit_jump(compiler, type);
+ PTR_FAIL_IF(jump == NULL);
+
+ PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
+ return jump;
+#else /* !__SOFTFP__ */
+ PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_jump(compiler, type);
+#endif /* __SOFTFP__ */
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
struct sljit_jump *jump;
@@ -2181,16 +2418,20 @@
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
- /* In ARM, we don't need to touch the arguments. */
+ SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
+
if (!(src & SLJIT_IMM)) {
- if (FAST_IS_REG(src))
+ if (FAST_IS_REG(src)) {
+ SLJIT_ASSERT(reg_map[src] != 14);
return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
+ }
SLJIT_ASSERT(src & SLJIT_MEM);
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2));
- return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
}
+ /* These jumps are converted to jump/call instructions when possible. */
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
@@ -2199,22 +2440,57 @@
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (type >= SLJIT_FAST_CALL)
FAIL_IF(prepare_blx(compiler));
- FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0), 0));
+ FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
if (type >= SLJIT_FAST_CALL)
FAIL_IF(emit_blx(compiler));
#else
- FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
- FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)));
+ FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
+ FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
#endif
jump->addr = compiler->size;
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+#ifdef __SOFTFP__
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
+
+ return softfloat_post_call_with_args(compiler, arg_types);
+#else /* !__SOFTFP__ */
+ FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_ijump(compiler, type, src, srcw);
+#endif /* __SOFTFP__ */
+}
+
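/* Illustrative usage sketch, not part of the patch: how a client builds the
   arg_types word for the typed call interface added above.  The field layout
   (return type in the lowest SLJIT_DEF_SHIFT wide field, then the arguments in
   order) follows the decoders above; SLJIT_ARG_TYPE_SW, SLJIT_CALL and
   SLJIT_FUNC_OFFSET are assumed to come from sljitLir.h and do not appear in
   this diff, so treat their exact spelling as an assumption of the sketch. */
static sljit_sw helper_func(sljit_sw a, sljit_f64 b); /* hypothetical C helper */

static sljit_s32 emit_helper_call(struct sljit_compiler *compiler)
{
	/* Returns a word, takes (word, double). */
	sljit_s32 arg_types = SLJIT_ARG_TYPE_SW
		| (SLJIT_ARG_TYPE_SW << SLJIT_DEF_SHIFT)
		| (SLJIT_ARG_TYPE_F64 << (2 * SLJIT_DEF_SHIFT));

	/* Word arguments are expected in R0, R1, ... and float arguments in
	   FR0, FR1, ... before the call; softfloat_call_with_args() or
	   hardfloat_call_with_args() above moves them into the ABI locations. */
	return sljit_emit_icall(compiler, SLJIT_CALL, arg_types,
		SLJIT_IMM, SLJIT_FUNC_OFFSET(helper_func));
}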
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
{
- sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
+ sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
sljit_uw cc, ins;
CHECK_ERROR();
@@ -2223,31 +2499,31 @@
op = GET_OPCODE(op);
cc = get_cc(type & 0xff);
- dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (op < SLJIT_ADD) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
+ FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
+ FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
if (dst & SLJIT_MEM)
return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
return SLJIT_SUCCESS;
}
- ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
+ ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));
if (dst & SLJIT_MEM)
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 1) & ~COND_MASK) | cc));
+ FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
if (op == SLJIT_AND)
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
+ FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
if (dst & SLJIT_MEM)
FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
if (flags & SLJIT_SET_Z)
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_r)));
+ return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
return SLJIT_SUCCESS;
}
@@ -2267,11 +2543,11 @@
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
tmp = get_imm(srcw);
if (tmp)
- return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc);
+ return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
tmp = get_imm(~srcw);
if (tmp)
- return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MVN_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc);
+ return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
tmp = (sljit_uw) srcw;
@@ -2285,7 +2561,7 @@
#endif
}
- return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, RM(src)) & ~COND_MASK) | cc);
+ return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc);
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
Modified: code/trunk/src/sljit/sljitNativeARM_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_64.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeARM_64.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -40,13 +40,17 @@
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6)
-#define TMP_FREG1 (0)
-#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31
};
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 1, 2, 3, 4, 5, 6, 7
+};
+
#define W_OP (1 << 31)
#define RD(rd) (reg_map[rd])
#define RT(rt) (reg_map[rt])
@@ -53,10 +57,10 @@
#define RN(rn) (reg_map[rn] << 5)
#define RT2(rt2) (reg_map[rt2] << 10)
#define RM(rm) (reg_map[rm] << 16)
-#define VD(vd) (vd)
-#define VT(vt) (vt)
-#define VN(vn) ((vn) << 5)
-#define VM(vm) ((vm) << 16)
+#define VD(vd) (freg_map[vd])
+#define VT(vt) (freg_map[vt])
+#define VN(vn) (freg_map[vn] << 5)
+#define VM(vm) (freg_map[vm] << 16)
/* --------------------------------------------------------------------- */
/* Instruction forms */
@@ -1088,14 +1092,14 @@
/* --------------------------------------------------------------------- */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
- sljit_s32 i, tmp, offs, prev, saved_regs_size;
+ sljit_s32 args, i, tmp, offs, prev, saved_regs_size;
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
@@ -1165,6 +1169,8 @@
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
}
+ args = get_arg_count(arg_types);
+
if (args >= 1)
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
if (args >= 2)
@@ -1176,12 +1182,12 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
local_size = (local_size + 15) & ~0xf;
@@ -1568,7 +1574,7 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return reg;
+ return freg_map[reg];
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -1936,6 +1942,20 @@
return jump;
}
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_jump(compiler, type);
+}
+
static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 src, sljit_sw srcw)
{
@@ -1978,7 +1998,6 @@
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
- /* In ARM, we don't need to touch the arguments. */
if (!(src & SLJIT_IMM)) {
if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw));
@@ -1987,6 +2006,7 @@
return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
}
+ /* These jumps are converted to jump/call instructions when possible. */
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
@@ -1997,6 +2017,21 @@
return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_ijump(compiler, type, src, srcw);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
Modified: code/trunk/src/sljit/sljitNativeARM_T2_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_T2_32.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeARM_T2_32.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -26,7 +26,11 @@
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
- return "ARM-Thumb2" SLJIT_CPUINFO;
+#ifdef __SOFTFP__
+ return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp";
+#else
+ return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp";
+#endif
}
/* Length of an instruction word. */
@@ -37,14 +41,18 @@
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_FREG1 (0)
-#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 14, 15
+ 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 1, 2, 3, 4, 5, 6, 7
+};
+
#define COPY_BITS(src, from, to, bits) \
((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to))
@@ -69,9 +77,9 @@
#define RN4(rn) (reg_map[rn] << 16)
#define RM4(rm) (reg_map[rm])
#define RT4(rt) (reg_map[rt] << 12)
-#define DD4(dd) ((dd) << 12)
-#define DN4(dn) ((dn) << 16)
-#define DM4(dm) (dm)
+#define DD4(dd) (freg_map[dd] << 12)
+#define DN4(dn) (freg_map[dn] << 16)
+#define DM4(dm) (freg_map[dm])
#define IMM5(imm) \
(COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6))
#define IMM12(imm) \
@@ -178,6 +186,7 @@
#define VDIV_F32 0xee800a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
+#define VMOV2 0xec400a10
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
@@ -208,10 +217,10 @@
static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
- FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
- COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
- return push_inst32(compiler, MOVT | RD4(dst) |
- COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
+ FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
+ | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
+ return push_inst32(compiler, MOVT | RD4(dst)
+ | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
@@ -522,13 +531,13 @@
}
/* set low 16 bits, set hi 16 bits to 0. */
- FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
- COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
+ FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
+ | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
/* set hi 16 bit if needed. */
if (imm >= 0x10000)
- return push_inst32(compiler, MOVT | RD4(dst) |
- COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
+ return push_inst32(compiler, MOVT | RD4(dst)
+ | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
return SLJIT_SUCCESS;
}
@@ -1088,15 +1097,15 @@
/* --------------------------------------------------------------------- */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
- sljit_s32 size, i, tmp;
+ sljit_s32 args, size, i, tmp;
sljit_ins push = 0;
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
for (i = SLJIT_S0; i >= tmp; i--)
@@ -1120,6 +1129,8 @@
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
}
+ args = get_arg_count(arg_types);
+
if (args >= 1)
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0)));
if (args >= 2)
@@ -1131,14 +1142,14 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 size;
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
compiler->local_size = ((size + local_size + 7) & ~7) - size;
@@ -1219,11 +1230,11 @@
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
- SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12);
+ SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);
saved_reg_count = 0;
if (compiler->scratches >= 4)
- saved_reg_list[saved_reg_count++] = 12;
+ saved_reg_list[saved_reg_count++] = 3;
if (compiler->scratches >= 3)
saved_reg_list[saved_reg_count++] = 2;
if (op >= SLJIT_DIV_UW)
@@ -1448,7 +1459,7 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return reg << 1;
+ return (freg_map[reg] << 1);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -1798,7 +1809,6 @@
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
type &= 0xff;
- /* In ARM, we don't need to touch the arguments. */
PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
if (type < SLJIT_JUMP) {
jump->flags |= IS_COND;
@@ -1818,6 +1828,241 @@
return jump;
}
+#ifdef __SOFTFP__
+
+static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
+{
+ sljit_s32 stack_offset = 0;
+ sljit_s32 arg_count = 0;
+ sljit_s32 word_arg_offset = 0;
+ sljit_s32 float_arg_count = 0;
+ sljit_s32 types = 0;
+ sljit_s32 src_offset = 4 * sizeof(sljit_sw);
+ sljit_u8 offsets[4];
+
+ if (src && FAST_IS_REG(*src))
+ src_offset = reg_map[*src] * sizeof(sljit_sw);
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ offsets[arg_count] = (sljit_u8)stack_offset;
+ stack_offset += sizeof(sljit_f32);
+ arg_count++;
+ float_arg_count++;
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (stack_offset & 0x7)
+ stack_offset += sizeof(sljit_sw);
+ offsets[arg_count] = (sljit_u8)stack_offset;
+ stack_offset += sizeof(sljit_f64);
+ arg_count++;
+ float_arg_count++;
+ break;
+ default:
+ offsets[arg_count] = (sljit_u8)stack_offset;
+ stack_offset += sizeof(sljit_sw);
+ arg_count++;
+ word_arg_offset += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ if (stack_offset > 16)
+ FAIL_IF(push_inst16(compiler, SUB_SP | (((stack_offset - 16) + 0x7) & ~0x7) >> 2));
+
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 12);
+
+ /* Process arguments in reverse order. */
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ arg_count--;
+ float_arg_count--;
+ stack_offset = offsets[arg_count];
+
+ if (stack_offset < 16) {
+ if (src_offset == stack_offset) {
+ FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
+ *src = TMP_REG1;
+ }
+ FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10)));
+ } else
+ FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ arg_count--;
+ float_arg_count--;
+ stack_offset = offsets[arg_count];
+
+ SLJIT_ASSERT((stack_offset & 0x7) == 0);
+
+ if (stack_offset < 16) {
+ if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) {
+ FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
+ *src = TMP_REG1;
+ }
+ FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count));
+ } else
+ FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2)));
+ break;
+ default:
+ arg_count--;
+ word_arg_offset -= sizeof(sljit_sw);
+ stack_offset = offsets[arg_count];
+
+ SLJIT_ASSERT(stack_offset >= word_arg_offset);
+
+ if (stack_offset != word_arg_offset) {
+ if (stack_offset < 16) {
+ if (src_offset == stack_offset) {
+ FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
+ *src = TMP_REG1;
+ }
+ else if (src_offset == word_arg_offset) {
+ *src = 1 + (stack_offset >> 2);
+ src_offset = stack_offset;
+ }
+ FAIL_IF(push_inst16(compiler, MOV | (stack_offset >> 2) | (word_arg_offset << 1)));
+ } else
+ FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((stack_offset - 16) >> 2)));
+ }
+ break;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
+{
+ sljit_s32 stack_size = 0;
+
+ if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32)
+ FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12)));
+ if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64)
+ FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ stack_size += sizeof(sljit_f32);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (stack_size & 0x7)
+ stack_size += sizeof(sljit_sw);
+ stack_size += sizeof(sljit_f64);
+ break;
+ default:
+ stack_size += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ if (stack_size <= 16)
+ return SLJIT_SUCCESS;
+
+ return push_inst16(compiler, ADD_SP | ((((stack_size - 16) + 0x7) & ~0x7) >> 2));
+}
+
+#else
+
+static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
+{
+ sljit_u32 remap = 0;
+ sljit_u32 offset = 0;
+ sljit_u32 new_offset, mask;
+
+ /* Remove return value. */
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) {
+ new_offset = 0;
+ mask = 1;
+
+ while (remap & mask) {
+ new_offset++;
+ mask <<= 1;
+ }
+ remap |= mask;
+
+ if (offset != new_offset)
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | DD4((new_offset >> 1) + 1)
+ | ((new_offset & 0x1) ? 0x400000 : 0) | DM4((offset >> 1) + 1)));
+
+ offset += 2;
+ }
+ else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) {
+ new_offset = 0;
+ mask = 3;
+
+ while (remap & mask) {
+ new_offset += 2;
+ mask <<= 2;
+ }
+ remap |= mask;
+
+ if (offset != new_offset)
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_F32_OP | DD4((new_offset >> 1) + 1) | DM4((offset >> 1) + 1)));
+
+ offset += 2;
+ }
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+#ifdef __SOFTFP__
+ struct sljit_jump *jump;
+#endif
+
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+#ifdef __SOFTFP__
+ PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ jump = sljit_emit_jump(compiler, type);
+ PTR_FAIL_IF(jump == NULL);
+
+ PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
+ return jump;
+#else
+ PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_jump(compiler, type);
+#endif
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
struct sljit_jump *jump;
@@ -1826,10 +2071,13 @@
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
- /* In ARM, we don't need to touch the arguments. */
+ SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
+
if (!(src & SLJIT_IMM)) {
- if (FAST_IS_REG(src))
+ if (FAST_IS_REG(src)) {
+ SLJIT_ASSERT(reg_map[src] != 14);
return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
+ }
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
if (type >= SLJIT_FAST_CALL)
@@ -1836,6 +2084,7 @@
return push_inst16(compiler, BLX | RN3(TMP_REG1));
}
+ /* These jumps are converted to jump/call instructions when possible. */
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
@@ -1846,6 +2095,41 @@
return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+#ifdef __SOFTFP__
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ }
+
+ FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
+
+ return softfloat_post_call_with_args(compiler, arg_types);
+#else /* !__SOFTFP__ */
+ FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_ijump(compiler, type, src, srcw);
+#endif /* __SOFTFP__ */
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
@@ -1896,8 +2180,6 @@
return SLJIT_SUCCESS;
/* The condition must always be set, even if the ORR/EORI is not executed above. */
- if (reg_map[dst_r] <= 7)
- return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
@@ -1924,8 +2206,8 @@
if (tmp < 0x10000) {
/* set low 16 bits, set hi 16 bits to 0. */
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
- return push_inst32(compiler, MOVW | RD4(dst_reg) |
- COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
+ return push_inst32(compiler, MOVW | RD4(dst_reg)
+ | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
}
tmp = get_imm(srcw);
@@ -1943,10 +2225,10 @@
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));
tmp = (sljit_uw) srcw;
- FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) |
- COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
- return push_inst32(compiler, MOVT | RD4(dst_reg) |
- COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
+ FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
+ | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
+ return push_inst32(compiler, MOVT | RD4(dst_reg)
+ | COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
Modified: code/trunk/src/sljit/sljitNativeMIPS_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeMIPS_32.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeMIPS_32.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -435,3 +435,232 @@
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
+
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr)
+{
+ sljit_s32 stack_offset = 0;
+ sljit_s32 arg_count = 0;
+ sljit_s32 float_arg_count = 0;
+ sljit_s32 word_arg_count = 0;
+ sljit_s32 types = 0;
+ sljit_s32 arg_count_save, types_save;
+ sljit_ins prev_ins = NOP;
+ sljit_ins ins = NOP;
+ sljit_u8 offsets[4];
+
+ SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ offsets[arg_count] = (sljit_u8)stack_offset;
+
+ if (word_arg_count == 0 && arg_count <= 1)
+ offsets[arg_count] = 254 + arg_count;
+
+ stack_offset += sizeof(sljit_f32);
+ arg_count++;
+ float_arg_count++;
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (stack_offset & 0x7)
+ stack_offset += sizeof(sljit_sw);
+ offsets[arg_count] = (sljit_u8)stack_offset;
+
+ if (word_arg_count == 0 && arg_count <= 1)
+ offsets[arg_count] = 254 + arg_count;
+
+ stack_offset += sizeof(sljit_f64);
+ arg_count++;
+ float_arg_count++;
+ break;
+ default:
+ offsets[arg_count] = (sljit_u8)stack_offset;
+ stack_offset += sizeof(sljit_sw);
+ arg_count++;
+ word_arg_count++;
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */
+ if (stack_offset > 16)
+ FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP)));
+
+ types_save = types;
+ arg_count_save = arg_count;
+
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ arg_count--;
+ if (offsets[arg_count] < 254)
+ ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]);
+ float_arg_count--;
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ arg_count--;
+ if (offsets[arg_count] < 254)
+ ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]);
+ float_arg_count--;
+ break;
+ default:
+ if (offsets[arg_count - 1] >= 16)
+ ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsets[arg_count - 1]);
+ else if (arg_count != word_arg_count)
+ ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
+ else if (arg_count == 1)
+ ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+
+ arg_count--;
+ word_arg_count--;
+ break;
+ }
+
+ if (ins != NOP) {
+ if (prev_ins != NOP)
+ FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
+ prev_ins = ins;
+ ins = NOP;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
+ types = types_save;
+ arg_count = arg_count_save;
+
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ arg_count--;
+ if (offsets[arg_count] == 254)
+ ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ else if (offsets[arg_count] < 16)
+ ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ arg_count--;
+ if (offsets[arg_count] == 254)
+ ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ else if (offsets[arg_count] < 16) {
+ if (prev_ins != NOP)
+ FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
+ prev_ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]);
+ ins = LW | S(SLJIT_SP) | TA(5 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count] + sizeof(sljit_sw));
+ }
+ break;
+ default:
+ arg_count--;
+ break;
+ }
+
+ if (ins != NOP) {
+ if (prev_ins != NOP)
+ FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
+ prev_ins = ins;
+ ins = NOP;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
+ *ins_ptr = prev_ins;
+
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
+{
+ sljit_s32 stack_offset = 0;
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ stack_offset += sizeof(sljit_f32);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (stack_offset & 0x7)
+ stack_offset += sizeof(sljit_sw);
+ stack_offset += sizeof(sljit_f64);
+ break;
+ default:
+ stack_offset += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */
+ if (stack_offset > 16)
+ return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(16), DR(SLJIT_SP));
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ struct sljit_jump *jump;
+ sljit_ins ins;
+
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+ PTR_FAIL_IF(!jump);
+ set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+ type &= 0xff;
+
+ PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
+
+ SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+
+ PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
+
+ jump->flags |= IS_JAL | IS_CALL;
+ PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+ jump->addr = compiler->size;
+ PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
+
+ PTR_FAIL_IF(post_call_with_args(compiler, arg_types));
+
+ return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+ SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+
+ if (src & SLJIT_IMM)
+ FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
+ else if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
+ else if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
+ }
+
+ FAIL_IF(call_with_args(compiler, arg_types, &ins));
+
+ /* Register input. */
+ FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+ FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
+ return post_call_with_args(compiler, arg_types);
+}
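/* Illustrative sketch, not part of the patch: where the o32 rules implemented
   by call_with_args() above place the first few arguments.  a0-a3 are $4-$7;
   a floating point argument stays in $f12/$f14 only while it is one of the
   first two arguments and no integer argument has been seen yet, which is what
   the "offsets[arg_count] = 254 + arg_count" sentinel records.  Standalone C. */
#include <stdio.h>

enum arg_kind { ARG_WORD, ARG_F32, ARG_F64 };

int main(void)
{
	/* Example signature: double, word, float. */
	enum arg_kind args[] = { ARG_F64, ARG_WORD, ARG_F32 };
	int offset = 0, word_seen = 0;

	for (int i = 0; i < 3; i++) {
		int size = (args[i] == ARG_F64) ? 8 : 4;

		if (args[i] == ARG_F64 && (offset & 0x7))
			offset += 4; /* 8 byte alignment, as in call_with_args() */

		if (args[i] != ARG_WORD && !word_seen && i <= 1)
			printf("arg %d -> $f%d\n", i, 12 + 2 * i);
		else if (offset < 16)
			printf("arg %d -> $%d (a%d)\n", i, 4 + (offset >> 2), offset >> 2);
		else
			printf("arg %d -> argument area offset %d\n", i, offset);

		if (args[i] == ARG_WORD)
			word_seen = 1;
		offset += size;
	}
	return 0;
}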
Modified: code/trunk/src/sljit/sljitNativeMIPS_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeMIPS_64.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeMIPS_64.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -537,3 +537,132 @@
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 6);
}
+
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr)
+{
+ sljit_s32 arg_count = 0;
+ sljit_s32 word_arg_count = 0;
+ sljit_s32 float_arg_count = 0;
+ sljit_s32 types = 0;
+ sljit_ins prev_ins = NOP;
+ sljit_ins ins = NOP;
+
+ SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ case SLJIT_ARG_TYPE_F64:
+ arg_count++;
+ float_arg_count++;
+ break;
+ default:
+ arg_count++;
+ word_arg_count++;
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ if (arg_count != float_arg_count)
+ ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count);
+ else if (arg_count == 1)
+ ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ arg_count--;
+ float_arg_count--;
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (arg_count != float_arg_count)
+ ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count);
+ else if (arg_count == 1)
+ ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ arg_count--;
+ float_arg_count--;
+ break;
+ default:
+ if (arg_count != word_arg_count)
+ ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
+ else if (arg_count == 1)
+ ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+ arg_count--;
+ word_arg_count--;
+ break;
+ }
+
+ if (ins != NOP) {
+ if (prev_ins != NOP)
+ FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
+ prev_ins = ins;
+ ins = NOP;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
+ *ins_ptr = prev_ins;
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ struct sljit_jump *jump;
+ sljit_ins ins;
+
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+ PTR_FAIL_IF(!jump);
+ set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+ type &= 0xff;
+
+ PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
+
+ SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+
+ PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
+
+ jump->flags |= IS_JAL | IS_CALL;
+ PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+ jump->addr = compiler->size;
+ PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
+
+ return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+ SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+
+ if (src & SLJIT_IMM)
+ FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
+ else if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
+ else if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
+ }
+
+ FAIL_IF(call_with_args(compiler, arg_types, &ins));
+
+ /* Register input. */
+ FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+ return push_inst(compiler, ins, UNMOVABLE_INS);
+}
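/* Illustrative sketch, not part of the patch: the register moves the n64
   call_with_args() above produces for an (integer, double, integer) signature,
   assuming the usual sljit convention that word arguments start in R0, R1, ...
   and float arguments in FR0, FR1, ...; the reg_map / freg_map values below
   are copied from sljitNativeMIPS_common.c in this revision. */
#include <stdio.h>

int main(void)
{
	static const int reg_map[]  = { 0, 2, 5, 6, 7 };    /* -, R0, R1, R2, R3 */
	static const int freg_map[] = { 0, 0, 13, 14, 15 }; /* -, FR0, FR1, FR2  */

	/* n64 passes by position: slot n uses $(3 + n) or $f(11 + n).  The loop
	   walks the signature backwards, so the moves come out in this order.  */
	printf("slot 3, word  : $%d -> $%d (a2)\n", reg_map[2], reg_map[3]);
	printf("slot 2, double: $f%d -> $f%d\n", freg_map[1], freg_map[2]);
	printf("slot 1, word  : $%d -> $4 (a0, through TMP_REG3)\n", reg_map[1]);
	return 0;
}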
Modified: code/trunk/src/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeMIPS_common.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeMIPS_common.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -60,13 +60,27 @@
#define EQUAL_FLAG 31
#define OTHER_FLAG 1
-#define TMP_FREG1 (0)
-#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
};
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 14, 2, 4, 6, 8, 12, 10
+};
+
+#else
+
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 13, 14, 15, 16, 17, 12, 18
+};
+
+#endif
+
/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */
@@ -74,21 +88,23 @@
#define S(s) (reg_map[s] << 21)
#define T(t) (reg_map[t] << 16)
#define D(d) (reg_map[d] << 11)
+#define FT(t) (freg_map[t] << 16)
+#define FS(s) (freg_map[s] << 11)
+#define FD(d) (freg_map[d] << 6)
/* Absolute registers. */
#define SA(s) ((s) << 21)
#define TA(t) ((t) << 16)
#define DA(d) ((d) << 11)
-#define FT(t) ((t) << 16)
-#define FS(s) ((s) << 11)
-#define FD(d) ((d) << 6)
#define IMM(imm) ((imm) & 0xffff)
#define SH_IMM(imm) ((imm) << 6)
#define DR(dr) (reg_map[dr])
+#define FR(dr) (freg_map[dr])
#define HI(opcode) ((opcode) << 26)
#define LO(opcode) (opcode)
/* S = (16 << 21) D = (17 << 21) */
#define FMT_S (16 << 21)
+#define FMT_D (17 << 21)
#define ABS_S (HI(17) | FMT_S | LO(5))
#define ADD_S (HI(17) | FMT_S | LO(0))
@@ -153,6 +169,7 @@
#define OR (HI(0) | LO(37))
#define ORI (HI(13))
#define SD (HI(63))
+#define SDC1 (HI(61))
#define SLT (HI(0) | LO(42))
#define SLTI (HI(10))
#define SLTIU (HI(11))
@@ -166,6 +183,7 @@
#define SUB_S (HI(17) | FMT_S | LO(1))
#define SUBU (HI(0) | LO(35))
#define SW (HI(43))
+#define SWC1 (HI(57))
#define TRUNC_W_S (HI(17) | FMT_S | LO(13))
#define XOR (HI(0) | LO(38))
#define XORI (HI(14))
@@ -564,6 +582,8 @@
#define STACK_LOAD LD
#endif
+static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw);
+
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#include "sljitNativeMIPS_32.c"
#else
@@ -571,15 +591,15 @@
#endif
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_ins base;
- sljit_s32 i, tmp, offs;
+ sljit_s32 args, i, tmp, offs;
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -616,6 +636,8 @@
FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS));
}
+ args = get_arg_count(arg_types);
+
if (args >= 1)
FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0)));
if (args >= 2)
@@ -627,12 +649,12 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -1298,7 +1320,7 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return reg << 1;
+ return FR(reg);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -1328,11 +1350,9 @@
#endif
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw));
src = TMP_FREG1;
}
- else
- src <<= 1;
FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS));
@@ -1340,7 +1360,7 @@
return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS);
/* Store the integer value from a VFP register. */
- return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+ return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0);
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
# undef is_long
@@ -1357,13 +1377,13 @@
sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21;
#endif
- sljit_s32 dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
else if (src & SLJIT_MEM) {
/* Load the integer value into a VFP register. */
- FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw));
}
else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -1377,7 +1397,7 @@
FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
if (dst & SLJIT_MEM)
- return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+ return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0);
return SLJIT_SUCCESS;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -1392,18 +1412,14 @@
sljit_ins inst;
if (src1 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w));
src1 = TMP_FREG1;
}
- else
- src1 <<= 1;
if (src2 & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, 0, 0));
src2 = TMP_FREG2;
}
- else
- src2 <<= 1;
switch (GET_FLAG_TYPE(op)) {
case SLJIT_EQUAL_F64:
@@ -1443,14 +1459,12 @@
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
op ^= SLJIT_F32_OP;
- dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(dst_r), src, srcw, dst, dstw));
src = dst_r;
}
- else
- src <<= 1;
switch (GET_OPCODE(op)) {
case SLJIT_MOV_F64:
@@ -1474,7 +1488,7 @@
}
if (dst & SLJIT_MEM)
- return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
+ return emit_op_mem2(compiler, FLOAT_DATA(op), FR(dst_r), dst, dstw, 0, 0);
return SLJIT_SUCCESS;
}
@@ -1494,42 +1508,38 @@
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
if (src1 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+ if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w)) {
FAIL_IF(compiler->error);
src1 = TMP_FREG1;
} else
flags |= SLOW_SRC1;
}
- else
- src1 <<= 1;
if (src2 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+ if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w)) {
FAIL_IF(compiler->error);
src2 = TMP_FREG2;
} else
flags |= SLOW_SRC2;
}
- else
- src2 <<= 1;
if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+ FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w));
+ FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw));
}
else {
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+ FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w));
+ FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw));
}
}
else if (flags & SLOW_SRC1)
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+ FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw));
else if (flags & SLOW_SRC2)
- FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+ FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw));
if (flags & SLOW_SRC1)
src1 = TMP_FREG1;
@@ -1555,7 +1565,7 @@
}
if (dst_r == TMP_FREG2)
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG2), dst, dstw, 0, 0));
return SLJIT_SUCCESS;
}
@@ -1705,19 +1715,16 @@
PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS));
PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
- if (type <= SLJIT_JUMP) {
+
+ if (type <= SLJIT_JUMP)
PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
- jump->addr = compiler->size;
- PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
- } else {
- SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
- /* Cannot be optimized out if type is >= CALL0. */
- jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? IS_CALL : 0);
+ else {
+ jump->flags |= IS_JAL;
PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
- jump->addr = compiler->size;
- /* A NOP if type < CALL1. */
- PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS));
}
+
+ jump->addr = compiler->size;
+ PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
return jump;
}
@@ -1873,7 +1880,6 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 src_r = TMP_REG2;
struct sljit_jump *jump = NULL;
CHECK_ERROR();
@@ -1880,34 +1886,6 @@
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
- if (FAST_IS_REG(src)) {
- if (DR(src) != 4)
- src_r = src;
- else
- FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
- }
-
- if (type >= SLJIT_CALL0) {
- SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
- if (src & (SLJIT_IMM | SLJIT_MEM)) {
- if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
- else {
- SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
- FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
- }
- FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
- /* We need an extra instruction in any case. */
- return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS);
- }
-
- /* Register input. */
- if (type >= SLJIT_CALL1)
- FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4));
- FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
- return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS);
- }
-
if (src & SLJIT_IMM) {
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
@@ -1918,11 +1896,14 @@
jump->flags |= IS_MOVABLE;
FAIL_IF(emit_const(compiler, TMP_REG2, 0));
+ src = TMP_REG2;
}
- else if (src & SLJIT_MEM)
- FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
+ else if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG2), src, srcw));
+ src = TMP_REG2;
+ }
- FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS));
+ FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS));
if (jump)
jump->addr = compiler->size;
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
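Note: the MIPS hunks above remove the SLJIT_CALL0..SLJIT_CALL3 special cases (PIC register setup and the argument move squeezed into the delay slot) from the jump emitters; calls are expected to arrive through the typed call interface this revision introduces for every backend, which receives an explicit argument-type descriptor instead of a bare argument count. A minimal sketch of a call site under the new interface, assuming a hypothetical one-word-argument helper whose value is already in SLJIT_R0:

  /* Illustrative only, not part of this revision. */
  static void helper(sljit_sw value);

  static sljit_s32 emit_helper_call(struct sljit_compiler *compiler)
  {
      /* Previously: sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(helper)); */
      return sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW),
          SLJIT_IMM, SLJIT_FUNC_OFFSET(helper));
  }

This keeps sljit_emit_ijump free of calling-convention details and concentrates all argument shuffling in one place per backend.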
Modified: code/trunk/src/sljit/sljitNativePPC_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativePPC_64.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativePPC_64.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -413,6 +413,61 @@
return SLJIT_SUCCESS;
}
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
+{
+ sljit_s32 arg_count = 0;
+ sljit_s32 word_arg_count = 0;
+ sljit_s32 types = 0;
+ sljit_s32 reg = 0;
+
+ if (src)
+ reg = *src & REG_MASK;
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ case SLJIT_ARG_TYPE_F64:
+ arg_count++;
+ break;
+ default:
+ arg_count++;
+ word_arg_count++;
+
+ if (arg_count != word_arg_count && arg_count == reg) {
+ FAIL_IF(push_inst(compiler, OR | S(reg) | A(TMP_CALL_REG) | B(reg)));
+ *src = TMP_CALL_REG;
+ }
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ case SLJIT_ARG_TYPE_F64:
+ arg_count--;
+ break;
+ default:
+ if (arg_count != word_arg_count)
+ FAIL_IF(push_inst(compiler, OR | S(word_arg_count) | A(arg_count) | B(word_arg_count)));
+
+ arg_count--;
+ word_arg_count--;
+ break;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value)
{
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48)));
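Note: call_with_args above walks the packed argument descriptor twice. The first pass collects the types (and rescues a register call target that would be clobbered by the shuffling); the second pass runs backwards and moves each word argument from SLJIT_R0.. into the general register matching its overall position, skipping positions occupied by floating-point values. A minimal sketch of the same shift-and-mask walk; only SLJIT_DEF_SHIFT, SLJIT_DEF_MASK and the SLJIT_ARG_TYPE_* constants are assumed from sljitLir.h, the counting helper itself is illustrative:

  /* Count word and float arguments in an arg_types descriptor (sketch). */
  static void count_args(sljit_s32 arg_types, sljit_s32 *word_args, sljit_s32 *float_args)
  {
      *word_args = 0;
      *float_args = 0;

      arg_types >>= SLJIT_DEF_SHIFT;  /* drop the return-type slot */

      while (arg_types) {
          switch (arg_types & SLJIT_DEF_MASK) {
          case SLJIT_ARG_TYPE_F32:
          case SLJIT_ARG_TYPE_F64:
              (*float_args)++;
              break;
          default:
              (*word_args)++;
              break;
          }
          arg_types >>= SLJIT_DEF_SHIFT;
      }
  }

The same loop shape appears in every backend touched by this revision; only the action taken per argument differs.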
Modified: code/trunk/src/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativePPC_common.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativePPC_common.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -102,13 +102,17 @@
#define TMP_CALL_REG TMP_REG2
#endif
-#define TMP_FREG1 (0)
-#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
};
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 1, 2, 3, 4, 5, 6, 0, 7
+};
+
/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */
@@ -117,11 +121,11 @@
#define A(a) (reg_map[a] << 16)
#define B(b) (reg_map[b] << 11)
#define C(c) (reg_map[c] << 6)
-#define FD(fd) ((fd) << 21)
-#define FS(fs) ((fs) << 21)
-#define FA(fa) ((fa) << 16)
-#define FB(fb) ((fb) << 11)
-#define FC(fc) ((fc) << 6)
+#define FD(fd) (freg_map[fd] << 21)
+#define FS(fs) (freg_map[fs] << 21)
+#define FA(fa) (freg_map[fa] << 16)
+#define FB(fb) (freg_map[fb] << 11)
+#define FC(fc) (freg_map[fc] << 6)
#define IMM(imm) ((imm) & 0xffff)
#define CRD(d) ((d) << 21)
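Note: with TMP_FREG1/TMP_FREG2 renumbered above the visible float registers, the FD/FS/FA/FB/FC fields now go through freg_map, so the virtual numbering no longer has to match the hardware numbering; the temporaries still encode to the same hardware registers as before (f0 and f7). A standalone sketch of the new encoding using the table values from this hunk; the scaffolding around it is illustrative:

  #include <stdio.h>

  typedef unsigned char sljit_u8;
  typedef unsigned int sljit_ins;

  /* Same values as the freg_map added above (six visible float registers plus temporaries). */
  static const sljit_u8 freg_map[6 + 3] = { 0, 1, 2, 3, 4, 5, 6, 0, 7 };

  #define FD(fd) ((sljit_ins)freg_map[fd] << 21)

  int main(void)
  {
      /* Virtual register 7 (TMP_FREG1) encodes as f0, virtual register 8 (TMP_FREG2) as f7. */
      printf("FD(7) = 0x%08x, FD(8) = 0x%08x\n", FD(7), FD(8));
      return 0;
  }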
@@ -610,14 +614,14 @@
#endif
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
- sljit_s32 i, tmp, offs;
+ sljit_s32 args, i, tmp, offs;
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
FAIL_IF(push_inst(compiler, MFLR | D(0)));
offs = -(sljit_s32)(sizeof(sljit_sw));
@@ -643,6 +647,9 @@
#endif
FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
+
+ args = get_arg_count(arg_types);
+
if (args >= 1)
FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
if (args >= 2)
@@ -674,12 +681,12 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
compiler->local_size = (local_size + 15) & ~0xf;
@@ -1385,21 +1392,26 @@
if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
+ if (op < SLJIT_NOT && FAST_IS_REG(src) && src == dst) {
+ if (!TYPE_CAST_NEEDED(op))
+ return SLJIT_SUCCESS;
+ }
+
if (op_flags & SLJIT_I32_OP) {
if (op < SLJIT_NOT) {
- if (FAST_IS_REG(src) && src == dst) {
- if (!TYPE_CAST_NEEDED(op))
- return SLJIT_SUCCESS;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (src & SLJIT_MEM) {
+ if (op == SLJIT_MOV_S32)
+ op = SLJIT_MOV_U32;
+ if (op == SLJIT_MOVU_S32)
+ op = SLJIT_MOVU_U32;
}
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
- op = SLJIT_MOV_U32;
- if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
- op = SLJIT_MOVU_U32;
- if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
- op = SLJIT_MOV_S32;
- if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
- op = SLJIT_MOVU_S32;
+ else if (src & SLJIT_IMM) {
+ if (op == SLJIT_MOV_U32)
+ op = SLJIT_MOV_S32;
+ if (op == SLJIT_MOVU_U32)
+ op = SLJIT_MOVU_S32;
+ }
#endif
}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1746,7 +1758,7 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return reg;
+ return freg_map[reg];
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -2183,7 +2195,7 @@
return (4 << 21) | ((4 + 3) << 16);
default:
- SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
+ SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
return (20 << 21);
}
}
@@ -2209,7 +2221,7 @@
if (type < SLJIT_JUMP)
jump->flags |= IS_COND;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
- if (type >= SLJIT_CALL0)
+ if (type >= SLJIT_CALL)
jump->flags |= IS_CALL;
#endif
@@ -2220,6 +2232,24 @@
return jump;
}
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_jump(compiler, type);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
struct sljit_jump *jump = NULL;
@@ -2231,7 +2261,7 @@
if (FAST_IS_REG(src)) {
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
- if (type >= SLJIT_CALL0) {
+ if (type >= SLJIT_CALL) {
FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
src_r = TMP_CALL_REG;
}
@@ -2241,12 +2271,13 @@
src_r = src;
#endif
} else if (src & SLJIT_IMM) {
+ /* These jumps are converted to jump/call instructions when possible. */
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
set_jump(jump, compiler, JUMP_ADDR);
jump->u.target = srcw;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
- if (type >= SLJIT_CALL0)
+ if (type >= SLJIT_CALL)
jump->flags |= IS_CALL;
#endif
FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
@@ -2263,6 +2294,31 @@
return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
+ src = TMP_CALL_REG;
+ }
+
+ FAIL_IF(call_with_args(compiler, arg_types, &src));
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_ijump(compiler, type, src, srcw);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
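Note: on PowerPC the new sljit_emit_call and sljit_emit_icall are thin wrappers. They run call_with_args on PPC-64 (the 32-bit path needs no shuffling here), set skip_checks so the argument verifier does not run twice, and fall through to the existing jump emitters; the IS_CALL bookkeeping now keys off SLJIT_CALL instead of the removed SLJIT_CALL0. A minimal usage sketch of the two forms, assuming sljit_set_target from sljitLir.h binds the direct call to its destination; the helper address and error handling are illustrative:

  static sljit_s32 emit_two_calls(struct sljit_compiler *compiler, sljit_uw helper_addr)
  {
      struct sljit_jump *call;

      /* Direct form: the target address is attached to the returned jump. */
      call = sljit_emit_call(compiler, SLJIT_CALL, SLJIT_ARG1(SW));
      if (call == NULL)
          return SLJIT_ERR_ALLOC_FAILED;
      sljit_set_target(call, helper_addr);

      /* Indirect form: the callee address is already in SLJIT_R2. */
      return sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_R2, 0);
  }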
Modified: code/trunk/src/sljit/sljitNativeSPARC_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeSPARC_32.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeSPARC_32.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -138,6 +138,125 @@
return SLJIT_SUCCESS;
}
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
+{
+ sljit_s32 reg_index = 8;
+ sljit_s32 word_reg_index = 8;
+ sljit_s32 float_arg_index = 1;
+ sljit_s32 double_arg_count = 0;
+ sljit_s32 float_offset = (16 + 6) * sizeof(sljit_sw);
+ sljit_s32 types = 0;
+ sljit_s32 reg = 0;
+ sljit_s32 move_to_tmp2 = 0;
+
+ if (src)
+ reg = reg_map[*src & REG_MASK];
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ float_arg_index++;
+ if (reg_index == reg)
+ move_to_tmp2 = 1;
+ reg_index++;
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ float_arg_index++;
+ double_arg_count++;
+ if (reg_index == reg || reg_index + 1 == reg)
+ move_to_tmp2 = 1;
+ reg_index += 2;
+ break;
+ default:
+ if (reg_index != word_reg_index && reg_index < 14 && reg_index == reg)
+ move_to_tmp2 = 1;
+ reg_index++;
+ word_reg_index++;
+ break;
+ }
+
+ if (move_to_tmp2) {
+ move_to_tmp2 = 0;
+ if (reg < 14)
+ FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1)));
+ *src = TMP_REG1;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ arg_types = types;
+
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ float_arg_index--;
+ FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+ float_offset -= sizeof(sljit_f64);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ float_arg_index--;
+ if (float_arg_index == 4 && double_arg_count == 4) {
+ FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS));
+ }
+ else
+ FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+ float_offset -= sizeof(sljit_f64);
+ break;
+ default:
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ float_offset = (16 + 6) * sizeof(sljit_sw);
+
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ reg_index--;
+ if (reg_index < 14)
+ FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
+ float_offset -= sizeof(sljit_f64);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ reg_index -= 2;
+ if (reg_index < 14) {
+ if ((reg_index & 0x1) != 0) {
+ FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
+ if (reg_index < 13)
+ FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1));
+ }
+ else
+ FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
+ }
+ float_offset -= sizeof(sljit_f64);
+ break;
+ default:
+ reg_index--;
+ word_reg_index--;
+
+ if (reg_index != word_reg_index) {
+ if (reg_index < 14)
+ FAIL_IF(push_inst(compiler, OR | DA(reg_index) | S1(0) | S2A(word_reg_index), reg_index));
+ else
+ FAIL_IF(push_inst(compiler, STW | DA(word_reg_index) | S1(SLJIT_SP) | IMM(92), word_reg_index));
+ }
+ break;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst)));
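Note: the SPARC call_with_args above follows the 32-bit ABI, where the first six outgoing argument words travel in %o0-%o5 (slot indices 8-13 in the code) and the rest go to the caller's outgoing-argument area; floating-point values are passed in those same word slots, so they are first spilled from the FPU registers to the stack (offsets counted down from (16 + 6) * sizeof(sljit_sw)) and then reloaded into %o registers with LDUW/LDD or left for the stack. A small sketch of the slot-assignment rule; WORD/F32/F64 are illustrative tags rather than the SLJIT constants, and the case of a double straddling %o5 and the stack (handled above at reg_index 13) is ignored:

  #include <stdio.h>

  enum arg_kind { WORD, F32, F64 };  /* illustrative, not SLJIT_ARG_TYPE_* values */

  static void show_slots(const enum arg_kind *args, int count)
  {
      int reg_index = 8;  /* %o0 */
      int i;

      for (i = 0; i < count; i++) {
          int slots = (args[i] == F64) ? 2 : 1;

          if (reg_index + slots - 1 < 14)
              printf("arg %d -> %%o%d%s\n", i + 1, reg_index - 8,
                  (slots == 2) ? " (low word; high word in the next %o register)" : "");
          else
              printf("arg %d -> stack\n", i + 1);
          reg_index += slots;
      }
  }

  int main(void)
  {
      const enum arg_kind sig[] = { WORD, F64, WORD, F64, WORD };

      show_slots(sig, (int)(sizeof(sig) / sizeof(sig[0])));
      return 0;
  }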
Modified: code/trunk/src/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeSPARC_common.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeSPARC_common.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -90,24 +90,35 @@
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
+/* This register is modified by calls, which affects the instruction
+ in the delay slot if it is used as a source register. */
#define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5)
-#define TMP_FREG1 (0)
-#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
- 0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15
+ 0, 8, 9, 10, 11, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 12, 13, 15
};
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+ 0, 0, 2, 4, 6, 8, 10, 12, 14
+};
+
/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */
#define D(d) (reg_map[d] << 25)
+#define FD(d) (freg_map[d] << 25)
+#define FDN(d) ((freg_map[d] | 0x1) << 25)
#define DA(d) ((d) << 25)
#define S1(s1) (reg_map[s1] << 14)
+#define FS1(s1) (freg_map[s1] << 14)
+#define S1A(s1) ((s1) << 14)
#define S2(s2) (reg_map[s2])
-#define S1A(s1) ((s1) << 14)
+#define FS2(s2) (freg_map[s2])
+#define FS2N(s2) (freg_map[s2] | 0x1)
#define S2A(s2) (s2)
#define IMM_ARG 0x2000
#define DOP(op) ((op) << 5)
@@ -144,6 +155,8 @@
#define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46))
#define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45))
#define JMPL (OPC1(0x2) | OPC3(0x38))
+#define LDD (OPC1(0x3) | OPC3(0x03))
+#define LDUW (OPC1(0x3) | OPC3(0x00))
#define NOP (OPC1(0x0) | OPC2(0x04))
#define OR (OPC1(0x2) | OPC3(0x02))
#define ORN (OPC1(0x2) | OPC3(0x06))
@@ -157,6 +170,9 @@
#define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12))
#define SRL (OPC1(0x2) | OPC3(0x26))
#define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12))
+#define STDF (OPC1(0x3) | OPC3(0x27))
+#define STF (OPC1(0x3) | OPC3(0x24))
+#define STW (OPC1(0x3) | OPC3(0x04))
#define SUB (OPC1(0x2) | OPC3(0x04))
#define SUBC (OPC1(0x2) | OPC3(0x0c))
#define TA (OPC1(0x2) | OPC3(0x3a) | (8 << 25))
@@ -455,12 +471,12 @@
#endif
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
compiler->local_size = local_size;
@@ -479,12 +495,12 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
return SLJIT_SUCCESS;
@@ -553,7 +569,7 @@
if (SLJIT_UNLIKELY(flags & ARG_TEST))
return 1;
FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
- | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg))
+ | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))
| S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
return -1;
@@ -638,7 +654,7 @@
}
}
- dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg));
+ dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg));
delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
if (!base)
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
@@ -962,7 +978,7 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return reg << 1;
+ return freg_map[reg];
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -990,10 +1006,8 @@
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
src = TMP_FREG1;
}
- else
- src <<= 1;
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | FD(TMP_FREG1) | FS2(src), MOVABLE_INS));
if (FAST_IS_REG(dst)) {
FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
@@ -1008,7 +1022,7 @@
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -1027,7 +1041,7 @@
}
FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | FD(dst_r) | FS2(TMP_FREG1), MOVABLE_INS));
if (dst & SLJIT_MEM)
return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
@@ -1042,17 +1056,13 @@
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
src1 = TMP_FREG1;
}
- else
- src1 <<= 1;
if (src2 & SLJIT_MEM) {
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
src2 = TMP_FREG2;
}
- else
- src2 <<= 1;
- return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS);
+ return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | FS1(src1) | FS2(src2), FCC_IS_SET | MOVABLE_INS);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1071,22 +1081,20 @@
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
op ^= SLJIT_F32_OP;
- dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
src = dst_r;
}
- else
- src <<= 1;
switch (GET_OPCODE(op)) {
case SLJIT_MOV_F64:
if (src != dst_r) {
if (dst_r != TMP_FREG1) {
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS));
if (!(op & SLJIT_F32_OP))
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
}
else
dst_r = src;
@@ -1093,17 +1101,17 @@
}
break;
case SLJIT_NEG_F64:
- FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS));
if (dst_r != src && !(op & SLJIT_F32_OP))
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
break;
case SLJIT_ABS_F64:
- FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS));
if (dst_r != src && !(op & SLJIT_F32_OP))
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
break;
case SLJIT_CONV_F64_FROM_F32:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), MOVABLE_INS));
op ^= SLJIT_F32_OP;
break;
}
@@ -1129,7 +1137,7 @@
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
if (src1 & SLJIT_MEM) {
if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
@@ -1138,8 +1146,6 @@
} else
flags |= SLOW_SRC1;
}
- else
- src1 <<= 1;
if (src2 & SLJIT_MEM) {
if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
@@ -1148,8 +1154,6 @@
} else
flags |= SLOW_SRC2;
}
- else
- src2 <<= 1;
if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
@@ -1173,19 +1177,19 @@
switch (GET_OPCODE(op)) {
case SLJIT_ADD_F64:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
break;
case SLJIT_SUB_F64:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
break;
case SLJIT_MUL_F64:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
break;
case SLJIT_DIV_F64:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
break;
}
@@ -1339,7 +1343,8 @@
#else
#error "Implementation required"
#endif
- } else {
+ }
+ else {
if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
jump->flags |= IS_MOVABLE;
if (type >= SLJIT_FAST_CALL)
@@ -1346,8 +1351,8 @@
jump->flags |= IS_CALL;
}
- PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
- PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS));
+ PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0));
+ PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG1) | IMM(0), UNMOVABLE_INS));
jump->addr = compiler->size;
PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
@@ -1354,6 +1359,22 @@
return jump;
}
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+ PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_jump(compiler, type);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
struct sljit_jump *jump = NULL;
@@ -1370,17 +1391,18 @@
FAIL_IF(!jump);
set_jump(jump, compiler, JUMP_ADDR);
jump->u.target = srcw;
+
if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
jump->flags |= IS_MOVABLE;
if (type >= SLJIT_FAST_CALL)
jump->flags |= IS_CALL;
- FAIL_IF(emit_const(compiler, TMP_REG2, 0));
- src_r = TMP_REG2;
+ FAIL_IF(emit_const(compiler, TMP_REG1, 0));
+ src_r = TMP_REG1;
}
else {
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
- src_r = TMP_REG2;
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
+ src_r = TMP_REG1;
}
FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS));
@@ -1389,6 +1411,29 @@
return push_inst(compiler, NOP, UNMOVABLE_INS);
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+ if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
+ src = TMP_REG1;
+ }
+
+ FAIL_IF(call_with_args(compiler, arg_types, &src));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_ijump(compiler, type, src, srcw);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
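Note: two things change in the SPARC common part. Every virtual float register now maps through freg_map to an even-numbered single-precision register, so each one owns an aligned f2n/f2n+1 pair that can hold a double, with FDN/FS2N selecting the odd half; and the jump paths build constant target addresses in TMP_REG1 rather than TMP_REG2, presumably because TMP_REG2 now maps to %o4, one of the outgoing-argument registers that call_with_args may overwrite. A standalone sketch of the pair encoding, reusing the table values from this hunk; the scaffolding is illustrative:

  #include <stdio.h>

  typedef unsigned char sljit_u8;
  typedef unsigned int sljit_ins;

  /* Same values as the SPARC freg_map added above: every entry is an even f register. */
  static const sljit_u8 freg_map[6 + 3] = { 0, 0, 2, 4, 6, 8, 10, 12, 14 };

  #define FD(d)  ((sljit_ins)freg_map[d] << 25)
  #define FDN(d) ((sljit_ins)(freg_map[d] | 0x1) << 25)

  int main(void)
  {
      /* Virtual float register 3 owns the pair f4/f5: FD selects f4, FDN its odd half. */
      printf("FD(3) -> f%u, FDN(3) -> f%u\n", FD(3) >> 25, FDN(3) >> 25);
      return 0;
  }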
Modified: code/trunk/src/sljit/sljitNativeX86_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_32.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeX86_32.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -64,29 +64,28 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
- sljit_s32 size;
+ sljit_s32 args, size;
sljit_u8 *inst;
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+ args = get_arg_count(arg_types);
compiler->args = args;
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
- /* [esp+0] for saving temporaries and third argument for calls. */
- compiler->saveds_offset = 1 * sizeof(sljit_sw);
-#else
- /* [esp+0] for saving temporaries and space for maximum three arguments. */
- if (scratches <= 1)
- compiler->saveds_offset = 1 * sizeof(sljit_sw);
- else
- compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
+ /* [esp+0] for saving temporaries and function calls. */
+ compiler->stack_tmp_size = 2 * sizeof(sljit_sw);
+
+#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ if (scratches > 3)
+ compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif
+ compiler->saveds_offset = compiler->stack_tmp_size;
if (scratches > 3)
compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);
@@ -178,10 +177,10 @@
/* Space for a single argument. This amount is excluded when the stack is allocated below. */
local_size -= sizeof(sljit_sw);
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
- FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
- FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+ FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
}
#endif
@@ -192,12 +191,12 @@
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);
/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
- FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));
#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (compiler->local_size > 1024)
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
@@ -213,31 +212,29 @@
return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
}
#endif
- return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+ return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
- compiler->args = args;
+ compiler->args = get_arg_count(arg_types);
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
- /* [esp+0] for saving temporaries and third argument for calls. */
- compiler->saveds_offset = 1 * sizeof(sljit_sw);
-#else
- /* [esp+0] for saving temporaries and space for maximum three arguments. */
- if (scratches <= 1)
- compiler->saveds_offset = 1 * sizeof(sljit_sw);
- else
- compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
+ /* [esp+0] for saving temporaries and function calls. */
+ compiler->stack_tmp_size = 2 * sizeof(sljit_sw);
+
+#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ if (scratches > 3)
+ compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif
+ compiler->saveds_offset = compiler->stack_tmp_size;
if (scratches > 3)
compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);
@@ -278,10 +275,10 @@
if (compiler->options & SLJIT_F64_ALIGNMENT)
EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
else
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif
@@ -418,7 +415,7 @@
if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
- if ((a & SLJIT_IMM) || (a == 0))
+ if (a & SLJIT_IMM)
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = reg_map[a] << 3;
@@ -490,42 +487,324 @@
/* Call / return instructions */
/* --------------------------------------------------------------------- */
-static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+
+static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
+ sljit_s32 stack_size = 0;
+ sljit_s32 word_arg_count = 0;
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ stack_size += sizeof(sljit_f32);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ stack_size += sizeof(sljit_f64);
+ break;
+ default:
+ word_arg_count++;
+ if (word_arg_count > 2)
+ stack_size += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ if (word_arg_count_ptr)
+ *word_arg_count_ptr = word_arg_count;
+
+ return stack_size;
+}
+
+static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
+ sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
+{
sljit_u8 *inst;
+ sljit_s32 float_arg_count;
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
- inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
- FAIL_IF(!inst);
- INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);
+ if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
+ FAIL_IF(!inst);
+ INC_SIZE(1);
+ PUSH_REG(reg_map[SLJIT_R2]);
+ }
+ else if (stack_size > 0) {
+ if (word_arg_count >= 4)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));
- if (type >= SLJIT_CALL3)
- PUSH_REG(reg_map[SLJIT_R2]);
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));
+
+ stack_size = 0;
+ arg_types >>= SLJIT_DEF_SHIFT;
+ word_arg_count = 0;
+ float_arg_count = 0;
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ float_arg_count++;
+ FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
+ stack_size += sizeof(sljit_f32);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ float_arg_count++;
+ FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
+ stack_size += sizeof(sljit_f64);
+ break;
+ default:
+ word_arg_count++;
+ if (word_arg_count == 3) {
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
+ stack_size += sizeof(sljit_sw);
+ }
+ else if (word_arg_count == 4) {
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
+ stack_size += sizeof(sljit_sw);
+ }
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+ }
+
+ if (word_arg_count > 0) {
+ if (swap_args) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
+ FAIL_IF(!inst);
+ INC_SIZE(1);
+
+ *inst++ = XCHG_EAX_r | reg_map[SLJIT_R2];
+ }
+ else {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+
+ *inst++ = MOV_r_rm;
+ *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
+ }
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+#endif
+
+static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
+{
+ sljit_s32 stack_size = 0;
+ sljit_s32 word_arg_count = 0;
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ stack_size += sizeof(sljit_f32);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ stack_size += sizeof(sljit_f64);
+ break;
+ default:
+ word_arg_count++;
+ stack_size += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ if (word_arg_count_ptr)
+ *word_arg_count_ptr = word_arg_count;
+
+ if (stack_size <= compiler->stack_tmp_size)
+ return 0;
+
+#if defined(__APPLE__)
+ return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
+ return stack_size - compiler->stack_tmp_size;
+#endif
+}
+
+static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
+ sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count)
+{
+ sljit_s32 float_arg_count = 0;
+
+ if (word_arg_count >= 4)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));
+
+ if (stack_size > 0)
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));
+
+ stack_size = 0;
+ word_arg_count = 0;
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ float_arg_count++;
+ FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
+ stack_size += sizeof(sljit_f32);
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ float_arg_count++;
+ FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
+ stack_size += sizeof(sljit_f64);
+ break;
+ default:
+ word_arg_count++;
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
+ stack_size += sizeof(sljit_sw);
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
+ sljit_s32 arg_types, sljit_s32 stack_size)
+{
+ sljit_u8 *inst;
+ sljit_s32 single;
+
+ if (stack_size > 0)
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));
+
+ if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
+ return SLJIT_SUCCESS;
+
+ single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32);
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
FAIL_IF(!inst);
- INC_SIZE(4 * (type - SLJIT_CALL0));
+ INC_SIZE(3);
+ inst[0] = single ? FSTPS : FSTPD;
+ inst[1] = (0x03 << 3) | 0x04;
+ inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];
- *inst++ = MOV_rm_r;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
- *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
- *inst++ = 0;
- if (type >= SLJIT_CALL2) {
- *inst++ = MOV_rm_r;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
- *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
- *inst++ = sizeof(sljit_sw);
+ return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ struct sljit_jump *jump;
+ sljit_s32 stack_size = 0;
+ sljit_s32 word_arg_count;
+
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ if ((type & 0xff) == SLJIT_CALL) {
+ stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
+ PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ jump = sljit_emit_jump(compiler, type);
+ PTR_FAIL_IF(jump == NULL);
+
+ PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
+ return jump;
}
- if (type >= SLJIT_CALL3) {
- *inst++ = MOV_rm_r;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
- *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
- *inst++ = 2 * sizeof(sljit_sw);
+#endif
+
+ stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
+ PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ jump = sljit_emit_jump(compiler, type);
+ PTR_FAIL_IF(jump == NULL);
+
+ PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
+ return jump;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 stack_size = 0;
+ sljit_s32 word_arg_count;
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ sljit_s32 swap_args;
+#endif
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);
+
+ if ((type & 0xff) == SLJIT_CALL) {
+ stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
+ swap_args = 0;
+
+ if (word_arg_count > 0) {
+ if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
+ swap_args = 1;
+ if (((src & REG_MASK) | 0x2) == SLJIT_R2)
+ src ^= 0x2;
+ if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
+ src ^= TO_OFFS_REG(0x2);
+ }
+ }
+
+ FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));
+
+ compiler->saveds_offset += stack_size;
+ compiler->locals_offset += stack_size;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+ FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
+
+ compiler->saveds_offset -= stack_size;
+ compiler->locals_offset -= stack_size;
+
+ return post_call_with_args(compiler, arg_types, 0);
}
#endif
- return SLJIT_SUCCESS;
+
+ stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
+ FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));
+
+ compiler->saveds_offset += stack_size;
+ compiler->locals_offset += stack_size;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+ FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
+
+ compiler->saveds_offset -= stack_size;
+ compiler->locals_offset -= stack_size;
+
+ return post_call_with_args(compiler, arg_types, stack_size);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
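Note: the x86-32 call code above now distinguishes two conventions. With SLJIT_X86_32_FASTCALL enabled, SLJIT_CALL takes the fast path: the first two word arguments stay in registers (only the first needs the eax-to-ecx move, or an XCHG when the indirect target sits in SLJIT_R2), while further word arguments and all floats are written to a freshly reserved stack area. Other call types go through the cdecl path, which puts everything on the stack and, on Apple targets, pads the outgoing area to 16-byte alignment. After the call, post_call_with_args releases the outgoing area and, for float returns, pops the x87 st(0) result to [esp] with FSTPS/FSTPD and reloads it into SLJIT_FR0. A small worked sketch of the cdecl size accounting; WORD/F32/F64 are illustrative tags, and the real code additionally deducts the pre-reserved stack_tmp_size before rounding:

  #include <stdio.h>

  enum arg_kind { WORD, F32, F64 };  /* illustrative, not SLJIT_ARG_TYPE_* values */

  /* Raw outgoing-argument bytes for a cdecl call on x86-32 (sljit_sw and sljit_f32 are 4 bytes). */
  static int cdecl_bytes(const enum arg_kind *args, int count)
  {
      int size = 0;
      int i;

      for (i = 0; i < count; i++)
          size += (args[i] == F64) ? 8 : 4;
      return size;
  }

  int main(void)
  {
      const enum arg_kind sig[] = { WORD, WORD, F64, WORD };

      printf("outgoing bytes: %d\n", cdecl_bytes(sig, 4));  /* 4 + 4 + 8 + 4 = 20 */
      return 0;
  }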
Modified: code/trunk/src/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_64.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeX86_64.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -66,15 +66,15 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
- sljit_s32 i, tmp, size, saved_register_size;
+ sljit_s32 args, i, tmp, size, saved_register_size;
sljit_u8 *inst;
CHECK_ERROR();
- CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
#ifdef _WIN64
/* Two/four register slots for parameters plus space for xmm6 register if needed. */
@@ -108,6 +108,8 @@
PUSH_REG(reg_lmap[i]);
}
+ args = get_arg_count(arg_types);
+
if (args > 0) {
size = args * 3;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
@@ -182,7 +184,7 @@
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
- FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+ FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
}
#endif
@@ -223,14 +225,14 @@
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
- sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
+ sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 saved_register_size;
CHECK_ERROR();
- CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
- set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+ set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
#ifdef _WIN64
/* Two/four register slots for parameters plus space for xmm6 register if needed. */
@@ -414,7 +416,11 @@
}
}
}
- else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
+ else if (!(flags & EX86_SSE2_OP2)) {
+ if (reg_map[b] >= 8)
+ rex |= REX_B;
+ }
+ else if (freg_map[b] >= 8)
rex |= REX_B;
if (a & SLJIT_IMM) {
@@ -441,7 +447,11 @@
else {
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
- if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
+ if (!(flags & EX86_SSE2_OP1)) {
+ if (reg_map[a] >= 8)
+ rex |= REX_R;
+ }
+ else if (freg_map[a] >= 8)
rex |= REX_R;
}
@@ -468,12 +478,12 @@
if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
- if ((a & SLJIT_IMM) || (a == 0))
+ if (a & SLJIT_IMM)
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = reg_lmap[a] << 3;
else
- *buf_ptr = a << 3;
+ *buf_ptr = freg_lmap[a] << 3;
}
else {
if (a & SLJIT_IMM) {
@@ -487,7 +497,7 @@
}
if (!(b & SLJIT_MEM))
- *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
+ *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]);
else if ((b & REG_MASK) != SLJIT_UNUSED) {
if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
@@ -545,45 +555,161 @@
/* Call / return instructions */
/* --------------------------------------------------------------------- */
-static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
+#ifndef _WIN64
+
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
{
- sljit_u8 *inst;
+ sljit_s32 src = src_ptr ? (*src_ptr) : 0;
+ sljit_s32 word_arg_count = 0;
- /* After any change update IS_REG_CHANGED_BY_CALL as well. */
-#ifndef _WIN64
- SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 2);
+ SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
- inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
- FAIL_IF(!inst);
- INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
- if (type >= SLJIT_CALL3) {
- /* Move third argument to TMP_REG1. */
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
+ compiler->mode32 = 0;
+
+ /* Remove return value. */
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
+ word_arg_count++;
+ arg_types >>= SLJIT_DEF_SHIFT;
}
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
+
+ if (word_arg_count == 0)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
+ *src_ptr = TMP_REG2;
+ }
+ else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2)
+ *src_ptr = TMP_REG1;
+
+ if (word_arg_count >= 3)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
+ return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
+}
+
#else
- SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 8);
- inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
- FAIL_IF(!inst);
- INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
- if (type >= SLJIT_CALL3) {
- /* Move third argument to TMP_REG1. */
- *inst++ = REX_W | REX_R;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
+{
+ sljit_s32 src = src_ptr ? (*src_ptr) : 0;
+ sljit_s32 arg_count = 0;
+ sljit_s32 word_arg_count = 0;
+ sljit_s32 float_arg_count = 0;
+ sljit_s32 types = 0;
+ sljit_s32 data_transfer = 0;
+ static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };
+
+ SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
+
+ compiler->mode32 = 0;
+ arg_types >>= SLJIT_DEF_SHIFT;
+
+ while (arg_types) {
+ types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+
+ switch (arg_types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ case SLJIT_ARG_TYPE_F64:
+ arg_count++;
+ float_arg_count++;
+
+ if (arg_count != float_arg_count)
+ data_transfer = 1;
+ break;
+ default:
+ arg_count++;
+ word_arg_count++;
+
+ if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
+ data_transfer = 1;
+
+ if (src == word_arg_regs[arg_count]) {
+ EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
+ *src_ptr = TMP_REG2;
+ }
+ }
+ break;
+ }
+
+ arg_types >>= SLJIT_DEF_SHIFT;
}
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
-#endif
+
+ if (!data_transfer)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
+ *src_ptr = TMP_REG2;
+ }
+
+ while (types) {
+ switch (types & SLJIT_DEF_MASK) {
+ case SLJIT_ARG_TYPE_F32:
+ if (arg_count != float_arg_count)
+ FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
+ arg_count--;
+ float_arg_count--;
+ break;
+ case SLJIT_ARG_TYPE_F64:
+ if (arg_count != float_arg_count)
+ FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
+ arg_count--;
+ float_arg_count--;
+ break;
+ default:
+ if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
+ EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
+ arg_count--;
+ word_arg_count--;
+ break;
+ }
+
+ types >>= SLJIT_DEF_SHIFT;
+ }
+
return SLJIT_SUCCESS;
}
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types)
+{
+ CHECK_ERROR_PTR();
+ CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+ PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_jump(compiler, type);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 arg_types,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+ FAIL_IF(call_with_args(compiler, arg_types, &src, srcw));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+
+ return sljit_emit_ijump(compiler, type, src, srcw);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
sljit_u8 *inst;
@@ -679,7 +805,6 @@
return SLJIT_SUCCESS;
}
-
/* --------------------------------------------------------------------- */
/* Extend input */
/* --------------------------------------------------------------------- */
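Taken together, the x86-64 changes move all argument shuffling out of sljit_emit_jump / sljit_emit_ijump (see the removals in sljitNativeX86_common.c below) and into the new sljit_emit_call / sljit_emit_icall entry points, which receive the callee signature as an arg_types word. A hedged usage sketch: the helper and its (sw, sw) -> sw signature are invented, and the SLJIT_RET / SLJIT_ARGn spellings are taken from the sljit headers that accompany this revision.

  #include "sljitLir.h"

  /* invented helper, declared with the renamed SLJIT_FUNC attribute */
  static sljit_sw SLJIT_FUNC my_helper(sljit_sw a, sljit_sw b)
  {
      return a + b;
  }

  static sljit_s32 emit_helper_calls(struct sljit_compiler *compiler)
  {
      sljit_s32 arg_types = SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW);
      struct sljit_jump *jump;

      /* direct call: the target is patched in afterwards */
      jump = sljit_emit_call(compiler, SLJIT_CALL, arg_types);
      if (!jump)
          return compiler->error;
      sljit_set_target(jump, SLJIT_FUNC_OFFSET(my_helper));

      /* indirect call through an immediate address */
      return sljit_emit_icall(compiler, SLJIT_CALL, arg_types,
          SLJIT_IMM, SLJIT_FUNC_OFFSET(my_helper));
  }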
Modified: code/trunk/src/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_common.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitNativeX86_common.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -26,7 +26,11 @@
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ return "x86" SLJIT_CPUINFO " ABI:fastcall";
+#else
return "x86" SLJIT_CPUINFO;
+#endif
}
/*
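A side effect worth noting: the platform string now reports when an x86-32 build was configured for the fastcall ABI, so an embedder can verify at run time which calling convention its SLJIT_FUNC helpers must use. A trivial check, assuming only the public header:

  #include <stdio.h>
  #include "sljitLir.h"

  int main(void)
  {
      /* on a fastcall-configured x86-32 build the string carries
         an " ABI:fastcall" suffix; elsewhere it is unchanged */
      printf("%s\n", sljit_get_platform_name());
      return 0;
  }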
@@ -92,23 +96,32 @@
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
- 0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
+ 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
- 0, 0, 6, 1, 7, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
+ 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
- 0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9
+ 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
- 0, 0, 2, 1, 2, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 0, 1
+ 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
};
#endif
+/* Args: xmm0-xmm3 */
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
+ 4, 0, 1, 2, 3, 5, 6
+};
+/* low-map. freg_map & 0x7. */
+static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
+ 4, 0, 1, 2, 3, 5, 6
+};
+
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
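The integer maps are reshuffled so that the low SLJIT registers coincide with native argument registers more often (the asserts in the new call_with_args spell out the expected values), and the new float maps follow the same two-table pattern: each low map must equal the full map masked to three bits, since only those bits fit in the ModRM byte while bit 3 selects a REX extension. A standalone consistency check over the non-Windows tables copied from this hunk:

  #include <assert.h>
  #include <stdio.h>

  /* copies of the x86-64 (non-_WIN64) tables added above */
  static const unsigned char reg_map[17] = {
      0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
  };
  static const unsigned char reg_lmap[17] = {
      0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
  };
  static const unsigned char freg_map[7]  = { 4, 0, 1, 2, 3, 5, 6 };
  static const unsigned char freg_lmap[7] = { 4, 0, 1, 2, 3, 5, 6 };

  int main(void)
  {
      int i;

      for (i = 0; i < 17; i++)
          assert(reg_lmap[i] == (reg_map[i] & 0x7));
      for (i = 0; i < 7; i++)
          assert(freg_lmap[i] == (freg_map[i] & 0x7));
      puts("low maps are consistent with the full maps");
      return 0;
  }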
@@ -178,6 +191,8 @@
#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
+#define FSTPS 0xd9
+#define FSTPD 0xdd
#define INT3 0xcc
#define IDIV (/* GROUP_F7 */ 7 << 3)
#define IMUL (/* GROUP_F7 */ 5 << 3)
@@ -613,9 +628,6 @@
get_cpu_features();
return cpu_has_cmov;
- case SLJIT_HAS_PREF_SHIFT_REG:
- return 1;
-
case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
@@ -634,14 +646,16 @@
/* Operators */
/* --------------------------------------------------------------------- */
+#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
+
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
- sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
+ sljit_u32 op_types,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
- sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
+ sljit_u32 op_types,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
@@ -653,10 +667,16 @@
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
+static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
+ sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
+
+static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
+ sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
+
#ifdef _WIN32
#include <malloc.h>
-static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
+static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
{
/* Workaround for calling the internal _chkstk() function on Windows.
This function touches all 4k pages belonging to the requested stack space,
@@ -1275,20 +1295,25 @@
compiler->mode32 = 0;
#endif
+ if (FAST_IS_REG(src) && src == dst) {
+ if (!TYPE_CAST_NEEDED(op))
+ return SLJIT_SUCCESS;
+ }
+
if (op_flags & SLJIT_I32_OP) {
- if (FAST_IS_REG(src) && src == dst) {
- if (!TYPE_CAST_NEEDED(op))
- return SLJIT_SUCCESS;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (src & SLJIT_MEM) {
+ if (op == SLJIT_MOV_S32)
+ op = SLJIT_MOV_U32;
+ if (op == SLJIT_MOVU_S32)
+ op = SLJIT_MOVU_U32;
}
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
- op = SLJIT_MOV_U32;
- if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
- op = SLJIT_MOVU_U32;
- if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
- op = SLJIT_MOV_S32;
- if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
- op = SLJIT_MOVU_S32;
+ else if (src & SLJIT_IMM) {
+ if (op == SLJIT_MOV_U32)
+ op = SLJIT_MOV_S32;
+ if (op == SLJIT_MOVU_U32)
+ op = SLJIT_MOVU_S32;
+ }
#endif
}
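The MOV rewrite above rests on a simple observation (my reading of the motivation, not stated in the patch): under SLJIT_I32_OP only the low 32 bits of the result are observed, so a sign-extending 32-bit load from memory can be demoted to the plain zero-extending one, and conversely an unsigned 32-bit immediate can be treated as signed so the back end may use a sign-extended imm32 encoding. The bit-level fact behind the first demotion:

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      const uint32_t samples[] = { 0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu };
      size_t i;

      for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
          /* a 64-bit register filled by a sign-extending 32-bit load ... */
          uint64_t sign_extended = (uint64_t)(int64_t)(int32_t)samples[i];
          /* ... and one filled by a zero-extending 32-bit load ... */
          uint64_t zero_extended = (uint64_t)samples[i];
          /* ... agree on the low 32 bits, which is all a 32-bit op reads */
          assert((uint32_t)sign_extended == (uint32_t)zero_extended);
      }
      puts("low 32 bits agree for all samples");
      return 0;
  }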
@@ -1372,11 +1397,11 @@
if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
if ((src & OFFS_REG_MASK) != 0) {
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
(src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0));
}
else if (srcw != 0) {
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
(src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
}
}
@@ -1383,11 +1408,11 @@
if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
if ((dst & OFFS_REG_MASK) != 0) {
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
(dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
}
else if (dstw != 0) {
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
(dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
}
}
@@ -1445,12 +1470,16 @@
#endif
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
- sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
+ sljit_u32 op_types,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
sljit_u8* inst;
+ sljit_u8 op_eax_imm = (op_types >> 24);
+ sljit_u8 op_rm = (op_types >> 16) & 0xff;
+ sljit_u8 op_mr = (op_types >> 8) & 0xff;
+ sljit_u8 op_imm = op_types & 0xff;
if (dst == SLJIT_UNUSED) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
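BINARY_OPCODE (introduced a few hunks earlier) packs the four opcode variants of one ALU operation into a single sljit_u32 so that emit_cum_binary / emit_non_cum_binary take one parameter instead of four; the byte extraction above simply reverses the packing. A round-trip sketch; the ADD_* values are re-defined locally only so the example stands alone (they follow the usual x86 encodings):

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  /* local stand-ins for the definitions earlier in sljitNativeX86_common.c */
  #define ADD_EAX_i32 0x05
  #define ADD_r_rm    0x03
  #define ADD_rm_r    0x01
  #define ADD         (0 << 3)   /* /0 extension for the 0x81/0x83 group */

  #define BINARY_OPCODE(opcode) \
      (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | \
       ((opcode ## _rm_r) << 8) | (opcode))

  int main(void)
  {
      uint32_t op_types = BINARY_OPCODE(ADD);

      /* same unpacking order as emit_cum_binary / emit_non_cum_binary */
      uint8_t op_eax_imm = (uint8_t)(op_types >> 24);
      uint8_t op_rm      = (op_types >> 16) & 0xff;
      uint8_t op_mr      = (op_types >> 8) & 0xff;
      uint8_t op_imm     = op_types & 0xff;

      assert(op_eax_imm == ADD_EAX_i32);
      assert(op_rm == ADD_r_rm);
      assert(op_mr == ADD_rm_r);
      assert(op_imm == ADD);
      printf("packed ADD group: 0x%08x\n", (unsigned)op_types);  /* 0x05030100 */
      return 0;
  }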
@@ -1561,12 +1590,16 @@
}
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
- sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
+ sljit_u32 op_types,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
sljit_u8* inst;
+ sljit_u8 op_eax_imm = (op_types >> 24);
+ sljit_u8 op_rm = (op_types >> 16) & 0xff;
+ sljit_u8 op_mr = (op_types >> 8) & 0xff;
+ sljit_u8 op_imm = op_types & 0xff;
if (dst == SLJIT_UNUSED) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
@@ -2044,7 +2077,7 @@
*inst |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
}
- else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
+ else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
if (src1 != dst)
EMIT_MOV(compiler, dst, 0, src1, src1w);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
@@ -2057,8 +2090,8 @@
else {
/* This case is complex since ecx itself may be used for
addressing, and this case must be supported as well. */
+ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
@@ -2065,19 +2098,16 @@
FAIL_IF(!inst);
*inst |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
- EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
#else
- EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w);
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
- FAIL_IF(!inst);
- *inst = XCHG_r_rm;
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
- EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
#endif
+ if (dst != SLJIT_UNUSED)
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
}
return SLJIT_SUCCESS;
@@ -2101,7 +2131,7 @@
if (!set_flags)
return emit_mov(compiler, dst, dstw, src1, src1w);
/* OR dst, src, 0 */
- return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
+ return emit_cum_binary(compiler, BINARY_OPCODE(OR),
dst, dstw, src1, src1w, SLJIT_IMM, 0);
}
@@ -2111,10 +2141,10 @@
if (!FAST_IS_REG(dst))
FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
- FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
+ FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
if (FAST_IS_REG(dst))
- return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
+ return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
return SLJIT_SUCCESS;
}
@@ -2145,10 +2175,10 @@
if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
}
- return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ADDC:
- return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
+ return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB:
if (!HAS_FLAGS(op)) {
@@ -2158,10 +2188,10 @@
if (dst == SLJIT_UNUSED)
return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
- return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+ return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUBC:
- return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
+ return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_MUL:
return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
@@ -2168,13 +2198,13 @@
case SLJIT_AND:
if (dst == SLJIT_UNUSED)
return emit_test_binary(compiler, src1, src1w, src2, src2w);
- return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
+ return emit_cum_binary(compiler, BINARY_OPCODE(AND),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_OR:
- return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
+ return emit_cum_binary(compiler, BINARY_OPCODE(OR),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_XOR:
- return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
+ return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SHL:
return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
@@ -2203,7 +2233,11 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
return reg;
+#else
+ return freg_map[reg];
+#endif
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -2345,6 +2379,7 @@
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
src1 = TMP_FREG;
}
+
return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}
@@ -2516,9 +2551,6 @@
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
type &= 0xff;
- if (type >= SLJIT_CALL1)
- PTR_FAIL_IF(call_with_args(compiler, type));
-
/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
@@ -2534,14 +2566,6 @@
return jump;
}
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-#ifndef _WIN64
-#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3)
-#else
-#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2)
-#endif
-#endif
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
sljit_u8 *inst;
@@ -2553,25 +2577,6 @@
CHECK_EXTRA_REGS(src, srcw, (void)0);
- if (type >= SLJIT_CALL1) {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
- if (src == SLJIT_R2) {
- EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
- src = TMP_REG1;
- }
- if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
- srcw += sizeof(sljit_sw);
-#endif
-#else
- if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) {
- EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
- src = TMP_REG2;
- }
-#endif
- FAIL_IF(call_with_args(compiler, type));
- }
-
if (src == SLJIT_IMM) {
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF_NULL(jump);
Modified: code/trunk/src/sljit/sljitUtils.c
===================================================================
--- code/trunk/src/sljit/sljitUtils.c 2017-11-23 07:54:39 UTC (rev 884)
+++ code/trunk/src/sljit/sljitUtils.c 2017-11-29 13:30:31 UTC (rev 885)
@@ -48,12 +48,12 @@
#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void)
{
/* Always successful. */
}
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void)
{
/* Always successful. */
}
@@ -88,7 +88,7 @@
static HANDLE global_mutex = 0;
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void)
{
/* No idea what to do if an error occurs. Static mutexes should never fail... */
if (!global_mutex)
@@ -97,7 +97,7 @@
WaitForSingleObject(global_mutex, INFINITE);
}
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void)
{
ReleaseMutex(global_mutex);
}
@@ -130,12 +130,12 @@
static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void)
{
pthread_mutex_lock(&global_mutex);
}
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void)
{
pthread_mutex_unlock(&global_mutex);
}
@@ -203,7 +203,7 @@
/* Planning to make it even more clever in the future. */
static sljit_sw sljit_page_align = 0;
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data)
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data)
{
struct sljit_stack *stack;
void *ptr;
@@ -276,7 +276,7 @@
#undef PAGE_ALIGN
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
{
SLJIT_UNUSED_ARG(allocator_data);
#ifdef _WIN32
@@ -287,7 +287,7 @@
SLJIT_FREE(stack, allocator_data);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit)
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit)
{
sljit_uw aligned_old_limit;
sljit_uw aligned_new_limit;