[Pcre-svn] [847] code/trunk: fix signed/unsigned half load …

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [847] code/trunk: fix signed/unsigned half load mismatches and JIT compiler update
Revision: 847
          http://vcs.pcre.org/viewvc?view=rev&revision=847
Author:   zherczeg
Date:     2012-01-03 17:49:03 +0000 (Tue, 03 Jan 2012)


Log Message:
-----------
fix signed/unsigned half load mismatches and JIT compiler update

Modified Paths:
--------------
    code/trunk/pcre_jit_compile.c
    code/trunk/pcretest.c
    code/trunk/sljit/sljitLir.c
    code/trunk/sljit/sljitLir.h
    code/trunk/sljit/sljitNativeARM_Thumb2.c
    code/trunk/sljit/sljitNativeARM_v5.c
    code/trunk/sljit/sljitNativeMIPS_common.c
    code/trunk/sljit/sljitNativePPC_32.c
    code/trunk/sljit/sljitNativePPC_64.c
    code/trunk/sljit/sljitNativePPC_common.c
    code/trunk/sljit/sljitNativeX86_64.c
    code/trunk/sljit/sljitNativeX86_common.c


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/pcre_jit_compile.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -322,7 +322,7 @@
   int ucharptr;
   union {
     sljit_i asint;
-    sljit_h asshort;
+    sljit_uh asushort;
 #ifdef COMPILE_PCRE8
     sljit_ub asbyte;
     sljit_ub asuchars[4];
@@ -334,7 +334,7 @@
   } c;
   union {
     sljit_i asint;
-    sljit_h asshort;
+    sljit_uh asushort;
 #ifdef COMPILE_PCRE8
     sljit_ub asbyte;
     sljit_ub asuchars[4];
@@ -2493,7 +2493,7 @@
   if (context->length >= 4)
     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
   else if (context->length >= 2)
-    OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
   else
 #endif
     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
@@ -2504,7 +2504,7 @@
     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
   else
 #endif
-    OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
 #endif
 #endif /* COMPILE_PCRE8 */
   context->sourcereg = TMP2;
@@ -2545,12 +2545,12 @@
       OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
 #ifdef COMPILE_PCRE8
     else if (context->length >= 2)
-      OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
     else if (context->length >= 1)
       OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
 #else
     else if (context->length >= 2)
-      OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
 #endif
     context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;


@@ -2563,9 +2563,9 @@
       break;


       case 2 / sizeof(pcre_uchar):
-      if (context->oc.asshort != 0)
-        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
-      add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
+      if (context->oc.asushort != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
+      add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
       break;


#ifdef COMPILE_PCRE8

Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/pcretest.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -2491,7 +2491,7 @@
   pcre_uint8 *p, *pp, *ppp;
   pcre_uint8 *to_file = NULL;
   const pcre_uint8 *tables = NULL;
-  pcre_uint32 get_options;
+  unsigned long int get_options;
   unsigned long int true_size, true_study_size = 0;
   size_t size, regex_gotten_store;
   int do_allcaps = 0;


Modified: code/trunk/sljit/sljitLir.c
===================================================================
--- code/trunk/sljit/sljitLir.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitLir.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -616,6 +616,7 @@
 static SLJIT_CONST char* op_names[] = {
     /* op0 */
     (char*)"breakpoint", (char*)"nop",
+    (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv",
     /* op1 */
     (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh",
     (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"movu",
@@ -793,10 +794,11 @@
     SLJIT_UNUSED_ARG(compiler);
     SLJIT_UNUSED_ARG(op);


-    SLJIT_ASSERT(op >= SLJIT_BREAKPOINT && op <= SLJIT_NOP);
+    SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL)
+        || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV));
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
     if (SLJIT_UNLIKELY(!!compiler->verbose))
-        fprintf(compiler->verbose, "  %s\n", op_names[op]);
+        fprintf(compiler->verbose, "  %s%s\n", op_names[GET_OPCODE(op)], !(op & SLJIT_INT_OP) ? "" : "i");
 #endif
 }



Modified: code/trunk/sljit/sljitLir.h
===================================================================
--- code/trunk/sljit/sljitLir.h    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitLir.h    2012-01-03 17:49:03 UTC (rev 847)
@@ -127,9 +127,11 @@


 #define SLJIT_RETURN_REG    SLJIT_TEMPORARY_REG1


-/* x86 prefers temporary registers for special purposes. If other
- registers are used such purpose, it costs a little performance
- drawback. It doesn't matter for other archs. */
+/* x86 prefers specific registers for special purposes. In case of shift
+ by register it supports only SLJIT_TEMPORARY_REG3 for shift argument
+ (which is the src2 argument of sljit_emit_op2). If another register is
+ used, sljit must exchange data between registers which causes a minor
+ slowdown. Other architectures have no such limitation. */

 #define SLJIT_PREF_SHIFT_REG    SLJIT_TEMPORARY_REG3


@@ -370,15 +372,16 @@
*/

 /*
-   IMPORATNT NOTE: memory access MUST be naturally aligned.
+   IMPORTANT NOTE: memory access MUST be naturally aligned unless the
+                   SLJIT_UNALIGNED macro is defined and its value is 1.
+
      length | alignment
    ---------+-----------
      byte   | 1 byte (not aligned)
      half   | 2 byte (real_address & 0x1 == 0)
      int    | 4 byte (real_address & 0x3 == 0)
-    sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE defined
-            | 8 byte if SLJIT_64BIT_ARCHITECTURE defined
-   (This is a strict requirement for embedded systems.)
+    sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1
+            | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1


    Note: different architectures have different addressing limitations
          Thus sljit may generate several instructions for other addressing modes
@@ -450,6 +453,24 @@
    Note: may or may not cause an extra cycle wait
          it can even decrease the runtime in a few cases. */
 #define SLJIT_NOP            1
+/* Flags: may destroy flags
+   Unsigned multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2.
+   Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */
+#define SLJIT_UMUL            2
+/* Flags: may destroy flags
+   Signed multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2.
+   Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */
+#define SLJIT_SMUL            3
+/* Flags: I | may destroy flags
+   Unsigned divide the value in SLJIT_TEMPORARY_REG1 by SLJIT_TEMPORARY_REG2.
+   Result goes to SLJIT_TEMPORARY_REG1 and remainder goes to SLJIT_TEMPORARY_REG2.
+   Note: if SLJIT_TEMPORARY_REG2 is equal to 0, the behaviour is undefined. */
+#define SLJIT_UDIV            4
+/* Flags: I | may destroy flags
+   Signed divide the value in SLJIT_TEMPORARY_REG1 by SLJIT_TEMPORARY_REG2.
+   Result goes to SLJIT_TEMPORARY_REG1 and remainder goes to SLJIT_TEMPORARY_REG2.
+   Note: if SLJIT_TEMPORARY_REG2 is equal to 0, the behaviour is undefined. */
+#define SLJIT_SDIV            5


SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op);

@@ -462,68 +483,68 @@
    SH = unsgined half (16 bit) */


 /* Flags: - (never set any flags) */
-#define SLJIT_MOV            2
+#define SLJIT_MOV            6
 /* Flags: - (never set any flags) */
-#define SLJIT_MOV_UB            3
+#define SLJIT_MOV_UB            7
 /* Flags: - (never set any flags) */
-#define SLJIT_MOV_SB            4
+#define SLJIT_MOV_SB            8
 /* Flags: - (never set any flags) */
-#define SLJIT_MOV_UH            5
+#define SLJIT_MOV_UH            9
 /* Flags: - (never set any flags) */
-#define SLJIT_MOV_SH            6
+#define SLJIT_MOV_SH            10
 /* Flags: - (never set any flags) */
-#define SLJIT_MOV_UI            7
+#define SLJIT_MOV_UI            11
 /* Flags: - (never set any flags) */
-#define SLJIT_MOV_SI            8
+#define SLJIT_MOV_SI            12
 /* Flags: - (never set any flags) */
-#define SLJIT_MOVU            9
+#define SLJIT_MOVU            13
 /* Flags: - (never set any flags) */
-#define SLJIT_MOVU_UB            10
+#define SLJIT_MOVU_UB            14
 /* Flags: - (never set any flags) */
-#define SLJIT_MOVU_SB            11
+#define SLJIT_MOVU_SB            15
 /* Flags: - (never set any flags) */
-#define SLJIT_MOVU_UH            12
+#define SLJIT_MOVU_UH            16
 /* Flags: - (never set any flags) */
-#define SLJIT_MOVU_SH            13
+#define SLJIT_MOVU_SH            17
 /* Flags: - (never set any flags) */
-#define SLJIT_MOVU_UI            14
+#define SLJIT_MOVU_UI            18
 /* Flags: - (never set any flags) */
-#define SLJIT_MOVU_SI            15
+#define SLJIT_MOVU_SI            19
 /* Flags: I | E | K */
-#define SLJIT_NOT            16
+#define SLJIT_NOT            20
 /* Flags: I | E | O | K */
-#define SLJIT_NEG            17
+#define SLJIT_NEG            21
 /* Count leading zeroes
    Flags: I | E | K */
-#define SLJIT_CLZ            18
+#define SLJIT_CLZ            22


 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
     int dst, sljit_w dstw,
     int src, sljit_w srcw);


 /* Flags: I | E | O | C | K */
-#define SLJIT_ADD            19
+#define SLJIT_ADD            23
 /* Flags: I | C | K */
-#define SLJIT_ADDC            20
+#define SLJIT_ADDC            24
 /* Flags: I | E | S | U | O | C | K */
-#define SLJIT_SUB            21
+#define SLJIT_SUB            25
 /* Flags: I | C | K */
-#define SLJIT_SUBC            22
+#define SLJIT_SUBC            26
 /* Note: integer mul */
 /* Flags: I | O (see SLJIT_C_MUL_*) | K */
-#define SLJIT_MUL            23
+#define SLJIT_MUL            27
 /* Flags: I | E | K */
-#define SLJIT_AND            24
+#define SLJIT_AND            28
 /* Flags: I | E | K */
-#define SLJIT_OR            25
+#define SLJIT_OR            29
 /* Flags: I | E | K */
-#define SLJIT_XOR            26
+#define SLJIT_XOR            30
 /* Flags: I | E | K */
-#define SLJIT_SHL            27
+#define SLJIT_SHL            31
 /* Flags: I | E | K */
-#define SLJIT_LSHR            28
+#define SLJIT_LSHR            32
 /* Flags: I | E | K */
-#define SLJIT_ASHR            29
+#define SLJIT_ASHR            33


 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
     int dst, sljit_w dstw,
@@ -560,26 +581,26 @@
    Note: NaN check is always performed. If SLJIT_C_FLOAT_NAN is set,
          the comparison result is unpredictable.
    Flags: E | S (see SLJIT_C_FLOAT_*) */
-#define SLJIT_FCMP            30
+#define SLJIT_FCMP            34
 /* Flags: - (never set any flags) */
-#define SLJIT_FMOV            31
+#define SLJIT_FMOV            35
 /* Flags: - (never set any flags) */
-#define SLJIT_FNEG            32
+#define SLJIT_FNEG            36
 /* Flags: - (never set any flags) */
-#define SLJIT_FABS            33
+#define SLJIT_FABS            37


 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
     int dst, sljit_w dstw,
     int src, sljit_w srcw);


 /* Flags: - (never set any flags) */
-#define SLJIT_FADD            34
+#define SLJIT_FADD            38
 /* Flags: - (never set any flags) */
-#define SLJIT_FSUB            35
+#define SLJIT_FSUB            39
 /* Flags: - (never set any flags) */
-#define SLJIT_FMUL            36
+#define SLJIT_FMUL            40
 /* Flags: - (never set any flags) */
-#define SLJIT_FDIV            37
+#define SLJIT_FDIV            41


 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
     int dst, sljit_w dstw,


Modified: code/trunk/sljit/sljitNativeARM_Thumb2.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativeARM_Thumb2.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -38,7 +38,7 @@
 #define TMP_FREG1    (SLJIT_FLOAT_REG4 + 1)
 #define TMP_FREG2    (SLJIT_FLOAT_REG4 + 2)


-/* See sljit_emit_enter if you want to change them. */
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
   0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
 };
@@ -158,6 +158,7 @@
 #define SXTH        0xb200
 #define SXTH_W        0xfa0ff080
 #define TST        0x4200
+#define UMULL        0xfba00000
 #define UXTB        0xb2c0
 #define UXTB_W        0xfa5ff080
 #define UXTH        0xb280
@@ -1188,6 +1189,21 @@
 /*  Operators                                                            */
 /* --------------------------------------------------------------------- */


+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator);
+extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator);
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
 {
     CHECK_ERROR();
@@ -1201,6 +1217,25 @@
     case SLJIT_NOP:
         push_inst16(compiler, NOP);
         break;
+    case SLJIT_UMUL:
+    case SLJIT_SMUL:
+        return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+            | (reg_map[SLJIT_TEMPORARY_REG2] << 8)
+            | (reg_map[SLJIT_TEMPORARY_REG1] << 12)
+            | (reg_map[SLJIT_TEMPORARY_REG1] << 16)
+            | reg_map[SLJIT_TEMPORARY_REG2]);
+    case SLJIT_UDIV:
+    case SLJIT_SDIV:
+        FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */));
+        FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */));
+#if defined(__GNUC__)
+        FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+            (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+        FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */));
+        return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */);
     }


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativeARM_v5.c
===================================================================
--- code/trunk/sljit/sljitNativeARM_v5.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativeARM_v5.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -54,7 +54,7 @@
 #define MAX_DIFFERENCE(max_diff) \
     (((max_diff) / (int)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))


-/* See sljit_emit_enter if you want to change them. */
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
   0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
 };
@@ -84,7 +84,7 @@
 #define BX        0xe12fff10
 #define CLZ        0xe16f0f10
 #define CMP_DP        0xa
-#define DEBUGGER    0xe1200070
+#define BKPT        0xe1200070
 #define EOR_DP        0x1
 #define MOV_DP        0xd
 #define MUL        0xe0000090
@@ -98,6 +98,7 @@
 #define SBC_DP        0x6
 #define SMULL        0xe0c00090
 #define SUB_DP        0x2
+#define UMULL        0xe0800090
 #define VABS_F64    0xeeb00bc0
 #define VADD_F64    0xee300b00
 #define VCMP_F64    0xeeb40b40
@@ -1755,6 +1756,21 @@
     return SLJIT_SUCCESS;
 }


+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator);
+extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator);
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
 {
     CHECK_ERROR();
@@ -1763,11 +1779,37 @@
     op = GET_OPCODE(op);
     switch (op) {
     case SLJIT_BREAKPOINT:
-        EMIT_INSTRUCTION(DEBUGGER);
+        EMIT_INSTRUCTION(BKPT);
         break;
     case SLJIT_NOP:
         EMIT_INSTRUCTION(NOP);
         break;
+    case SLJIT_UMUL:
+    case SLJIT_SMUL:
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+        return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+            | (reg_map[SLJIT_TEMPORARY_REG2] << 16)
+            | (reg_map[SLJIT_TEMPORARY_REG1] << 12)
+            | (reg_map[SLJIT_TEMPORARY_REG1] << 8)
+            | reg_map[SLJIT_TEMPORARY_REG2]);
+#else
+        EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2)));
+        return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+            | (reg_map[SLJIT_TEMPORARY_REG2] << 16)
+            | (reg_map[SLJIT_TEMPORARY_REG1] << 12)
+            | (reg_map[SLJIT_TEMPORARY_REG1] << 8)
+            | reg_map[TMP_REG1]);
+#endif
+    case SLJIT_UDIV:
+    case SLJIT_SDIV:
+        EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */);
+#if defined(__GNUC__)
+        FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+            (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+        return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */);
     }


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/sljit/sljitNativeMIPS_common.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativeMIPS_common.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -110,6 +110,8 @@
 #define C_UN_D        (HI(17) | FMT_D | LO(49))
 #define C_UEQ_D        (HI(17) | FMT_D | LO(51))
 #define C_ULT_D        (HI(17) | FMT_D | LO(53))
+#define DIV        (HI(0) | LO(26))
+#define DIVU        (HI(0) | LO(27))
 #define DIV_D        (HI(17) | FMT_D | LO(3))
 #define J        (HI(2))
 #define JAL        (HI(3))
@@ -128,6 +130,7 @@
 #define MOVZ        (HI(0) | LO(10))
 #define MUL_D        (HI(17) | FMT_D | LO(2))
 #define MULT        (HI(0) | LO(24))
+#define MULTU        (HI(0) | LO(25))
 #define NOP        (HI(0) | LO(0))
 #define NOR        (HI(0) | LO(39))
 #define OR        (HI(0) | LO(37))
@@ -929,6 +932,20 @@
         return push_inst(compiler, BREAK, UNMOVABLE_INS);
     case SLJIT_NOP:
         return push_inst(compiler, NOP, UNMOVABLE_INS);
+    case SLJIT_UMUL:
+    case SLJIT_SMUL:
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1)));
+        return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2));
+    case SLJIT_UDIV:
+    case SLJIT_SDIV:
+#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+        FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+        FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif
+        FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS));
+        FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1)));
+        return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2));
     }


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativePPC_32.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_32.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativePPC_32.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -31,6 +31,9 @@
     if (imm <= SIMM_MAX && imm >= SIMM_MIN)
         return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));


+    if (!(imm & ~0xffff))
+        return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm));
+
     FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
     return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
 }


Modified: code/trunk/sljit/sljitNativePPC_64.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_64.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativePPC_64.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -49,6 +49,9 @@
     if (imm <= SIMM_MAX && imm >= SIMM_MIN)
         return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));


+    if (!(imm & ~0xffff))
+        return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm));
+
     if (imm <= SLJIT_W(0x7fffffff) && imm >= SLJIT_W(-0x80000000)) {
         FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
         return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/sljit/sljitNativePPC_common.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativePPC_common.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -101,6 +101,10 @@
 #define CMPL        (HI(31) | LO(32))
 #define CMPLI        (HI(10))
 #define CROR        (HI(19) | LO(449))
+#define DIVD        (HI(31) | LO(489))
+#define DIVDU        (HI(31) | LO(457))
+#define DIVW        (HI(31) | LO(491))
+#define DIVWU        (HI(31) | LO(459))
 #define EXTSB        (HI(31) | LO(954))
 #define EXTSH        (HI(31) | LO(922))
 #define EXTSW        (HI(31) | LO(986))
@@ -123,6 +127,10 @@
 #define MTCTR        (HI(31) | LO(467) | 0x90000)
 #define MTLR        (HI(31) | LO(467) | 0x80000)
 #define MTXER        (HI(31) | LO(467) | 0x10000)
+#define MULHD        (HI(31) | LO(73))
+#define MULHDU        (HI(31) | LO(9))
+#define MULHW        (HI(31) | LO(75))
+#define MULHWU        (HI(31) | LO(11))
 #define MULLD        (HI(31) | LO(233))
 #define MULLI        (HI(7))
 #define MULLW        (HI(31) | LO(235))
@@ -1028,12 +1036,38 @@
     CHECK_ERROR();
     check_sljit_emit_op0(compiler, op);


-    op = GET_OPCODE(op);
-    switch (op) {
+    switch (GET_OPCODE(op)) {
     case SLJIT_BREAKPOINT:
     case SLJIT_NOP:
         return push_inst(compiler, NOP);
         break;
+    case SLJIT_UMUL:
+    case SLJIT_SMUL:
+        FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+        FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+        return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2));
+#else
+        FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+        return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2));
+#endif
+    case SLJIT_UDIV:
+    case SLJIT_SDIV:
+        FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+        if (op & SLJIT_INT_OP) {
+            FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+            FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2)));
+            return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1));
+        }
+        FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+        FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2)));
+        return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1));
+#else
+        FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+        FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2)));
+        return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1));
+#endif
     }


     return SLJIT_SUCCESS;


Modified: code/trunk/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_64.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativeX86_64.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -821,33 +821,24 @@
             code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
             FAIL_IF(!code);
             *code++ = 0x63;
-        }
-        else {
-            if (dst_r == src) {
-                compiler->mode32 = 1;
-                code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, 0);
-                FAIL_IF(!code);
-                *code++ = 0x8b;
-                compiler->mode32 = 0;
-            }
+        } else if (dst_r == src) {
+            compiler->mode32 = 0;
+            code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 32, dst, 0);
+            FAIL_IF(!code);
+            *code |= 0x4 << 3;
+            code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 32, dst, 0);
+            FAIL_IF(!code);
+            *code |= 0x7 << 3;
+        } else {
             /* xor reg, reg. */
             code = emit_x86_instruction(compiler, 1, dst_r, 0, dst_r, 0);
             FAIL_IF(!code);
             *code++ = 0x33;
-            if (dst_r != src) {
-                compiler->mode32 = 1;
-                code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
-                FAIL_IF(!code);
-                *code++ = 0x8b;
-                compiler->mode32 = 0;
-            }
-            else {
-                compiler->mode32 = 1;
-                code = emit_x86_instruction(compiler, 1, src, 0, TMP_REGISTER, 0);
-                FAIL_IF(!code);
-                *code++ = 0x8b;
-                compiler->mode32 = 0;
-            }
+            compiler->mode32 = 1;
+            code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
+            FAIL_IF(!code);
+            *code++ = 0x8b;
+            compiler->mode32 = 0;
         }
     }



Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2012-01-03 13:57:27 UTC (rev 846)
+++ code/trunk/sljit/sljitNativeX86_common.c    2012-01-03 17:49:03 UTC (rev 847)
@@ -474,32 +474,6 @@
 #include "sljitNativeX86_64.c"
 #endif


-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
-{
-    sljit_ub *buf;
-
-    CHECK_ERROR();
-    check_sljit_emit_op0(compiler, op);
-
-    op = GET_OPCODE(op);
-    switch (op) {
-    case SLJIT_BREAKPOINT:
-        buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
-        FAIL_IF(!buf);
-        INC_SIZE(1);
-        *buf = 0xcc;
-        break;
-    case SLJIT_NOP:
-        buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
-        FAIL_IF(!buf);
-        INC_SIZE(1);
-        *buf = 0x90;
-        break;
-    }
-
-    return SLJIT_SUCCESS;
-}
-
 static int emit_mov(struct sljit_compiler *compiler,
     int dst, sljit_w dstw,
     int src, sljit_w srcw)
@@ -568,6 +542,150 @@
 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
     FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));


+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
+{
+    sljit_ub *buf;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+    int size;
+#endif
+
+    CHECK_ERROR();
+    check_sljit_emit_op0(compiler, op);
+
+    switch (GET_OPCODE(op)) {
+    case SLJIT_BREAKPOINT:
+        buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+        FAIL_IF(!buf);
+        INC_SIZE(1);
+        *buf = 0xcc;
+        break;
+    case SLJIT_NOP:
+        buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+        FAIL_IF(!buf);
+        INC_SIZE(1);
+        *buf = 0x90;
+        break;
+    case SLJIT_UMUL:
+    case SLJIT_SMUL:
+    case SLJIT_UDIV:
+    case SLJIT_SDIV:
+        compiler->flags_saved = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifdef _WIN64
+        SLJIT_COMPILE_ASSERT(
+            reg_map[SLJIT_TEMPORARY_REG1] == 0
+            && reg_map[SLJIT_TEMPORARY_REG2] == 2
+            && reg_map[TMP_REGISTER] > 7,
+            invalid_register_assignment_for_div_mul);
+#else
+        SLJIT_COMPILE_ASSERT(
+            reg_map[SLJIT_TEMPORARY_REG1] == 0
+            && reg_map[SLJIT_TEMPORARY_REG2] < 7
+            && reg_map[TMP_REGISTER] == 2,
+            invalid_register_assignment_for_div_mul);
+#endif
+        compiler->mode32 = op & SLJIT_INT_OP;
+#endif
+
+        op = GET_OPCODE(op);
+        if (op == SLJIT_UDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+            EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
+            buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
+#else
+            buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
+#endif
+            FAIL_IF(!buf);
+            *buf = 0x33;
+        }
+
+        if (op == SLJIT_SDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+            EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
+            EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG1, 0);
+#else
+            EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+            buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
+            FAIL_IF(!buf);
+            INC_SIZE(3);
+            *buf++ = 0xc1;
+            *buf++ = 0xfa;
+            *buf = 0x1f;
+#else
+            if (compiler->mode32) {
+                buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
+                FAIL_IF(!buf);
+                INC_SIZE(3);
+                *buf++ = 0xc1;
+                *buf++ = 0xfa;
+                *buf = 0x1f;
+            } else {
+                buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+                FAIL_IF(!buf);
+                INC_SIZE(4);
+                *buf++ = REX_W;
+                *buf++ = 0xc1;
+                *buf++ = 0xfa;
+                *buf = 0x3f;
+            }
+#endif
+        }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+        buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
+        FAIL_IF(!buf);
+        INC_SIZE(2);
+        *buf++ = 0xf7;
+        *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
+#else
+#ifdef _WIN64
+        size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
+#else
+        size = (!compiler->mode32) ? 3 : 2;
+#endif
+        buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
+        FAIL_IF(!buf);
+        INC_SIZE(size);
+#ifdef _WIN64
+        if (!compiler->mode32)
+            *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
+        else if (op >= SLJIT_UDIV)
+            *buf++ = REX_B;
+        *buf++ = 0xf7;
+        *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
+#else
+        if (!compiler->mode32)
+            *buf++ = REX_W;
+        *buf++ = 0xf7;
+        *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
+#endif
+#endif
+        switch (op) {
+        case SLJIT_UMUL:
+            *buf |= 4 << 3;
+            break;
+        case SLJIT_SMUL:
+            *buf |= 5 << 3;
+            break;
+        case SLJIT_UDIV:
+            *buf |= 6 << 3;
+            break;
+        case SLJIT_SDIV:
+            *buf |= 7 << 3;
+            break;
+        }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
+        EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
+#endif
+        break;
+    }
+
+    return SLJIT_SUCCESS;
+}
+
 #define ENCODE_PREFIX(prefix) \
     do { \
         code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \