[Pcre-svn] [1662] code/trunk: Fix unaligned accesses on x86.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1662] code/trunk: Fix unaligned accesses on x86.
Revision: 1662
          http://vcs.pcre.org/viewvc?view=rev&revision=1662
Author:   zherczeg
Date:     2016-07-15 10:57:53 +0100 (Fri, 15 Jul 2016)
Log Message:
-----------
Fix unaligned accesses on x86. Patch by Marc Mutz.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/sljit/sljitConfigInternal.h
    code/trunk/sljit/sljitNativeX86_32.c
    code/trunk/sljit/sljitNativeX86_64.c
    code/trunk/sljit/sljitNativeX86_common.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-07-06 14:55:40 UTC (rev 1661)
+++ code/trunk/ChangeLog    2016-07-15 09:57:53 UTC (rev 1662)
@@ -17,7 +17,9 @@


 4. Ignore "show all captures" (/=) for DFA matching.

+5. Fix unaligned accesses on x86. Patch by Marc Mutz.

+
 Version 8.39 14-June-2016
 -------------------------


Modified: code/trunk/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/sljit/sljitConfigInternal.h    2016-07-06 14:55:40 UTC (rev 1661)
+++ code/trunk/sljit/sljitConfigInternal.h    2016-07-15 09:57:53 UTC (rev 1662)
@@ -214,6 +214,10 @@
 #define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
 #endif


+#ifndef SLJIT_MEMCPY
+#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
+#endif
+
 #ifndef SLJIT_ZEROMEM
 #define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
 #endif

Modified: code/trunk/sljit/sljitNativeX86_32.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_32.c    2016-07-06 14:55:40 UTC (rev 1661)
+++ code/trunk/sljit/sljitNativeX86_32.c    2016-07-15 09:57:53 UTC (rev 1662)
@@ -34,7 +34,7 @@
     FAIL_IF(!inst);
     INC_SIZE(1 + sizeof(sljit_sw));
     *inst++ = opcode;
-    *(sljit_sw*)inst = imm;
+    sljit_unaligned_store_sw(inst, imm);
     return SLJIT_SUCCESS;
 }


@@ -57,7 +57,7 @@
     if (jump->flags & JUMP_LABEL)
         jump->flags |= PATCH_MW;
     else
-        *(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4);
+        sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4));
     code_ptr += 4;


     return code_ptr;
@@ -151,12 +151,12 @@
         inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP];
         inst[2] = GROUP_F7;
         inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP];
-        *(sljit_sw*)(inst + 4) = 0x4;
+        sljit_unaligned_store_sw(inst + 4, 0x4);
         inst[8] = JNE_i8;
         inst[9] = 6;
         inst[10] = GROUP_BINARY_81;
         inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP];
-        *(sljit_sw*)(inst + 12) = 0x4;
+        sljit_unaligned_store_sw(inst + 12, 0x4);
         inst[16] = PUSH_r + reg_map[TMP_REG1];
     }
     else
@@ -406,7 +406,7 @@
                 if (immb <= 127 && immb >= -128)
                     *buf_ptr++ = immb; /* 8 bit displacement. */
                 else {
-                    *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+                    sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
                     buf_ptr += sizeof(sljit_sw);
                 }
             }
@@ -418,7 +418,7 @@
     }
     else {
         *buf_ptr++ |= 0x05;
-        *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+        sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
         buf_ptr += sizeof(sljit_sw);
     }


@@ -426,9 +426,9 @@
         if (flags & EX86_BYTE_ARG)
             *buf_ptr = imma;
         else if (flags & EX86_HALF_ARG)
-            *(short*)buf_ptr = imma;
+            sljit_unaligned_store_s16(buf_ptr, imma);
         else if (!(flags & EX86_SHIFT_INS))
-            *(sljit_sw*)buf_ptr = imma;
+            sljit_unaligned_store_sw(buf_ptr, imma);
     }


     return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
@@ -541,7 +541,7 @@


         INC_SIZE(5 + 1);
         *inst++ = PUSH_i32;
-        *(sljit_sw*)inst = srcw;
+        sljit_unaligned_store_sw(inst, srcw);
         inst += sizeof(sljit_sw);
     }



Modified: code/trunk/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_64.c    2016-07-06 14:55:40 UTC (rev 1661)
+++ code/trunk/sljit/sljitNativeX86_64.c    2016-07-15 09:57:53 UTC (rev 1662)
@@ -35,7 +35,7 @@
     INC_SIZE(2 + sizeof(sljit_sw));
     *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
     *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
-    *(sljit_sw*)inst = imm;
+    sljit_unaligned_store_sw(inst, imm);
     return SLJIT_SUCCESS;
 }


@@ -55,7 +55,7 @@
     if (jump->flags & JUMP_LABEL)
         jump->flags |= PATCH_MD;
     else
-        *(sljit_sw*)code_ptr = jump->u.target;
+        sljit_unaligned_store_sw(code_ptr, jump->u.target);


     code_ptr += sizeof(sljit_sw);
     *code_ptr++ = REX_B;
@@ -71,13 +71,13 @@


     if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
         *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
-        *(sljit_sw*)code_ptr = delta;
+        sljit_unaligned_store_sw(code_ptr, delta);
     }
     else {
         SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
         *code_ptr++ = REX_W | REX_B;
         *code_ptr++ = MOV_r_i32 + 1;
-        *(sljit_sw*)code_ptr = addr;
+        sljit_unaligned_store_sw(code_ptr, addr);
         code_ptr += sizeof(sljit_sw);
         *code_ptr++ = REX_B;
         *code_ptr++ = GROUP_FF;
@@ -193,7 +193,7 @@
         *inst++ = REX_W;
         *inst++ = MOV_rm_i32;
         *inst++ = MOD_REG | reg_lmap[SLJIT_R0];
-        *(sljit_s32*)inst = local_size;
+        sljit_unaligned_store_s32(inst, local_size);
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
             || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
         compiler->skip_checks = 1;
@@ -219,7 +219,7 @@
         *inst++ = REX_W;
         *inst++ = GROUP_BINARY_81;
         *inst++ = MOD_REG | SUB | 4;
-        *(sljit_s32*)inst = local_size;
+        sljit_unaligned_store_s32(inst, local_size);
         inst += sizeof(sljit_s32);
     }


@@ -230,7 +230,7 @@
         FAIL_IF(!inst);
         INC_SIZE(5);
         *inst++ = GROUP_0F;
-        *(sljit_s32*)inst = 0x20247429;
+        sljit_unaligned_store_s32(inst, 0x20247429);
     }
 #endif


@@ -271,7 +271,7 @@
         FAIL_IF(!inst);
         INC_SIZE(5);
         *inst++ = GROUP_0F;
-        *(sljit_s32*)inst = 0x20247428;
+        sljit_unaligned_store_s32(inst, 0x20247428);
     }
 #endif


@@ -292,7 +292,7 @@
         *inst++ = REX_W;
         *inst++ = GROUP_BINARY_81;
         *inst++ = MOD_REG | ADD | 4;
-        *(sljit_s32*)inst = compiler->local_size;
+        sljit_unaligned_store_s32(inst, compiler->local_size);
     }


     tmp = compiler->scratches;
@@ -339,7 +339,7 @@
     if (rex)
         *inst++ = rex;
     *inst++ = opcode;
-    *(sljit_s32*)inst = imm;
+    sljit_unaligned_store_s32(inst, imm);
     return SLJIT_SUCCESS;
 }


@@ -516,7 +516,7 @@
                 if (immb <= 127 && immb >= -128)
                     *buf_ptr++ = immb; /* 8 bit displacement. */
                 else {
-                    *(sljit_s32*)buf_ptr = immb; /* 32 bit displacement. */
+                    sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
                     buf_ptr += sizeof(sljit_s32);
                 }
             }
@@ -533,7 +533,7 @@
     else {
         *buf_ptr++ |= 0x04;
         *buf_ptr++ = 0x25;
-        *(sljit_s32*)buf_ptr = immb; /* 32 bit displacement. */
+        sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
         buf_ptr += sizeof(sljit_s32);
     }


@@ -541,9 +541,9 @@
         if (flags & EX86_BYTE_ARG)
             *buf_ptr = imma;
         else if (flags & EX86_HALF_ARG)
-            *(short*)buf_ptr = imma;
+            sljit_unaligned_store_s16(buf_ptr, imma);
         else if (!(flags & EX86_SHIFT_INS))
-            *(sljit_s32*)buf_ptr = imma;
+            sljit_unaligned_store_s32(buf_ptr, imma);
     }


     return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
@@ -676,7 +676,7 @@


         INC_SIZE(5 + 1);
         *inst++ = PUSH_i32;
-        *(sljit_s32*)inst = srcw;
+        sljit_unaligned_store_s32(inst, srcw);
         inst += sizeof(sljit_s32);
     }



Modified: code/trunk/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/sljit/sljitNativeX86_common.c    2016-07-06 14:55:40 UTC (rev 1661)
+++ code/trunk/sljit/sljitNativeX86_common.c    2016-07-15 09:57:53 UTC (rev 1662)
@@ -279,6 +279,34 @@
 #include <intrin.h>
 #endif


+/******************************************************/
+/*    Unaligned-store functions                       */
+/******************************************************/
+
+static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
+{
+    SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
+{
+    SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
+{
+    SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_uw(void *addr, sljit_uw value)
+{
+    SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+/******************************************************/
+/*    Utility functions                               */
+/******************************************************/
+
 static void get_cpu_features(void)
 {
     sljit_u32 features;
@@ -504,7 +532,7 @@
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                     *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
                     buf_ptr++;
-                    *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
+                    sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
                     code_ptr += sizeof(sljit_sw);
                     buf_ptr += sizeof(sljit_sw) - 1;
 #else
@@ -531,24 +559,24 @@
         } else if (jump->flags & PATCH_MW) {
             if (jump->flags & JUMP_LABEL) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
+                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
 #else
                 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-                *(sljit_s32*)jump->addr = (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)));
+                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
 #endif
             }
             else {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
+                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
 #else
                 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-                *(sljit_s32*)jump->addr = (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)));
+                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
 #endif
             }
         }
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
         else if (jump->flags & PATCH_MD)
-            *(sljit_sw*)jump->addr = jump->u.label->addr;
+            sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
 #endif


         jump = jump->next;
@@ -1699,7 +1727,7 @@
             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
             FAIL_IF(!inst);
             INC_SIZE(4);
-            *(sljit_sw*)inst = src1w;
+            sljit_unaligned_store_sw(inst, src1w);
         }
 #else
         else if (IS_HALFWORD(src1w)) {
@@ -1709,7 +1737,7 @@
             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
             FAIL_IF(!inst);
             INC_SIZE(4);
-            *(sljit_s32*)inst = (sljit_s32)src1w;
+            sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
         }
         else {
             EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
@@ -1742,7 +1770,7 @@
             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
             FAIL_IF(!inst);
             INC_SIZE(4);
-            *(sljit_sw*)inst = src2w;
+            sljit_unaligned_store_sw(inst, src2w);
         }
 #else
         else if (IS_HALFWORD(src2w)) {
@@ -1752,7 +1780,7 @@
             inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
             FAIL_IF(!inst);
             INC_SIZE(4);
-            *(sljit_s32*)inst = (sljit_s32)src2w;
+            sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
         }
         else {
             EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
@@ -2926,15 +2954,15 @@
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
 {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-    *(sljit_sw*)addr = new_addr - (addr + 4);
+    sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4));
 #else
-    *(sljit_uw*)addr = new_addr;
+    sljit_unaligned_store_uw((void*)addr, new_addr);
 #endif
 }


 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
 {
-    *(sljit_sw*)addr = new_constant;
+    sljit_unaligned_store_sw((void*)addr, new_constant);
 }


 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)