[Pcre-svn] [644] code/trunk/src/sljit: JIT compiler update.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [644] code/trunk/src/sljit: JIT compiler update.
Revision: 644
          http://www.exim.org/viewvc/pcre2?view=rev&revision=644
Author:   zherczeg
Date:     2017-01-02 13:01:42 +0000 (Mon, 02 Jan 2017)
Log Message:
-----------
JIT compiler update.


Modified Paths:
--------------
    code/trunk/src/sljit/sljitConfig.h
    code/trunk/src/sljit/sljitConfigInternal.h
    code/trunk/src/sljit/sljitExecAllocator.c
    code/trunk/src/sljit/sljitNativeARM_32.c
    code/trunk/src/sljit/sljitNativeARM_64.c
    code/trunk/src/sljit/sljitNativeARM_T2_32.c
    code/trunk/src/sljit/sljitNativeMIPS_common.c
    code/trunk/src/sljit/sljitNativePPC_common.c
    code/trunk/src/sljit/sljitNativeSPARC_common.c
    code/trunk/src/sljit/sljitNativeTILEGX_64.c
    code/trunk/src/sljit/sljitNativeX86_64.c
    code/trunk/src/sljit/sljitNativeX86_common.c
    code/trunk/src/sljit/sljitProtExecAllocator.c


Modified: code/trunk/src/sljit/sljitConfig.h
===================================================================
--- code/trunk/src/sljit/sljitConfig.h    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitConfig.h    2017-01-02 13:01:42 UTC (rev 644)
@@ -90,7 +90,7 @@


 /* Executable code allocation:
    If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
-   define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_ENABLE_EXEC. */
+   define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */
 #ifndef SLJIT_EXECUTABLE_ALLOCATOR
 /* Enabled by default. */
 #define SLJIT_EXECUTABLE_ALLOCATOR 1


Modified: code/trunk/src/sljit/sljitConfigInternal.h
===================================================================
--- code/trunk/src/sljit/sljitConfigInternal.h    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitConfigInternal.h    2017-01-02 13:01:42 UTC (rev 644)
@@ -547,10 +547,10 @@
 #define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)


#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
-SLJIT_API_FUNC_ATTRIBUTE void sljit_enable_exec(void* from, void *to);
-#define SLJIT_ENABLE_EXEC(from, to) sljit_enable_exec((from), (to))
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
+#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr)
#else
-#define SLJIT_ENABLE_EXEC(from, to)
+#define SLJIT_EXEC_OFFSET(ptr) 0
#endif

#endif

Modified: code/trunk/src/sljit/sljitExecAllocator.c
===================================================================
--- code/trunk/src/sljit/sljitExecAllocator.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitExecAllocator.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -86,7 +86,7 @@
     return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
 }


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 {
     SLJIT_UNUSED_ARG(size);
     VirtualFree(chunk, 0, MEM_RELEASE);
@@ -96,7 +96,7 @@


 static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 {
-    void* retval;
+    void *retval;


 #ifdef MAP_ANON
     retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -111,7 +111,7 @@
     return (retval != MAP_FAILED) ? retval : NULL;
 }


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 {
     munmap(chunk, size);
 }
@@ -180,8 +180,8 @@
     sljit_uw chunk_size;


     allocator_grab_lock();
-    if (size < sizeof(struct free_block))
-        size = sizeof(struct free_block);
+    if (size < (64 - sizeof(struct block_header)))
+        size = (64 - sizeof(struct block_header));
     size = ALIGN_SIZE(size);


     free_block = free_blocks;


Modified: code/trunk/src/sljit/sljitNativeARM_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_32.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeARM_32.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -793,7 +793,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }


Modified: code/trunk/src/sljit/sljitNativeARM_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_64.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeARM_64.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -309,7 +309,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }


Modified: code/trunk/src/sljit/sljitNativeARM_T2_32.c
===================================================================
--- code/trunk/src/sljit/sljitNativeARM_T2_32.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeARM_T2_32.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -415,7 +415,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
     SLJIT_CACHE_FLUSH(code, code_ptr);
     /* Set thumb mode flag. */
     return (void*)((sljit_uw)code | 0x1);


Modified: code/trunk/src/sljit/sljitNativeMIPS_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeMIPS_common.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeMIPS_common.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -477,7 +477,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
 #ifndef __GNUC__
     SLJIT_CACHE_FLUSH(code, code_ptr);
 #else


Modified: code/trunk/src/sljit/sljitNativePPC_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativePPC_common.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativePPC_common.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -493,7 +493,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
     SLJIT_CACHE_FLUSH(code, code_ptr);


#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)

Modified: code/trunk/src/sljit/sljitNativeSPARC_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeSPARC_common.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeSPARC_common.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -379,7 +379,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }


Modified: code/trunk/src/sljit/sljitNativeTILEGX_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeTILEGX_64.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeTILEGX_64.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -1113,7 +1113,6 @@


     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
-    SLJIT_ENABLE_EXEC(code, code_ptr);
     SLJIT_CACHE_FLUSH(code, code_ptr);
     return code;
 }


Modified: code/trunk/src/sljit/sljitNativeX86_64.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_64.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeX86_64.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -65,28 +65,6 @@
     return code_ptr;
 }


-static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type)
-{
-    sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_s32));
-
-    if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
-        *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
-        sljit_unaligned_store_sw(code_ptr, delta);
-    }
-    else {
-        SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
-        *code_ptr++ = REX_W | REX_B;
-        *code_ptr++ = MOV_r_i32 + 1;
-        sljit_unaligned_store_sw(code_ptr, addr);
-        code_ptr += sizeof(sljit_sw);
-        *code_ptr++ = REX_B;
-        *code_ptr++ = GROUP_FF;
-        *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
-    }
-
-    return code_ptr;
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
     sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
     sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)


Modified: code/trunk/src/sljit/sljitNativeX86_common.c
===================================================================
--- code/trunk/src/sljit/sljitNativeX86_common.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitNativeX86_common.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -411,11 +411,7 @@


static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);

-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
-#endif
-
-static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
+static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
 {
     sljit_s32 short_jump;
     sljit_uw label_addr;
@@ -423,7 +419,8 @@
     if (jump->flags & JUMP_LABEL)
         label_addr = (sljit_uw)(code + jump->u.label->size);
     else
-        label_addr = jump->u.target;
+        label_addr = jump->u.target - executable_offset;
+
     short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;


 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -476,6 +473,8 @@
     sljit_u8 *buf_ptr;
     sljit_u8 *buf_end;
     sljit_u8 len;
+    sljit_sw executable_offset;
+    sljit_sw jump_addr;


     struct sljit_label *label;
     struct sljit_jump *jump;
@@ -494,6 +493,8 @@
     label = compiler->labels;
     jump = compiler->jumps;
     const_ = compiler->consts;
+    executable_offset = SLJIT_EXEC_OFFSET(code);
+
     do {
         buf_ptr = buf->memory;
         buf_end = buf_ptr + buf->used_size;
@@ -506,35 +507,24 @@
                 buf_ptr += len;
             }
             else {
-                if (*buf_ptr >= 4) {
+                if (*buf_ptr >= 2) {
                     jump->addr = (sljit_uw)code_ptr;
                     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
-                        code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
+                        code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
                     else
-                        code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
+                        code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
                     jump = jump->next;
                 }
                 else if (*buf_ptr == 0) {
-                    label->addr = (sljit_uw)code_ptr;
+                    label->addr = ((sljit_uw)code_ptr) + executable_offset;
                     label->size = code_ptr - code;
                     label = label->next;
                 }
-                else if (*buf_ptr == 1) {
+                else { /* *buf_ptr is 1 */
+                    SLJIT_ASSERT(sljit_is_dyn_code_modification_enabled());
                     const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
                     const_ = const_->next;
                 }
-                else {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                    *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
-                    buf_ptr++;
-                    sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
-                    code_ptr += sizeof(sljit_sw);
-                    buf_ptr += sizeof(sljit_sw) - 1;
-#else
-                    code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
-                    buf_ptr += sizeof(sljit_sw);
-#endif
-                }
                 buf_ptr++;
             }
         } while (buf_ptr < buf_end);
@@ -548,24 +538,26 @@


     jump = compiler->jumps;
     while (jump) {
+        jump_addr = jump->addr + executable_offset;
+
         if (jump->flags & PATCH_MB) {
-            SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
-            *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
+            SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
+            *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
         } else if (jump->flags & PATCH_MW) {
             if (jump->flags & JUMP_LABEL) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
+                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
 #else
-                SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
+                SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
 #endif
             }
             else {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
+                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
 #else
-                SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
+                SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
 #endif
             }
         }
@@ -577,12 +569,11 @@
         jump = jump->next;
     }


-    /* Maybe we waste some space because of short jumps. */
+    /* Some space may be wasted because of short jumps. */
     SLJIT_ASSERT(code_ptr <= code + compiler->size);
     compiler->error = SLJIT_ERR_COMPILED;
     compiler->executable_size = code_ptr - code;
-    SLJIT_ENABLE_EXEC(code, code_ptr);
-    return (void*)code;
+    return (void*)(code + executable_offset);
 }


 /* --------------------------------------------------------------------- */
@@ -2609,7 +2600,7 @@
     PTR_FAIL_IF_NULL(inst);


     *inst++ = 0;
-    *inst++ = type + 4;
+    *inst++ = type + 2;
     return jump;
 }


@@ -2667,7 +2658,7 @@
         FAIL_IF_NULL(inst);


         *inst++ = 0;
-        *inst++ = type + 4;
+        *inst++ = type + 2;
     }
     else {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)


Modified: code/trunk/src/sljit/sljitProtExecAllocator.c
===================================================================
--- code/trunk/src/sljit/sljitProtExecAllocator.c    2017-01-01 12:13:17 UTC (rev 643)
+++ code/trunk/src/sljit/sljitProtExecAllocator.c    2017-01-02 13:01:42 UTC (rev 644)
@@ -25,89 +25,327 @@
  */


/*
- This file contains a simple executable memory allocator where the
- allocated regions are not writable and executable in the same time.
+ This file contains a simple executable memory allocator

-   This allocator usually uses more memory than sljitExecAllocator.
+   It is assumed that executable code blocks are usually medium (or sometimes
+   large) memory blocks, and the allocator is not too frequently called (less
+   optimized than other allocators). Thus, using it as a generic allocator is
+   not suggested.
+
+   How does it work:
+     Memory is allocated in contiguous memory areas called chunks by alloc_chunk()
+     Chunk format:
+     [ block ][ block ] ... [ block ][ block terminator ]
+
+   All blocks and the block terminator start with a block_header. The block
+   header contains the size of the previous and the next block. These sizes
+   can also contain special values.
+     Block size:
+       0 - The block is a free_block, with a different size member.
+       1 - The block is a block terminator.
+       n - The block is used at the moment, and the value contains its size.
+     Previous block size:
+       0 - This is the first block of the memory chunk.
+       n - The size of the previous block.
+
+   Using these size values we can go forward or backward on the block chain.
+   The unused blocks are stored in a chain list pointed by free_blocks. This
+   list is useful if we need to find a suitable memory area when the allocator
+   is called.
+
+   When a block is freed, the new free block is connected to its adjacent free
+   blocks if possible.
+
+     [ free block ][ used block ][ free block ]
+   and "used block" is freed, the three blocks are connected together:
+     [           one big free block           ]
 */


-#ifdef _WIN32
+/* --------------------------------------------------------------------- */
+/*  System (OS) functions                                                */
+/* --------------------------------------------------------------------- */


-static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
-{
-    return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
-}
+/* 64 KByte. */
+#define CHUNK_SIZE    0x10000


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
-{
-    SLJIT_UNUSED_ARG(size);
-    VirtualFree(chunk, 0, MEM_RELEASE);
-}
+struct chunk_header {
+    void *executable;
+    int fd;
+};


-static SLJIT_INLINE void enable_exec_permission(void* chunk, sljit_uw size)
+/*
+   alloc_chunk / free_chunk :
+     * allocate executable system memory chunks
+     * the size is always divisible by CHUNK_SIZE
+   allocator_grab_lock / allocator_release_lock :
+     * make the allocator thread safe
+     * can be empty if the OS (or the application) does not support threading
+     * only the allocator requires this lock, sljit is fully thread safe
+       as it only uses local variables
+*/
+
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 500 /* for mkstemp() and ftruncate() */
+#endif
+
+static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size)
 {
-    sljit_uw *uw_ptr = (sljit_uw *)ptr;
+    struct chunk_header *retval;
+    char template[] = "/tmp/XXXXXX";
+    int fd;


-    VirtualProtect(chunk, size, PAGE_EXECUTE_READ, NULL);
-}
+    fd = mkstemp(template);
+    if (fd == -1) {
+        return NULL;
+    }


-#else
+    if (unlink(template)) {
+        close(fd);
+        return NULL;
+    }


-static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
-{
-    void* retval;
+    if (ftruncate(fd, size)) {
+        close(fd);
+        return NULL;
+    }


-#ifdef MAP_ANON
-    retval = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
-#else
-    if (dev_zero < 0) {
-        if (open_dev_zero())
-            return NULL;
+    retval = (struct chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+    if (retval == MAP_FAILED) {
+        close(fd);
+        return NULL;
     }
-    retval = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
-#endif


-    return (retval != MAP_FAILED) ? retval : NULL;
-}
+    retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);


-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
-{
-    munmap(chunk, size);
+    if (retval->executable == MAP_FAILED) {
+        munmap(retval, size);
+        close(fd);
+        return NULL;
+    }
+
+    retval->fd = fd;
+    return retval;
 }


-static SLJIT_INLINE void enable_exec_permission(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 {
-    sljit_uw *uw_ptr = (sljit_uw *)chunk;
+    struct chunk_header *header = ((struct chunk_header *)chunk) - 1;


-    mprotect(uw_ptr - 1, size + sizeof(sljit_uw), PROT_READ | PROT_EXEC);
+    int fd = header->fd;
+    munmap(header->executable, size);
+    munmap(header, size);
+    close(fd);
 }


-#endif
-
 /* --------------------------------------------------------------------- */
 /*  Common functions                                                     */
 /* --------------------------------------------------------------------- */


+#define CHUNK_MASK    (~(CHUNK_SIZE - 1))
+
+struct block_header {
+    sljit_uw size;
+    sljit_uw prev_size;
+    sljit_sw executable_offset;
+};
+
+struct free_block {
+    struct block_header header;
+    struct free_block *next;
+    struct free_block *prev;
+    sljit_uw size;
+};
+
+#define AS_BLOCK_HEADER(base, offset) \
+    ((struct block_header*)(((sljit_u8*)base) + offset))
+#define AS_FREE_BLOCK(base, offset) \
+    ((struct free_block*)(((sljit_u8*)base) + offset))
+#define MEM_START(base)        ((void*)((base) + 1))
+#define ALIGN_SIZE(size)    (((size) + sizeof(struct block_header) + 7) & ~7)
+
+static struct free_block* free_blocks;
+static sljit_uw allocated_size;
+static sljit_uw total_size;
+
+static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
+{
+    free_block->header.size = 0;
+    free_block->size = size;
+
+    free_block->next = free_blocks;
+    free_block->prev = NULL;
+    if (free_blocks)
+        free_blocks->prev = free_block;
+    free_blocks = free_block;
+}
+
+static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
+{
+    if (free_block->next)
+        free_block->next->prev = free_block->prev;
+
+    if (free_block->prev)
+        free_block->prev->next = free_block->next;
+    else {
+        SLJIT_ASSERT(free_blocks == free_block);
+        free_blocks = free_block->next;
+    }
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 {
-    sljit_uw *ptr = (sljit_uw *)alloc_chunk(size + sizeof (sljit_uw));
+    struct chunk_header *chunk_header;
+    struct block_header *header;
+    struct block_header *next_header;
+    struct free_block *free_block;
+    sljit_uw chunk_size;
+    sljit_sw executable_offset;


-    *ptr = size;
-    return (void*)(ptr + 1);
+    allocator_grab_lock();
+    if (size < (64 - sizeof(struct block_header)))
+        size = (64 - sizeof(struct block_header));
+    size = ALIGN_SIZE(size);
+
+    free_block = free_blocks;
+    while (free_block) {
+        if (free_block->size >= size) {
+            chunk_size = free_block->size;
+            if (chunk_size > size + 64) {
+                /* We just cut a block from the end of the free block. */
+                chunk_size -= size;
+                free_block->size = chunk_size;
+                header = AS_BLOCK_HEADER(free_block, chunk_size);
+                header->prev_size = chunk_size;
+                header->executable_offset = free_block->header.executable_offset;
+                AS_BLOCK_HEADER(header, size)->prev_size = size;
+            }
+            else {
+                sljit_remove_free_block(free_block);
+                header = (struct block_header*)free_block;
+                size = chunk_size;
+            }
+            allocated_size += size;
+            header->size = size;
+            allocator_release_lock();
+            return MEM_START(header);
+        }
+        free_block = free_block->next;
+    }
+
+    chunk_size = sizeof(struct chunk_header) + sizeof(struct block_header);
+    chunk_size = (chunk_size + size + CHUNK_SIZE - 1) & CHUNK_MASK;
+
+    chunk_header = alloc_chunk(chunk_size);
+    if (!chunk_header) {
+        allocator_release_lock();
+        return NULL;
+    }
+
+    executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header);
+
+    chunk_size -= sizeof(struct chunk_header) + sizeof(struct block_header);
+    total_size += chunk_size;
+
+    header = (struct block_header *)(chunk_header + 1);
+
+    header->prev_size = 0;
+    header->executable_offset = executable_offset;
+    if (chunk_size > size + 64) {
+        /* Cut the allocated space into a free and a used block. */
+        allocated_size += size;
+        header->size = size;
+        chunk_size -= size;
+
+        free_block = AS_FREE_BLOCK(header, size);
+        free_block->header.prev_size = size;
+        free_block->header.executable_offset = executable_offset;
+        sljit_insert_free_block(free_block, chunk_size);
+        next_header = AS_BLOCK_HEADER(free_block, chunk_size);
+    }
+    else {
+        /* All space belongs to this allocation. */
+        allocated_size += chunk_size;
+        header->size = chunk_size;
+        next_header = AS_BLOCK_HEADER(header, chunk_size);
+    }
+    next_header->size = 1;
+    next_header->prev_size = chunk_size;
+    next_header->executable_offset = executable_offset;
+    allocator_release_lock();
+    return MEM_START(header);
 }


 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
 {
-    sljit_uw *uw_ptr = (sljit_uw *)ptr;
+    struct block_header *header;
+    struct free_block* free_block;


-    free_chunk(uw_ptr - 1, uw_ptr[-1]);
+    allocator_grab_lock();
+    header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
+    header = AS_BLOCK_HEADER(header, -header->executable_offset);
+    allocated_size -= header->size;
+
+    /* Connecting free blocks together if possible. */
+
+    /* If header->prev_size == 0, free_block will be equal to header.
+       In this case, free_block->header.size will be > 0. */
+    free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
+    if (SLJIT_UNLIKELY(!free_block->header.size)) {
+        free_block->size += header->size;
+        header = AS_BLOCK_HEADER(free_block, free_block->size);
+        header->prev_size = free_block->size;
+    }
+    else {
+        free_block = (struct free_block*)header;
+        sljit_insert_free_block(free_block, header->size);
+    }
+
+    header = AS_BLOCK_HEADER(free_block, free_block->size);
+    if (SLJIT_UNLIKELY(!header->size)) {
+        free_block->size += ((struct free_block*)header)->size;
+        sljit_remove_free_block((struct free_block*)header);
+        header = AS_BLOCK_HEADER(free_block, free_block->size);
+        header->prev_size = free_block->size;
+    }
+
+    /* The whole chunk is free. */
+    if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
+        /* If this block is freed, we still have (allocated_size / 2) free space. */
+        if (total_size - free_block->size > (allocated_size * 3 / 2)) {
+            total_size -= free_block->size;
+            sljit_remove_free_block(free_block);
+            free_chunk(free_block, free_block->size + sizeof(struct block_header));
+        }
+    }
+
+    allocator_release_lock();
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_enable_exec(void* from, void *to)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
 {
-    enable_exec_permission(from, ((sljit_u8 *)to) - ((sljit_u8 *)from));
+    struct free_block* free_block;
+    struct free_block* next_free_block;
+
+    allocator_grab_lock();
+
+    free_block = free_blocks;
+    while (free_block) {
+        next_free_block = free_block->next;
+        if (!free_block->header.prev_size && 
+                AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
+            total_size -= free_block->size;
+            sljit_remove_free_block(free_block);
+            free_chunk(free_block, free_block->size + sizeof(struct block_header));
+        }
+        free_block = next_free_block;
+    }
+
+    SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
+    allocator_release_lock();
 }


-SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr)
 {
+    return ((struct block_header *)(ptr))[-1].executable_offset;
 }