[Pcre-svn] [1187] code/trunk: JIT native interface.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1187] code/trunk: JIT native interface.
Revision: 1187
          http://vcs.pcre.org/viewvc?view=rev&revision=1187
Author:   zherczeg
Date:     2012-10-29 11:30:45 +0000 (Mon, 29 Oct 2012)


Log Message:
-----------
JIT native interface.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre.h.in
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/pcre_jit_compile.c
    code/trunk/pcre_jit_test.c
    code/trunk/pcretest.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2012-10-28 17:57:32 UTC (rev 1186)
+++ code/trunk/ChangeLog    2012-10-29 11:30:45 UTC (rev 1187)
@@ -132,7 +132,13 @@
 27. Added a definition for CHAR_NULL (helpful for the z/OS port), and use it in 
     pcre_compile.c when checking for a zero character.


+28. Introduced a native interface for JIT. Through this interface, the compiled
+    machine code can be executed directly. The purpose of this interface is to
+    provide fast pattern matching, so several sanity checks are not performed.
+    However, feature tests are still performed. The new interface provides a
+    1.4x speedup compared to the old one.


+
Version 8.31 06-July-2012
-------------------------


Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in    2012-10-28 17:57:32 UTC (rev 1186)
+++ code/trunk/pcre.h.in    2012-10-29 11:30:45 UTC (rev 1187)
@@ -183,6 +183,7 @@
 #define PCRE_ERROR_BADMODE         (-28)
 #define PCRE_ERROR_BADENDIANNESS   (-29)
 #define PCRE_ERROR_DFA_BADRESTART  (-30)
+#define PCRE_ERROR_JIT_BADOPTION   (-31)


/* Specific error codes for UTF-8 validity checks */

@@ -539,6 +540,15 @@
                    PCRE_SPTR16, int, int, int, int *, int);
 PCRE_EXP_DECL int  pcre32_exec(const pcre32 *, const pcre32_extra *,
                    PCRE_SPTR32, int, int, int, int *, int);
+PCRE_EXP_DECL int  pcre_jit_exec(const pcre *, const pcre_extra *,
+                   PCRE_SPTR, int, int, int, int *, int,
+                   pcre_jit_stack *);
+PCRE_EXP_DECL int  pcre16_jit_exec(const pcre16 *, const pcre16_extra *,
+                   PCRE_SPTR16, int, int, int, int *, int,
+                   pcre16_jit_stack *);
+PCRE_EXP_DECL int  pcre32_jit_exec(const pcre32 *, const pcre32_extra *,
+                   PCRE_SPTR32, int, int, int, int *, int,
+                   pcre32_jit_stack *);
 PCRE_EXP_DECL void pcre_free_substring(const char *);
 PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
 PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2012-10-28 17:57:32 UTC (rev 1186)
+++ code/trunk/pcre_exec.c    2012-10-29 11:30:45 UTC (rev 1187)
@@ -6361,17 +6361,15 @@
     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
     && extra_data->executable_jit != NULL
-    && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
-                    PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
-                    PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
+    && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
   {
-  rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
+  rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
        start_offset, options, offsets, offsetcount);


/* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
mode is not compiled. In this case we simply fall back to the interpreter. */

- if (rc != PCRE_ERROR_NULL) return rc;
+ if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
}
#endif


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2012-10-28 17:57:32 UTC (rev 1186)
+++ code/trunk/pcre_internal.h    2012-10-29 11:30:45 UTC (rev 1187)
@@ -1183,6 +1183,10 @@
    (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED)


+#define PUBLIC_JIT_EXEC_OPTIONS \
+   (PCRE_NO_UTF8_CHECK|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|\
+    PCRE_NOTEMPTY_ATSTART|PCRE_PARTIAL_SOFT|PCRE_PARTIAL_HARD)
+
 /* Magic number to provide a small check against being handed junk. */


 #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
@@ -2703,7 +2707,7 @@
 #ifdef SUPPORT_JIT
 extern void              PRIV(jit_compile)(const REAL_PCRE *,
                            PUBL(extra) *, int);
-extern int               PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *,
+extern int               PRIV(jit_exec)(const PUBL(extra) *,
                            const pcre_uchar *, int, int, int, int *, int);
 extern void              PRIV(jit_free)(void *);
 extern int               PRIV(jit_get_size)(void *);


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2012-10-28 17:57:32 UTC (rev 1186)
+++ code/trunk/pcre_jit_compile.c    2012-10-29 11:30:45 UTC (rev 1187)
@@ -170,6 +170,7 @@
   void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
   PUBL(jit_callback) callback;
   void *userdata;
+  pcre_uint32 top_bracket;
   sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
 } executable_functions;


@@ -8242,6 +8243,7 @@
     return;
     }
   memset(functions, 0, sizeof(executable_functions));
+  functions->top_bracket = (re->top_bracket + 1) * 2;
   extra->executable_jit = functions;
   extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
   }
@@ -8269,7 +8271,7 @@
 }


int
-PRIV(jit_exec)(const REAL_PCRE *re, const PUBL(extra) *extra_data, const pcre_uchar *subject,
+PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject,
int length, int start_offset, int options, int *offsets, int offsetcount)
{
executable_functions *functions = (executable_functions *)extra_data->executable_jit;
@@ -8288,10 +8290,9 @@
mode = JIT_PARTIAL_SOFT_COMPILE;

if (functions->executable_funcs[mode] == NULL)
- return PCRE_ERROR_NULL;
+ return PCRE_ERROR_JIT_BADOPTION;

/* Sanity checks should be handled by pcre_exec. */
-arguments.stack = NULL;
arguments.str = subject + start_offset;
arguments.begin = subject;
arguments.end = subject + length;
@@ -8312,7 +8313,7 @@

if (offsetcount != 2)
offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3;
-maxoffsetcount = (re->top_bracket + 1) * 2;
+maxoffsetcount = functions->top_bracket;
if (offsetcount > maxoffsetcount)
offsetcount = maxoffsetcount;
arguments.offsetcount = offsetcount;
@@ -8338,6 +8339,85 @@
return retval;
}

+#if defined COMPILE_PCRE8
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data,
+ PCRE_SPTR subject, int length, int start_offset, int options,
+ int *offsets, int offsetcount, pcre_jit_stack *stack)
+#elif defined COMPILE_PCRE16
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
+ PCRE_SPTR16 subject, int length, int start_offset, int options,
+ int *offsets, int offsetcount, pcre16_jit_stack *stack)
+#elif defined COMPILE_PCRE32
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
+ PCRE_SPTR32 subject, int length, int start_offset, int options,
+ int *offsets, int offsetcount, pcre32_jit_stack *stack)
+#endif
+{
+pcre_uchar *subject_ptr = (pcre_uchar *)subject;
+executable_functions *functions = (executable_functions *)extra_data->executable_jit;
+union {
+ void* executable_func;
+ jit_function call_executable_func;
+} convert_executable_func;
+jit_arguments arguments;
+int maxoffsetcount;
+int retval;
+int mode = JIT_COMPILE;
+
+SLJIT_UNUSED_ARG(argument_re);
+
+/* Plausibility checks */
+if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION;
+
+if ((options & PCRE_PARTIAL_HARD) != 0)
+ mode = JIT_PARTIAL_HARD_COMPILE;
+else if ((options & PCRE_PARTIAL_SOFT) != 0)
+ mode = JIT_PARTIAL_SOFT_COMPILE;
+
+if (functions->executable_funcs[mode] == NULL)
+ return PCRE_ERROR_JIT_BADOPTION;
+
+/* pcre_exec is bypassed here, so no further sanity checks are performed. */
+arguments.stack = (struct sljit_stack *)stack;
+arguments.str = subject_ptr + start_offset;
+arguments.begin = subject_ptr;
+arguments.end = subject_ptr + length;
+arguments.mark_ptr = NULL;
+/* JIT decreases this value less frequently than the interpreter. */
+arguments.calllimit = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit;
+arguments.notbol = (options & PCRE_NOTBOL) != 0;
+arguments.noteol = (options & PCRE_NOTEOL) != 0;
+arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
+arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
+arguments.offsets = offsets;
+
+/* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
+the output vector for storing captured strings, with the remainder used as
+workspace. We don't need the workspace here. For compatibility, we limit the
+number of captured strings in the same way as pcre_exec(), so that the user
+gets the same result with and without JIT. */
+
+if (offsetcount != 2)
+ offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3;
+maxoffsetcount = functions->top_bracket;
+if (offsetcount > maxoffsetcount)
+ offsetcount = maxoffsetcount;
+arguments.offsetcount = offsetcount;
+
+convert_executable_func.executable_func = functions->executable_funcs[mode];
+retval = convert_executable_func.call_executable_func(&arguments);
+
+if (retval * 2 > offsetcount)
+ retval = 0;
+if ((extra_data->flags & PCRE_EXTRA_MARK) != 0)
+ *(extra_data->mark) = arguments.mark_ptr;
+
+return retval;
+}
+
void
PRIV(jit_free)(void *executable_funcs)
{

Modified: code/trunk/pcre_jit_test.c
===================================================================
--- code/trunk/pcre_jit_test.c    2012-10-28 17:57:32 UTC (rev 1186)
+++ code/trunk/pcre_jit_test.c    2012-10-29 11:30:45 UTC (rev 1187)
@@ -794,59 +794,71 @@
 #endif


 #ifdef SUPPORT_PCRE8
+static pcre_jit_stack *stack8;
+
+static pcre_jit_stack *getstack8()
+{
+    if (!stack8)
+        stack8 = pcre_jit_stack_alloc(1, 1024 * 1024);
+    return stack8;
+}
+
 static void setstack8(pcre_extra *extra)
 {
-    static pcre_jit_stack *stack;
-
     if (!extra) {
-        if (stack)
-            pcre_jit_stack_free(stack);
-        stack = NULL;
+        if (stack8)
+            pcre_jit_stack_free(stack8);
+        stack8 = NULL;
         return;
     }


-    if (!stack)
-        stack = pcre_jit_stack_alloc(1, 1024 * 1024);
-    /* Extra can be NULL. */
-    pcre_assign_jit_stack(extra, callback8, stack);
+    pcre_assign_jit_stack(extra, callback8, getstack8());
 }
 #endif /* SUPPORT_PCRE8 */


 #ifdef SUPPORT_PCRE16
+static pcre16_jit_stack *stack16;
+
+static pcre16_jit_stack *getstack16()
+{
+    if (!stack16)
+        stack16 = pcre16_jit_stack_alloc(1, 1024 * 1024);
+    return stack16;
+}
+
 static void setstack16(pcre16_extra *extra)
 {
-    static pcre16_jit_stack *stack;
-
     if (!extra) {
-        if (stack)
-            pcre16_jit_stack_free(stack);
-        stack = NULL;
+        if (stack16)
+            pcre16_jit_stack_free(stack16);
+        stack16 = NULL;
         return;
     }


-    if (!stack)
-        stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
-    /* Extra can be NULL. */
-    pcre16_assign_jit_stack(extra, callback16, stack);
+    pcre16_assign_jit_stack(extra, callback16, getstack16());
 }
 #endif /* SUPPORT_PCRE16 */


 #ifdef SUPPORT_PCRE32
+static pcre32_jit_stack *stack32;
+
+static pcre32_jit_stack *getstack32()
+{
+    if (!stack32)
+        stack32 = pcre32_jit_stack_alloc(1, 1024 * 1024);
+    return stack32;
+}
+
 static void setstack32(pcre32_extra *extra)
 {
-    static pcre32_jit_stack *stack;
-
     if (!extra) {
-        if (stack)
-            pcre32_jit_stack_free(stack);
-        stack = NULL;
+        if (stack32)
+            pcre32_jit_stack_free(stack32);
+        stack32 = NULL;
         return;
     }


-    if (!stack)
-        stack = pcre32_jit_stack_alloc(1, 1024 * 1024);
-    /* Extra can be NULL. */
-    pcre32_assign_jit_stack(extra, callback32, stack);
+    pcre32_assign_jit_stack(extra, callback32, getstack32());
 }
 #endif /* SUPPORT_PCRE32 */


@@ -1207,10 +1219,15 @@
         if (re8) {
             mark8_1 = NULL;
             mark8_2 = NULL;
-            setstack8(extra8);
             extra8->mark = &mark8_1;
-            return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
-                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
+
+            if ((counter & 0x1) != 0) {
+                setstack8(extra8);
+                return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
+            } else
+                return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
             memset(&dummy_extra8, 0, sizeof(pcre_extra));
             dummy_extra8.flags = PCRE_EXTRA_MARK;
             dummy_extra8.mark = &mark8_2;
@@ -1229,14 +1246,18 @@
         if (re16) {
             mark16_1 = NULL;
             mark16_2 = NULL;
-            setstack16(extra16);
             if ((current->flags & PCRE_UTF16) || (current->start_offset & F_FORCECONV))
                 length16 = convert_utf8_to_utf16(current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
             else
                 length16 = copy_char8_to_char16(current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
             extra16->mark = &mark16_1;
-            return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
-                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
+            if ((counter & 0x1) != 0) {
+                setstack16(extra16);
+                return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
+            } else
+                return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
             memset(&dummy_extra16, 0, sizeof(pcre16_extra));
             dummy_extra16.flags = PCRE_EXTRA_MARK;
             dummy_extra16.mark = &mark16_2;
@@ -1255,14 +1276,18 @@
         if (re32) {
             mark32_1 = NULL;
             mark32_2 = NULL;
-            setstack32(extra32);
             if ((current->flags & PCRE_UTF32) || (current->start_offset & F_FORCECONV))
                 length32 = convert_utf8_to_utf32(current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
             else
                 length32 = copy_char8_to_char32(current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
             extra32->mark = &mark32_1;
-            return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
-                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
+            if ((counter & 0x1) != 0) {
+                setstack32(extra32);
+                return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
+            } else
+                return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
             memset(&dummy_extra32, 0, sizeof(pcre32_extra));
             dummy_extra32.flags = PCRE_EXTRA_MARK;
             dummy_extra32.mark = &mark32_2;


Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2012-10-28 17:57:32 UTC (rev 1186)
+++ code/trunk/pcretest.c    2012-10-29 11:30:45 UTC (rev 1187)
@@ -4415,7 +4415,7 @@
       dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
       if (dbuffer == NULL)
         {
-        fprintf(stderr, "pcretest: malloc(%d) failed\n", dbuffer_size);
+        fprintf(stderr, "pcretest: malloc(%d) failed\n", (int)dbuffer_size);
         exit(1);
         }
       }