[Pcre-svn] [1437] code/trunk/pcre_jit_compile.c: JIT: Better fixed prefix scan when UTF is enabled.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1437] code/trunk/pcre_jit_compile.c: JIT: Better fixed prefix scan when UTF is enabled.
Revision: 1437
          http://vcs.pcre.org/viewvc?view=rev&revision=1437
Author:   zherczeg
Date:     2014-01-10 08:52:20 +0000 (Fri, 10 Jan 2014)


Log Message:
-----------
JIT: Better fixed prefix scan when UTF is enabled.

Modified Paths:
--------------
    code/trunk/pcre_jit_compile.c


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2014-01-08 17:29:39 UTC (rev 1436)
+++ code/trunk/pcre_jit_compile.c    2014-01-10 08:52:20 UTC (rev 1437)
@@ -2567,7 +2567,7 @@


#if defined SUPPORT_UTF && defined COMPILE_PCRE8

-static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
+static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
 {
 /* Tells whether the character codes below 128 are enough
 to determine a match. */
@@ -3187,6 +3187,13 @@
     cc++;
     continue;


+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    cc = bracketend(cc);
+    continue;
+
     case OP_PLUS:
     case OP_MINPLUS:
     case OP_POSPLUS:
@@ -3237,26 +3244,66 @@
     continue;


     case OP_CLASS:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
+#endif
+    any = TRUE;
+    cc += 1 + 32 / sizeof(pcre_uchar);
+    break;
+
     case OP_NCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
     any = TRUE;
     cc += 1 + 32 / sizeof(pcre_uchar);
     break;


 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
     any = TRUE;
     cc += GET(cc, 1);
     break;
 #endif


+    case OP_DIGIT:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_WHITESPACE:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
+    case OP_WORDCHAR:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+    if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
+      return consumed;
+#endif
+    any = TRUE;
+    cc++;
+    break;
+
     case OP_NOT_DIGIT:
-    case OP_DIGIT:
     case OP_NOT_WHITESPACE:
-    case OP_WHITESPACE:
     case OP_NOT_WORDCHAR:
-    case OP_WORDCHAR:
     case OP_ANY:
     case OP_ALLANY:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
     any = TRUE;
     cc++;
     break;
@@ -3264,6 +3311,9 @@
 #ifdef SUPPORT_UCP
     case OP_NOTPROP:
     case OP_PROP:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+    if (common->utf) return consumed;
+#endif
     any = TRUE;
     cc += 1 + 2;
     break;
@@ -3280,9 +3330,6 @@


   if (any)
     {
-#ifdef SUPPORT_UTF
-    if (common->utf) return consumed;
-#endif
 #if defined COMPILE_PCRE8
     mask = 0xff;
 #elif defined COMPILE_PCRE16