Revision: 1437
http://vcs.pcre.org/viewvc?view=rev&revision=1437
Author: zherczeg
Date: 2014-01-10 08:52:20 +0000 (Fri, 10 Jan 2014)
Log Message:
-----------
JIT: Better fixed prefix scan when UTF is enabled.
Modified Paths:
--------------
code/trunk/pcre_jit_compile.c
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2014-01-08 17:29:39 UTC (rev 1436)
+++ code/trunk/pcre_jit_compile.c 2014-01-10 08:52:20 UTC (rev 1437)
@@ -2567,7 +2567,7 @@
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
-static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
+static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough
to determine a match. */
@@ -3187,6 +3187,13 @@
cc++;
continue;
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ cc = bracketend(cc);
+ continue;
+
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
@@ -3237,26 +3244,66 @@
continue;
case OP_CLASS:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+ if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
+#endif
+ any = TRUE;
+ cc += 1 + 32 / sizeof(pcre_uchar);
+ break;
+
case OP_NCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (common->utf) return consumed;
+#endif
any = TRUE;
cc += 1 + 32 / sizeof(pcre_uchar);
break;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (common->utf) return consumed;
+#endif
any = TRUE;
cc += GET(cc, 1);
break;
#endif
+ case OP_DIGIT:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+ if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
+ return consumed;
+#endif
+ any = TRUE;
+ cc++;
+ break;
+
+ case OP_WHITESPACE:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+ if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
+ return consumed;
+#endif
+ any = TRUE;
+ cc++;
+ break;
+
+ case OP_WORDCHAR:
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+ if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
+ return consumed;
+#endif
+ any = TRUE;
+ cc++;
+ break;
+
case OP_NOT_DIGIT:
- case OP_DIGIT:
case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
case OP_ANY:
case OP_ALLANY:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (common->utf) return consumed;
+#endif
any = TRUE;
cc++;
break;
@@ -3264,6 +3311,9 @@
#ifdef SUPPORT_UCP
case OP_NOTPROP:
case OP_PROP:
+#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
+ if (common->utf) return consumed;
+#endif
any = TRUE;
cc += 1 + 2;
break;
@@ -3280,9 +3330,6 @@
if (any)
{
-#ifdef SUPPORT_UTF
- if (common->utf) return consumed;
-#endif
#if defined COMPILE_PCRE8
mask = 0xff;
#elif defined COMPILE_PCRE16