[Pcre-svn] [1040] code/trunk: Improving the first n characte…

Góra strony
Delete this message
Autor: Subversion repository
Data:  
Dla: pcre-svn
Temat: [Pcre-svn] [1040] code/trunk: Improving the first n character searches.
Revision: 1040
          http://vcs.pcre.org/viewvc?view=rev&revision=1040
Author:   zherczeg
Date:     2012-09-16 07:52:27 +0100 (Sun, 16 Sep 2012)


Log Message:
-----------
Improving the first n character searches.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_jit_compile.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2012-09-13 16:39:03 UTC (rev 1039)
+++ code/trunk/ChangeLog    2012-09-16 06:52:27 UTC (rev 1040)
@@ -81,7 +81,9 @@
     substrings matched by the 1st and 2nd capturing parentheses. A separating 
     string can be specified by --om-separator (default empty). 


+19. Improving the first n character searches.

+
Version 8.31 06-July-2012
-------------------------


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2012-09-13 16:39:03 UTC (rev 1039)
+++ code/trunk/pcre_jit_compile.c    2012-09-16 06:52:27 UTC (rev 1040)
@@ -2563,47 +2563,37 @@
 return mainloop;
 }


-static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
+#define MAX_N_CHARS 3
+
+static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
-struct sljit_jump *found;
-pcre_int32 chars[4];
+pcre_int32 chars[MAX_N_CHARS * 2];
pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
int location = 0;
pcre_int32 len, c, bit, caseless;
-BOOL must_end;
+int must_stop;

-#ifdef COMPILE_PCRE8
-union {
-    sljit_uh ascombined;
-    sljit_ub asuchars[2];
-} pair;
-#else
-union {
-    sljit_ui ascombined;
-    sljit_uh asuchars[2];
-} pair;
-#endif
-
+/* We do not support alternatives now. */
 if (*(common->start + GET(common->start, 1)) == OP_ALT)
   return FALSE;


 while (TRUE)
   {
   caseless = 0;
-  must_end = TRUE;
+  must_stop = 1;
   switch(*cc)
     {
     case OP_CHAR:
-    must_end = FALSE;
+    must_stop = 0;
     cc++;
     break;


     case OP_CHARI:
     caseless = 1;
-    must_end = FALSE;
+    must_stop = 0;
     cc++;
     break;


@@ -2645,9 +2635,13 @@
     break;


     default:
-    return FALSE;
+    must_stop = 2;
+    break;
     }


+  if (must_stop == 2)
+      break;
+
   len = 1;
 #ifdef SUPPORT_UTF
   if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
@@ -2670,7 +2664,7 @@
   else
     caseless = 0;


-  while (len > 0 && location < 2 * 2)
+  while (len > 0 && location < MAX_N_CHARS * 2)
     {
     c = *cc;
     bit = 0;
@@ -2688,72 +2682,56 @@
     cc++;
     }


-  if (location == 2 * 2)
+  if (location >= MAX_N_CHARS * 2 || must_stop != 0)
     break;
-  else if (must_end)
-    return FALSE;
   }


+/* At least two characters are required. */
+if (location < 2 * 2)
+    return FALSE;
+
 if (firstline)
   {
   SLJIT_ASSERT(common->first_line_end != 0);
   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
-  OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, (location >> 1) - 1);
   }
 else
-  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
+  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);


start = LABEL();
quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
-#ifdef COMPILE_PCRE8
-OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-#else /* COMPILE_PCRE8 */
-OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-#endif

-#else /* SLJIT_UNALIGNED */
-
-#if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
-#else /* SLJIT_BIG_ENDIAN */
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
-OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-#endif /* SLJIT_BIG_ENDIAN */
-
-#ifdef COMPILE_PCRE8
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
-#else /* COMPILE_PCRE8 */
-OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
-#endif
-OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-
-#endif
-
-if (chars[1] != 0 || chars[3] != 0)
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+if (chars[1] != 0)
+  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
+CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
+if (location > 2 * 2)
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+if (chars[3] != 0)
+  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
+CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
+if (location > 2 * 2)
   {
-  pair.asuchars[0] = chars[1];
-  pair.asuchars[1] = chars[3];
-  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
+  if (chars[5] != 0)
+    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
+  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
   }
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));


-pair.asuchars[0] = chars[0];
-pair.asuchars[1] = chars[2];
-found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
-
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-JUMPTO(SLJIT_JUMP, start);
-JUMPHERE(found);
JUMPHERE(quit);

if (firstline)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
- OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, (location >> 1) - 1);
return TRUE;
}

+#undef MAX_N_CHARS
+
 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
 {
 DEFINE_COMPILER;
@@ -7916,7 +7894,7 @@
   /* Forward search if possible. */
   if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
     {
-    if (mode == JIT_COMPILE && fast_forward_first_two_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
+    if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
       { /* Do nothing */ }
     else if ((re->flags & PCRE_FIRSTSET) != 0)
       fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);