Revision: 1249
http://vcs.pcre.org/viewvc?view=rev&revision=1249
Author: zherczeg
Date: 2013-02-18 09:55:43 +0000 (Mon, 18 Feb 2013)
Log Message:
-----------
Inlining subpatterns in recursions.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_jit_compile.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-02-13 17:36:38 UTC (rev 1248)
+++ code/trunk/ChangeLog 2013-02-18 09:55:43 UTC (rev 1249)
@@ -43,8 +43,6 @@
(b) Minimum length was not checked before the matching is started.
- WARNING: Callouts are not release ready! <- this line needs to be removed when it is.
-
11. The value of capture_last that is passed to callouts was incorrect in some
cases when there was a capture on one path that was subsequently abandoned
after a backtrack. Also, the capture_last value is now reset after a
@@ -57,7 +55,10 @@
13. In the pathological case when an offset vector of size 2 is used, pcretest
now prints out the matched string after a yield of 0 or 1.
+14. Inlining subpatterns in recursions, when certain conditions are fulfilled.
+ Only supported by the JIT compiler at the moment.
+
Version 8.32 30-November-2012
-----------------------------
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2013-02-13 17:36:38 UTC (rev 1248)
+++ code/trunk/pcre_jit_compile.c 2013-02-18 09:55:43 UTC (rev 1249)
@@ -186,16 +186,14 @@
struct jump_list *next;
} jump_list;
-enum stub_types { stack_alloc };
-
typedef struct stub_list {
- enum stub_types type;
- int data;
struct sljit_jump *start;
struct sljit_label *quit;
struct stub_list *next;
} stub_list;
+enum frame_types { no_frame = -1, no_stack = -2 };
+
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
/* The following structure is the key data type for the recursive
@@ -277,6 +275,7 @@
typedef struct recurse_backtrack {
backtrack_common common;
+ BOOL inlined_pattern;
} recurse_backtrack;
#define MAX_RANGE_SIZE 6
@@ -514,6 +513,8 @@
case OP_WORDCHAR:
case OP_ANY:
case OP_ALLANY:
+ case OP_NOTPROP:
+ case OP_PROP:
case OP_ANYNL:
case OP_NOT_HSPACE:
case OP_HSPACE:
@@ -526,21 +527,46 @@
case OP_CIRCM:
case OP_DOLL:
case OP_DOLLM:
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CLASS:
+ case OP_NCLASS:
+ case OP_REF:
+ case OP_REFI:
+ case OP_RECURSE:
+ case OP_CALLOUT:
+ case OP_ALT:
+ case OP_KET:
+ case OP_KETRMAX:
+ case OP_KETRMIN:
+ case OP_KETRPOS:
+ case OP_REVERSE:
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ case OP_ONCE:
+ case OP_ONCE_NC:
+ case OP_BRA:
+ case OP_BRAPOS:
+ case OP_CBRA:
+ case OP_CBRAPOS:
+ case OP_COND:
+ case OP_SBRA:
+ case OP_SBRAPOS:
+ case OP_SCBRA:
+ case OP_SCBRAPOS:
+ case OP_SCOND:
+ case OP_CREF:
+ case OP_NCREF:
+ case OP_RREF:
+ case OP_NRREF:
case OP_DEF:
case OP_BRAZERO:
case OP_BRAMINZERO:
@@ -549,15 +575,10 @@
case OP_FAIL:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
+ case OP_CLOSE:
case OP_SKIPZERO:
- return cc + 1;
+ return cc + PRIV(OP_lengths)[*cc];
- case OP_ANYBYTE:
-#ifdef SUPPORT_UTF
- if (common->utf) return NULL;
-#endif
- return cc + 1;
-
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
@@ -568,128 +589,88 @@
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_EXACT:
case OP_POSSTAR:
case OP_POSPLUS:
case OP_POSQUERY:
+ case OP_POSUPTO:
case OP_STARI:
case OP_MINSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
+ case OP_UPTOI:
+ case OP_MINUPTOI:
+ case OP_EXACTI:
case OP_POSSTARI:
case OP_POSPLUSI:
case OP_POSQUERYI:
+ case OP_POSUPTOI:
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
+ case OP_NOTUPTO:
+ case OP_NOTMINUPTO:
+ case OP_NOTEXACT:
case OP_NOTPOSSTAR:
case OP_NOTPOSPLUS:
case OP_NOTPOSQUERY:
+ case OP_NOTPOSUPTO:
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
+ case OP_NOTUPTOI:
+ case OP_NOTMINUPTOI:
+ case OP_NOTEXACTI:
case OP_NOTPOSSTARI:
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERYI:
- cc += 2;
-#ifdef SUPPORT_UTF
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
-#endif
- return cc;
-
- case OP_UPTO:
- case OP_MINUPTO:
- case OP_EXACT:
- case OP_POSUPTO:
- case OP_UPTOI:
- case OP_MINUPTOI:
- case OP_EXACTI:
- case OP_POSUPTOI:
- case OP_NOTUPTO:
- case OP_NOTMINUPTO:
- case OP_NOTEXACT:
- case OP_NOTPOSUPTO:
- case OP_NOTUPTOI:
- case OP_NOTMINUPTOI:
- case OP_NOTEXACTI:
case OP_NOTPOSUPTOI:
- cc += 2 + IMM2_SIZE;
+ cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
return cc;
- case OP_NOTPROP:
- case OP_PROP:
- return cc + 1 + 2;
-
+ /* Special cases. */
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
case OP_TYPEPOSUPTO:
- case OP_REF:
- case OP_REFI:
- case OP_CREF:
- case OP_NCREF:
- case OP_RREF:
- case OP_NRREF:
- case OP_CLOSE:
- cc += 1 + IMM2_SIZE;
- return cc;
+ return cc + PRIV(OP_lengths)[*cc] - 1;
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- return cc + 1 + 2 * IMM2_SIZE;
+ case OP_ANYBYTE:
+#ifdef SUPPORT_UTF
+ if (common->utf) return NULL;
+#endif
+ return cc + 1;
- case OP_CLASS:
- case OP_NCLASS:
- return cc + 1 + 32 / sizeof(pcre_uchar);
-
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
return cc + GET(cc, 1);
#endif
- case OP_RECURSE:
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- case OP_REVERSE:
- case OP_ONCE:
- case OP_ONCE_NC:
- case OP_BRA:
- case OP_BRAPOS:
- case OP_COND:
- case OP_SBRA:
- case OP_SBRAPOS:
- case OP_SCOND:
- case OP_ALT:
- case OP_KET:
- case OP_KETRMAX:
- case OP_KETRMIN:
- case OP_KETRPOS:
- return cc + 1 + LINK_SIZE;
-
- case OP_CBRA:
- case OP_CBRAPOS:
- case OP_SCBRA:
- case OP_SCBRAPOS:
- return cc + 1 + LINK_SIZE + IMM2_SIZE;
-
case OP_MARK:
return cc + 1 + 2 + cc[1];
- case OP_CALLOUT:
- return cc + 2 + 2 * LINK_SIZE;
-
default:
return NULL;
}
@@ -1124,12 +1105,13 @@
}
}
-/* Returns with -1 if no need for frame. */
+/* Returns with a frame_types (always < 0) if no need for frame. */
static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
{
-pcre_uchar *ccend = bracketend(cc);
+pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
int length = 0;
int possessive = 0;
+BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
@@ -1149,6 +1131,7 @@
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
+ stack_restore = TRUE;
if (!setsom_found)
{
length += 2;
@@ -1159,6 +1142,7 @@
case OP_MARK:
SLJIT_ASSERT(common->mark_ptr != 0);
+ stack_restore = TRUE;
if (!setmark_found)
{
length += 2;
@@ -1168,6 +1152,7 @@
break;
case OP_RECURSE:
+ stack_restore = TRUE;
if (common->has_set_som && !setsom_found)
{
length += 2;
@@ -1190,6 +1175,7 @@
case OP_CBRAPOS:
case OP_SCBRA:
case OP_SCBRAPOS:
+ stack_restore = TRUE;
if (common->capture_last_ptr != 0 && !capture_last_found)
{
length += 2;
@@ -1200,6 +1186,73 @@
break;
default:
+ stack_restore = TRUE;
+ /* Fall through. */
+
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_NOT_DIGIT:
+ case OP_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ case OP_WORDCHAR:
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYBYTE:
+ case OP_NOTPROP:
+ case OP_PROP:
+ case OP_ANYNL:
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
+ case OP_EXTUNI:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+
+ case OP_EXACT:
+ case OP_POSSTAR:
+ case OP_POSPLUS:
+ case OP_POSQUERY:
+ case OP_POSUPTO:
+
+ case OP_EXACTI:
+ case OP_POSSTARI:
+ case OP_POSPLUSI:
+ case OP_POSQUERYI:
+ case OP_POSUPTOI:
+
+ case OP_NOTEXACT:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSUPTO:
+
+ case OP_NOTEXACTI:
+ case OP_NOTPOSSTARI:
+ case OP_NOTPOSPLUSI:
+ case OP_NOTPOSQUERYI:
+ case OP_NOTPOSUPTOI:
+
+ case OP_TYPEEXACT:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ case OP_TYPEPOSUPTO:
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ case OP_XCLASS:
+
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
break;
@@ -1207,17 +1260,17 @@
/* Possessive quantifiers can use a special case. */
if (SLJIT_UNLIKELY(possessive == length))
- return -1;
+ return stack_restore ? no_frame : no_stack;
if (length > 0)
return length + 1;
-return -1;
+return stack_restore ? no_frame : no_stack;
}
static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
-pcre_uchar *ccend = bracketend(cc);
+pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
@@ -1782,15 +1835,13 @@
}
}
-static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
+static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
if (list_item)
{
- list_item->type = type;
- list_item->data = data;
list_item->start = start;
list_item->quit = LABEL();
list_item->next = common->stubs;
@@ -1806,12 +1857,7 @@
while (list_item)
{
JUMPHERE(list_item->start);
- switch(list_item->type)
- {
- case stack_alloc:
- add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
- break;
- }
+ add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
JUMPTO(SLJIT_JUMP, list_item->quit);
list_item = list_item->next;
}
@@ -1839,7 +1885,7 @@
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
#endif
-add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
+add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
}
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
@@ -4060,7 +4106,7 @@
case PT_WORD:
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
- /* ... fall through */
+ /* Fall through. */
case PT_ALNUM:
SET_TYPE_OFFSET(ucp_Ll);
@@ -5047,8 +5093,19 @@
recurse_entry *entry = common->entries;
recurse_entry *prev = NULL;
int start = GET(cc, 1);
+pcre_uchar *start_cc;
PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
+
+/* Inlining simple patterns. */
+if (get_framesize(common, common->start + start, TRUE) == no_stack)
+ {
+ start_cc = common->start + start;
+ compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
+ BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
+ return cc + 1 + LINK_SIZE;
+ }
+
while (entry != NULL)
{
if (entry->start == start)
@@ -6197,7 +6254,8 @@
BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
allocate_stack(common, stacksize);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
+ if (framesize == no_frame)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
if (offset != 0)
{
@@ -6261,7 +6319,8 @@
if (framesize < 0)
{
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
+ if (framesize == no_frame)
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
if (offset != 0)
{
@@ -7183,7 +7242,11 @@
{
DEFINE_COMPILER;
+if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
+ compile_backtrackingpath(common, current->top);
set_jumps(current->topbacktracks, LABEL());
+if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
+ return;
if (common->has_set_som && common->mark_ptr != 0)
{