Revision: 986
http://vcs.pcre.org/viewvc?view=rev&revision=986
Author: zherczeg
Date: 2012-07-07 04:33:54 +0100 (Sat, 07 Jul 2012)
Log Message:
-----------
Improved JIT compiler optimizations
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_jit_compile.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2012-07-06 09:58:54 UTC (rev 985)
+++ code/trunk/ChangeLog 2012-07-07 03:33:54 UTC (rev 986)
@@ -1,6 +1,13 @@
ChangeLog for PCRE
------------------
+Version 8.32
+------------
+
+1. Improved JIT compiler optimizations for first character search and single
+ character iterators.
+
+
Version 8.31 06-July-2012
-------------------------
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2012-07-06 09:58:54 UTC (rev 985)
+++ code/trunk/pcre_jit_compile.c 2012-07-07 03:33:54 UTC (rev 986)
@@ -667,13 +667,96 @@
}
}
+#define CASE_ITERATOR_LOCAL1 /* single-char iterators: 1 local word */ \
+ case OP_MINSTAR: \
+ case OP_MINPLUS: \
+ case OP_QUERY: \
+ case OP_MINQUERY: \
+ case OP_MINSTARI: \
+ case OP_MINPLUSI: \
+ case OP_QUERYI: \
+ case OP_MINQUERYI: \
+ case OP_NOTMINSTAR: \
+ case OP_NOTMINPLUS: \
+ case OP_NOTQUERY: \
+ case OP_NOTMINQUERY: \
+ case OP_NOTMINSTARI: \
+ case OP_NOTMINPLUSI: \
+ case OP_NOTQUERYI: \
+ case OP_NOTMINQUERYI:
+
+#define CASE_ITERATOR_LOCAL2A /* single-char iterators: 2 local words */ \
+ case OP_STAR: \
+ case OP_PLUS: \
+ case OP_STARI: \
+ case OP_PLUSI: \
+ case OP_NOTSTAR: \
+ case OP_NOTPLUS: \
+ case OP_NOTSTARI: \
+ case OP_NOTPLUSI:
+
+#define CASE_ITERATOR_LOCAL2B /* single-char iterators with an IMM2 operand: 2 local words */ \
+ case OP_UPTO: \
+ case OP_MINUPTO: \
+ case OP_UPTOI: \
+ case OP_MINUPTOI: \
+ case OP_NOTUPTO: \
+ case OP_NOTMINUPTO: \
+ case OP_NOTUPTOI: \
+ case OP_NOTMINUPTOI:
+
+#define CASE_ITERATOR_TYPE_LOCAL1 /* type iterators: 1 local word */ \
+ case OP_TYPEMINSTAR: \
+ case OP_TYPEMINPLUS: \
+ case OP_TYPEQUERY: \
+ case OP_TYPEMINQUERY:
+
+#define CASE_ITERATOR_TYPE_LOCAL2A /* type iterators: 2 local words, none for OP_ANYNL/OP_EXTUNI */ \
+ case OP_TYPESTAR: \
+ case OP_TYPEPLUS:
+
+#define CASE_ITERATOR_TYPE_LOCAL2B /* type iterators with an IMM2 operand: 2 local words, none for OP_ANYNL/OP_EXTUNI */ \
+ case OP_TYPEUPTO: \
+ case OP_TYPEMINUPTO:
+
+static int get_class_iterator_size(pcre_uchar *cc)
+{
+/* cc points at the opcode that follows a character class. The result is the
+ number of local words (0, 1 or 2) the callers reserve for that repeat. */
+const pcre_uchar repeat = *cc;
+
+if (repeat == OP_CRSTAR || repeat == OP_CRPLUS)
+ return 2;
+
+if (repeat == OP_CRMINSTAR || repeat == OP_CRMINPLUS
+ || repeat == OP_CRQUERY || repeat == OP_CRMINQUERY)
+ return 1;
+
+if (repeat == OP_CRRANGE || repeat == OP_CRMINRANGE)
+ {
+ /* Both IMM2 operands equal (presumably min == max): nothing to store. */
+ if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
+ return 2;
+ return 0;
+ }
+
+/* Any other opcode is not a class repeat. */
+return 0;
+}
+
static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
{
int localspace = 0;
pcre_uchar *alternative;
+pcre_uchar *end = NULL;
+int space, size, bracketlen;
+
/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
{
+ space = 0;
+ size = 0;
+ bracketlen = 0;
switch(*cc)
{
case OP_SET_SOM:
@@ -692,13 +775,13 @@
case OP_SBRAPOS:
case OP_SCOND:
localspace += sizeof(sljit_w);
- cc += 1 + LINK_SIZE;
+ bracketlen = 1 + LINK_SIZE;
break;
case OP_CBRAPOS:
case OP_SCBRAPOS:
localspace += sizeof(sljit_w);
- cc += 1 + LINK_SIZE + IMM2_SIZE;
+ bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_COND:
@@ -706,9 +789,63 @@
alternative = cc + GET(cc, 1);
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
localspace += sizeof(sljit_w);
- cc += 1 + LINK_SIZE;
+ bracketlen = 1 + LINK_SIZE;
break;
+ case OP_BRA:
+ bracketlen = 1 + LINK_SIZE;
+ break;
+
+ case OP_CBRA:
+ case OP_SCBRA:
+ bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+ break;
+
+ CASE_ITERATOR_LOCAL1
+ space = 1;
+ size = -2;
+ break;
+
+ CASE_ITERATOR_LOCAL2A
+ space = 2;
+ size = -2;
+ break;
+
+ CASE_ITERATOR_LOCAL2B
+ space = 2;
+ size = -(2 + IMM2_SIZE);
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL1
+ space = 1;
+ size = 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2A
+ if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
+ space = 2;
+ size = 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2B
+ if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
+ space = 2;
+ size = 1 + IMM2_SIZE;
+ break;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ size += 1 + 32 / sizeof(pcre_uchar);
+ space = get_class_iterator_size(cc + size);
+ break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ size = GET(cc, 1);
+ space = get_class_iterator_size(cc + size);
+ break;
+#endif
+
case OP_RECURSE:
/* Set its value only once. */
if (common->recursive_head == 0)
@@ -734,6 +871,33 @@
return -1;
break;
}
+
+ if (space > 0 && cc >= end)
+ localspace += sizeof(sljit_w) * space;
+
+ if (size != 0)
+ {
+ if (size < 0)
+ {
+ cc += -size;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ }
+ else
+ cc += size;
+ }
+
+ if (bracketlen > 0)
+ {
+ if (cc >= end)
+ {
+ end = bracketend(cc);
+ if (end[-1 - LINK_SIZE] == OP_KET)
+ end = NULL;
+ }
+ cc += bracketlen;
+ }
}
return localspace;
}
@@ -742,8 +906,14 @@
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
+pcre_uchar *end = NULL;
+int space, size, bracketlen;
+
while (cc < ccend)
{
+ space = 0;
+ size = 0;
+ bracketlen = 0;
switch(*cc)
{
case OP_ASSERT:
@@ -758,14 +928,14 @@
case OP_SCOND:
common->localptrs[cc - common->start] = localptr;
localptr += sizeof(sljit_w);
- cc += 1 + LINK_SIZE;
+ bracketlen = 1 + LINK_SIZE;
break;
case OP_CBRAPOS:
case OP_SCBRAPOS:
common->localptrs[cc - common->start] = localptr;
localptr += sizeof(sljit_w);
- cc += 1 + LINK_SIZE + IMM2_SIZE;
+ bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_COND:
@@ -776,14 +946,98 @@
common->localptrs[cc - common->start] = localptr;
localptr += sizeof(sljit_w);
}
- cc += 1 + LINK_SIZE;
+ bracketlen = 1 + LINK_SIZE;
break;
+ case OP_BRA:
+ bracketlen = 1 + LINK_SIZE;
+ break;
+
+ case OP_CBRA:
+ case OP_SCBRA:
+ bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+ break;
+
+ CASE_ITERATOR_LOCAL1
+ space = 1;
+ size = -2;
+ break;
+
+ CASE_ITERATOR_LOCAL2A
+ space = 2;
+ size = -2;
+ break;
+
+ CASE_ITERATOR_LOCAL2B
+ space = 2;
+ size = -(2 + IMM2_SIZE);
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL1
+ space = 1;
+ size = 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2A
+ if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
+ space = 2;
+ size = 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2B
+ if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
+ space = 2;
+ size = 1 + IMM2_SIZE;
+ break;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+ size += 1 + 32 / sizeof(pcre_uchar);
+ space = get_class_iterator_size(cc + size);
+ break;
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ size = GET(cc, 1);
+ space = get_class_iterator_size(cc + size);
+ break;
+#endif
+
default:
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
break;
}
+
+ if (space > 0 && cc >= end)
+ {
+ common->localptrs[cc - common->start] = localptr;
+ localptr += sizeof(sljit_w) * space;
+ }
+
+ if (size != 0)
+ {
+ if (size < 0)
+ {
+ cc += -size;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ }
+ else
+ cc += size;
+ }
+
+ if (bracketlen > 0)
+ {
+ if (cc >= end)
+ {
+ end = bracketend(cc);
+ if (end[-1 - LINK_SIZE] == OP_KET)
+ end = NULL;
+ }
+ cc += bracketlen;
+ }
}
}
@@ -963,10 +1217,12 @@
static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
{
int localsize = 2;
+int size;
pcre_uchar *alternative;
/* Calculate the sum of the local variables. */
while (cc < ccend)
{
+ size = 0;
switch(*cc)
{
case OP_ASSERT:
@@ -1003,6 +1259,64 @@
cc += 1 + LINK_SIZE;
break;
+ CASE_ITERATOR_LOCAL1
+ if (PRIV_DATA(cc))
+ localsize++;
+ cc += 2;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ break;
+
+ CASE_ITERATOR_LOCAL2A
+ if (PRIV_DATA(cc))
+ localsize += 2;
+ cc += 2;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ break;
+
+ CASE_ITERATOR_LOCAL2B
+ if (PRIV_DATA(cc))
+ localsize += 2;
+ cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL1
+ if (PRIV_DATA(cc))
+ localsize++;
+ cc += 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2A
+ if (PRIV_DATA(cc))
+ localsize += 2;
+ cc += 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2B
+ if (PRIV_DATA(cc))
+ localsize += 2;
+ cc += 1 + IMM2_SIZE;
+ break;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / sizeof(pcre_uchar);
+#else
+ size = 1 + 32 / sizeof(pcre_uchar);
+#endif
+ if (PRIV_DATA(cc))
+ localsize += get_class_iterator_size(cc + size);
+ cc += size;
+ break;
+
default:
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
@@ -1018,7 +1332,7 @@
{
DEFINE_COMPILER;
int srcw[2];
-int count;
+int count, size;
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
@@ -1098,8 +1412,8 @@
case OP_CBRAPOS:
case OP_SCBRAPOS:
count = 2;
- srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
- srcw[0] = PRIV_DATA(cc);
+ srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+ srcw[1] = PRIV_DATA(cc);
SLJIT_ASSERT(srcw[0] != 0);
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@@ -1116,6 +1430,102 @@
cc += 1 + LINK_SIZE;
break;
+ CASE_ITERATOR_LOCAL1
+ if (PRIV_DATA(cc))
+ {
+ count = 1;
+ srcw[0] = PRIV_DATA(cc);
+ }
+ cc += 2;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ break;
+
+ CASE_ITERATOR_LOCAL2A
+ if (PRIV_DATA(cc))
+ {
+ count = 2;
+ srcw[0] = PRIV_DATA(cc);
+ srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
+ }
+ cc += 2;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ break;
+
+ CASE_ITERATOR_LOCAL2B
+ if (PRIV_DATA(cc))
+ {
+ count = 2;
+ srcw[0] = PRIV_DATA(cc);
+ srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
+ }
+ cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL1
+ if (PRIV_DATA(cc))
+ {
+ count = 1;
+ srcw[0] = PRIV_DATA(cc);
+ }
+ cc += 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2A
+ if (PRIV_DATA(cc))
+ {
+ count = 2;
+ srcw[0] = PRIV_DATA(cc);
+ srcw[1] = srcw[0] + sizeof(sljit_w);
+ }
+ cc += 1;
+ break;
+
+ CASE_ITERATOR_TYPE_LOCAL2B
+ if (PRIV_DATA(cc))
+ {
+ count = 2;
+ srcw[0] = PRIV_DATA(cc);
+ srcw[1] = srcw[0] + sizeof(sljit_w);
+ }
+ cc += 1 + IMM2_SIZE;
+ break;
+
+ case OP_CLASS:
+ case OP_NCLASS:
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / sizeof(pcre_uchar);
+#else
+ size = 1 + 32 / sizeof(pcre_uchar);
+#endif
+ if (PRIV_DATA(cc))
+ switch(get_class_iterator_size(cc + size))
+ {
+ case 1:
+ count = 1;
+ srcw[0] = PRIV_DATA(cc);
+ break;
+
+ case 2:
+ count = 2;
+ srcw[0] = PRIV_DATA(cc);
+ srcw[1] = srcw[0] + sizeof(sljit_w);
+ break;
+
+ default:
+ SLJIT_ASSERT_STOP();
+ break;
+ }
+ cc += size;
+ break;
+
default:
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
@@ -1218,6 +1628,13 @@
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
+#undef CASE_ITERATOR_LOCAL1
+#undef CASE_ITERATOR_LOCAL2A
+#undef CASE_ITERATOR_LOCAL2B
+#undef CASE_ITERATOR_TYPE_LOCAL1
+#undef CASE_ITERATOR_TYPE_LOCAL2A
+#undef CASE_ITERATOR_TYPE_LOCAL2B
+
static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
{
return (value & (value - 1)) == 0;
@@ -2089,6 +2506,196 @@
return mainloop;
}
+/* Emits a loop that advances STR_PTR until the two code units found there
+ equal the two code units a match must start with (a caseless letter is
+ compared with its case bit forced on); firstline bounds the scan by
+ common->first_line_end. Returns FALSE, emitting nothing, when no fixed
+ two-unit prefix can be read from the pattern, and TRUE otherwise. */
+static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *leave;
+struct sljit_jump *found;
+pcre_int32 chars[4];
+pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
+int index = 0;
+pcre_int32 len, c, bit;
+unsigned int caseless;
+BOOL must_end;
+
+#ifdef COMPILE_PCRE8
+union {
+ sljit_uh ascombined;
+ sljit_ub asuchars[2];
+} pair;
+#else
+union {
+ sljit_ui ascombined;
+ sljit_uh asuchars[2];
+} pair;
+#endif
+/* The first branch is followed by another alternative: give up. */
+if (*(common->start + GET(common->start, 1)) == OP_ALT)
+ return FALSE;
+/* Fill chars[]: even slots hold code units, odd slots their case bit. */
+while (TRUE)
+ {
+ caseless = 0;
+ must_end = TRUE;
+ switch(*cc)
+ {
+ case OP_CHAR:
+ must_end = FALSE;
+ cc++;
+ break;
+
+ case OP_CHARI:
+ caseless = 1;
+ must_end = FALSE;
+ cc++;
+ break;
+
+ case OP_SOD:
+ case OP_SOM:
+ case OP_SET_SOM:
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ /* Zero width assertions. */
+ cc++;
+ continue;
+
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_POSPLUS:
+ cc++;
+ break;
+
+ case OP_EXACT:
+ cc += 1 + IMM2_SIZE;
+ break;
+
+ case OP_PLUSI:
+ case OP_MINPLUSI:
+ case OP_POSPLUSI:
+ caseless = 1;
+ cc++;
+ break;
+
+ case OP_EXACTI:
+ caseless = 1;
+ cc += 1 + IMM2_SIZE;
+ break;
+
+ default:
+ return FALSE;
+ }
+
+ len = 1;
+#ifdef SUPPORT_UTF
+ if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
+#endif
+
+ if (caseless && char_has_othercase(common, cc))
+ {
+ caseless = char_get_othercase_bit(common, cc);
+ if (caseless == 0)
+ return FALSE;
+#ifdef COMPILE_PCRE8
+ caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
+#else
+ if ((caseless & 0x100) != 0)
+ caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
+ else
+ caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
+#endif
+ }
+ else
+ caseless = 0;
+
+ while (len > 0 && index < 2 * 2)
+ {
+ c = *cc;
+ bit = 0;
+ if (len == (caseless & 0xff))
+ {
+ bit = caseless >> 8;
+ c |= bit;
+ }
+ chars[index] = c;
+ chars[index + 1] = bit;
+ len--;
+ index += 2;
+ cc++;
+ }
+
+ if (index == 2 * 2)
+ break;
+ else if (must_end)
+ return FALSE;
+ }
+/* Each step reads two code units, so stop one unit (IN_UCHARS) early. */
+if (firstline)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
+ OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(1));
+ }
+else
+ OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(1));
+
+start = LABEL();
+leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+#ifdef COMPILE_PCRE8
+OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#else /* COMPILE_PCRE8 */
+OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#endif
+#else /* SLJIT_UNALIGNED */
+#if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+#else /* SLJIT_BIG_ENDIAN */
+OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+#endif /* SLJIT_BIG_ENDIAN */
+#ifdef COMPILE_PCRE8
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
+#else /* COMPILE_PCRE8 */
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
+#endif
+OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
+#endif
+/* Force the case bits on in the loaded pair, as was done for chars[]. */
+if (chars[1] != 0 || chars[3] != 0)
+ {
+ pair.asuchars[0] = chars[1];
+ pair.asuchars[1] = chars[3];
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
+ }
+
+pair.asuchars[0] = chars[0];
+pair.asuchars[1] = chars[2];
+found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+JUMPTO(SLJIT_JUMP, start);
+JUMPHERE(found);
+JUMPHERE(leave);
+/* Undo exactly one of the two STR_END adjustments made before the loop. */
+if (firstline)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+else
+ OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(1));
+return TRUE;
+}
+
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
{
DEFINE_COMPILER;
@@ -2137,25 +2744,6 @@
}
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
-if (common->utf)
- {
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- }
-#endif
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
-if (common->utf)
- {
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- }
-#endif
JUMPTO(SLJIT_JUMP, start);
JUMPHERE(found);
JUMPHERE(leave);
@@ -5390,6 +5978,10 @@
jump_list *nomatch = NULL;
struct sljit_jump *jump = NULL;
struct sljit_label *label;
+int localptr = PRIV_DATA(cc);
+int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
+int offset0 = (localptr == 0) ? STACK(0) : localptr;
+int offset1 = (localptr == 0) ? STACK(1) : localptr + sizeof(sljit_w);
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
@@ -5403,6 +5995,7 @@
case OP_CRRANGE:
if (type == OP_ANYNL || type == OP_EXTUNI)
{
+ SLJIT_ASSERT(localptr == 0);
if (opcode == OP_STAR || opcode == OP_UPTO)
{
allocate_stack(common, 2);
@@ -5440,28 +6033,29 @@
{
if (opcode == OP_PLUS)
compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
+ if (localptr == 0)
+ allocate_stack(common, 2);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
label = LABEL();
compile_char1_trypath(common, type, cc, &nomatch);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
{
- OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
JUMPTO(SLJIT_JUMP, label);
}
else
{
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, base, offset1, TMP1, 0);
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
}
set_jumps(nomatch, LABEL());
if (opcode == OP_CRRANGE)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
}
BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
break;
@@ -5470,16 +6064,18 @@
case OP_MINPLUS:
if (opcode == OP_MINPLUS)
compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ if (localptr == 0)
+ allocate_stack(common, 1);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
break;
case OP_MINUPTO:
case OP_CRMINRANGE:
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
+ if (localptr == 0)
+ allocate_stack(common, 2);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
if (opcode == OP_CRMINRANGE)
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
@@ -5487,8 +6083,9 @@
case OP_QUERY:
case OP_MINQUERY:
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ if (localptr == 0)
+ allocate_stack(common, 1);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode == OP_QUERY)
compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
@@ -5888,6 +6485,10 @@
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
+int localptr = PRIV_DATA(cc);
+int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
+int offset0 = (localptr == 0) ? STACK(0) : localptr;
+int offset1 = (localptr == 0) ? STACK(1) : localptr + sizeof(sljit_w);
cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
@@ -5899,6 +6500,7 @@
case OP_CRRANGE:
if (type == OP_ANYNL || type == OP_EXTUNI)
{
+ SLJIT_ASSERT(localptr == 0);
set_jumps(current->topbacktracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
@@ -5908,17 +6510,18 @@
{
if (opcode <= OP_PLUS || opcode == OP_UPTO)
arg2 = 0;
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, TMP1, 0, base, offset1);
jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
- OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
skip_char_back(common);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
if (opcode == OP_CRRANGE)
set_jumps(current->topbacktracks, LABEL());
JUMPHERE(jump);
- free_stack(common, 2);
+ if (localptr == 0)
+ free_stack(common, 2);
if (opcode == OP_PLUS)
set_jumps(current->topbacktracks, LABEL());
}
@@ -5926,12 +6529,13 @@
case OP_MINSTAR:
case OP_MINPLUS:
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
compile_char1_trypath(common, type, cc, &jumplist);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
set_jumps(jumplist, LABEL());
- free_stack(common, 1);
+ if (localptr == 0)
+ free_stack(common, 1);
if (opcode == OP_MINPLUS)
set_jumps(current->topbacktracks, LABEL());
break;
@@ -5943,13 +6547,13 @@
label = LABEL();
set_jumps(current->topbacktracks, label);
}
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
compile_char1_trypath(common, type, cc, &jumplist);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+ OP1(SLJIT_MOV, TMP1, 0, base, offset1);
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, base, offset1, TMP1, 0);
if (opcode == OP_CRMINRANGE)
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
@@ -5960,31 +6564,34 @@
CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
set_jumps(jumplist, LABEL());
- free_stack(common, 2);
+ if (localptr == 0)
+ free_stack(common, 2);
break;
case OP_QUERY:
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+ OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
jump = JUMP(SLJIT_JUMP);
set_jumps(current->topbacktracks, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+ OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
JUMPHERE(jump);
- free_stack(common, 1);
+ if (localptr == 0)
+ free_stack(common, 1);
break;
case OP_MINQUERY:
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+ OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
compile_char1_trypath(common, type, cc, &jumplist);
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
set_jumps(jumplist, LABEL());
JUMPHERE(jump);
- free_stack(common, 1);
+ if (localptr == 0)
+ free_stack(common, 1);
break;
case OP_EXACT:
@@ -6856,7 +7463,7 @@
{
case 0:
/* Compile-time default */
- switch (NEWLINE)
+ switch(NEWLINE)
{
case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
@@ -6979,6 +7586,8 @@
/* Forward search if possible. */
if ((re->options & PCRE_NO_START_OPTIMIZE) == 0)
{
+ if (mode == JIT_COMPILE && fast_forward_first_two_chars(common, (re->options & PCRE_FIRSTLINE) != 0))
+ /* The two-character scan was emitted; skip the weaker searches below. */;
- if ((re->flags & PCRE_FIRSTSET) != 0)
+ else if ((re->flags & PCRE_FIRSTSET) != 0)
fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
else if ((re->flags & PCRE_STARTLINE) != 0)