Revision: 1627
http://vcs.pcre.org/viewvc?view=rev&revision=1627
Author: zherczeg
Date: 2016-02-09 08:55:32 +0000 (Tue, 09 Feb 2016)
Log Message:
-----------
JIT compiler does not use TMP3 for matching xclass anymore.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_jit_compile.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-02-08 09:29:08 UTC (rev 1626)
+++ code/trunk/ChangeLog 2016-02-09 08:55:32 UTC (rev 1627)
@@ -44,12 +44,9 @@
10. pcre_get_substring_list() crashed if the use of \K in a match caused the
start of the match to be earlier than the end.
-11. JIT code generator for assertion matching is refactored to a separate
- function.
+11. Migrating appropriate PCRE2 JIT improvements to PCRE.
-12. Assertion code generator in JIT has been optimized.
-
-13. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind
+12. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind
assertion, caused pcre2test to generate incorrect output, and also to read
uninitialized memory (detected by ASAN or valgrind).
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2016-02-08 09:29:08 UTC (rev 1626)
+++ code/trunk/pcre_jit_compile.c 2016-02-09 08:55:32 UTC (rev 1627)
@@ -277,12 +277,18 @@
struct sljit_label *matchingpath;
} braminzero_backtrack;
-typedef struct iterator_backtrack {
+typedef struct char_iterator_backtrack {
backtrack_common common;
/* Next iteration. */
struct sljit_label *matchingpath;
-} iterator_backtrack;
+} char_iterator_backtrack;
+typedef struct ref_iterator_backtrack {
+ backtrack_common common;
+ /* Next iteration. */
+ struct sljit_label *matchingpath;
+} ref_iterator_backtrack;
+
typedef struct recurse_entry {
struct recurse_entry *next;
/* Contains the function entry. */
@@ -4776,6 +4782,8 @@
} \
charoffset = (value);
+static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
+
static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
@@ -4792,8 +4800,8 @@
#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
-int typereg = TMP1, scriptreg = TMP1;
-const pcre_uint32 *other_cases;
+int typereg = TMP1;
+const sljit_ui *other_cases;
sljit_uw typeoffset;
#endif
@@ -4855,6 +4863,14 @@
switch(*cc)
{
case PT_ANY:
+ /* Any either accepts everything or ignored. */
+ if (cc[-1] == XCL_PROP)
+ {
+ compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
+ if (list == backtracks)
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ return;
+ }
break;
case PT_LAMP:
@@ -4891,6 +4907,7 @@
}
#endif
}
+SLJIT_ASSERT(compares > 0);
/* We are not necessary in utf mode even in 8 bit mode. */
cc = ccbegin;
@@ -4924,7 +4941,7 @@
}
else if ((cc[-1] & XCL_MAP) != 0)
{
- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UCP
charsaved = TRUE;
#endif
@@ -4949,45 +4966,82 @@
JUMPHERE(jump);
}
- OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
cc += 32 / sizeof(pcre_uchar);
}
#ifdef SUPPORT_UCP
-/* Simple register allocation. TMP1 is preferred if possible. */
if (needstype || needsscript)
{
if (needschar && !charsaved)
- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
- add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
- if (needschar)
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+
+ OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+ OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
+ OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
+
+ /* Before anything else, we deal with scripts. */
+ if (needsscript)
{
- if (needstype)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+
+ ccbegin = cc;
+
+ while (*cc != XCL_END)
{
- OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
- typereg = RETURN_ADDR;
+ if (*cc == XCL_SINGLE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ }
+ else if (*cc == XCL_RANGE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ GETCHARINCTEST(c, cc);
+ }
+ else
+ {
+ SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
+ cc++;
+ if (*cc == PT_SC)
+ {
+ compares--;
+ invertcmp = (compares == 0 && list != backtracks);
+ if (cc[-1] == XCL_NOTPROP)
+ invertcmp ^= 0x1;
+ jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
+ add_jump(compiler, compares > 0 ? list : backtracks, jump);
+ }
+ cc += 2;
+ }
}
- if (needsscript)
- scriptreg = TMP3;
- OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+ cc = ccbegin;
}
- else if (needstype && needsscript)
- scriptreg = TMP3;
- /* In all other cases only one of them was specified, and that can goes to TMP1. */
- if (needsscript)
+ if (needschar)
{
- if (scriptreg == TMP1)
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+ }
+
+ if (needstype)
+ {
+ if (!needschar)
{
- OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
- OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
}
else
{
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
- OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
+ OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ typereg = RETURN_ADDR;
}
}
}
@@ -5065,14 +5119,8 @@
switch(*cc)
{
case PT_ANY:
- if (list != backtracks)
- {
- if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
- continue;
- }
- else if (cc[-1] == XCL_NOTPROP)
- continue;
- jump = JUMP(SLJIT_JUMP);
+ if (!invertcmp)
+ jump = JUMP(SLJIT_JUMP);
break;
case PT_LAMP:
@@ -5096,7 +5144,8 @@
break;
case PT_SC:
- jump = CMP(SLJIT_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
+ compares++;
+ /* Do nothing. */
break;
case PT_SPACE:
@@ -5262,6 +5311,10 @@
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
+
+ default:
+ SLJIT_ASSERT_STOP();
+ break;
}
cc += 2;
}
@@ -6123,7 +6176,7 @@
int min = 0, max = 0;
BOOL minimize;
-PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
if (ref)
offset = GET2(cc, 1) << 1;
@@ -6243,7 +6296,7 @@
}
JUMPHERE(zerolength);
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
count_match(common);
return cc;
@@ -6289,7 +6342,7 @@
}
}
-BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
@@ -6303,7 +6356,7 @@
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
}
else if (max > 0)
OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
@@ -7997,47 +8050,19 @@
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int tmp_base, tmp_offset;
-PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
+PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
-switch(type)
+if (type != OP_EXTUNI)
{
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
- case OP_ANYNL:
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_CLASS:
- case OP_NCLASS:
tmp_base = TMP3;
tmp_offset = 0;
- break;
-
- default:
- SLJIT_ASSERT_STOP();
- /* Fall through. */
-
- case OP_EXTUNI:
- case OP_XCLASS:
- case OP_NOTPROP:
- case OP_PROP:
+ }
+else
+ {
tmp_base = SLJIT_MEM1(SLJIT_SP);
tmp_offset = POSSESSIVE0;
- break;
}
switch(opcode)
@@ -8117,7 +8142,7 @@
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, base, offset1, SLJIT_IMM, min + 1));
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
}
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
case OP_MINSTAR:
@@ -8127,7 +8152,7 @@
if (private_data_ptr == 0)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
case OP_MINUPTO:
@@ -8138,7 +8163,7 @@
OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
if (opcode == OP_CRMINRANGE)
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
case OP_QUERY:
@@ -8148,7 +8173,7 @@
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode == OP_QUERY)
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
- BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
+ BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
case OP_EXACT:
@@ -8707,7 +8732,7 @@
set_jumps(current->topbacktracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
}
else
{
@@ -8727,7 +8752,7 @@
}
skip_char_back(common);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
if (opcode == OP_CRRANGE)
set_jumps(current->topbacktracks, LABEL());
JUMPHERE(jump);
@@ -8743,7 +8768,7 @@
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
if (private_data_ptr == 0)
free_stack(common, 1);
@@ -8770,9 +8795,9 @@
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
if (opcode == OP_CRMINRANGE && max == 0)
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
else
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
if (private_data_ptr == 0)
@@ -8782,12 +8807,12 @@
case OP_QUERY:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
jump = JUMP(SLJIT_JUMP);
set_jumps(current->topbacktracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
JUMPHERE(jump);
if (private_data_ptr == 0)
free_stack(common, 1);
@@ -8798,7 +8823,7 @@
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
- JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
+ JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
JUMPHERE(jump);
if (private_data_ptr == 0)
@@ -8837,12 +8862,12 @@
set_jumps(current->topbacktracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
return;
}
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
-CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
+CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
set_jumps(current->topbacktracks, LABEL());
free_stack(common, ref ? 2 : 3);
}