Revision: 929
http://vcs.pcre.org/viewvc?view=rev&revision=929
Author: zherczeg
Date: 2012-02-24 11:07:47 +0000 (Fri, 24 Feb 2012)
Log Message:
-----------
(*MARK) support, set_SOM optimization and other fixes in JIT
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/pcre_jit_compile.c
code/trunk/pcre_jit_test.c
code/trunk/pcretest.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2012-02-23 17:57:01 UTC (rev 928)
+++ code/trunk/ChangeLog 2012-02-24 11:07:47 UTC (rev 929)
@@ -16,33 +16,35 @@
5. Fixed several bugs concerned with partial matching of items that consist
of more than one character:
-
+
(a) /^(..)\1/ did not partially match "aba" because checking references was
done on an "all or nothing" basis. This also applied to repeated
references.
-
+
(b) \R did not give a hard partial match if \r was found at the end of the
subject.
-
+
(c) \X did not give a hard partial match after matching one or more
characters at the end of the subject.
-
+
(d) When newline was set to CRLF, a pattern such as /a$/ did not recognize
a partial match for the string "\r".
-
+
(e) When newline was set to CRLF, the metacharacter "." did not recognize
a partial match for a CR character at the end of the subject string.
-
+
6. If JIT is requested using /S++ or -s++ (instead of just /S+ or -s+) when
running pcretest, the text "(JIT)" is added to the output whenever JIT is
actually used to run the match.
-
+
7. Individual JIT compile options can be set in pcretest by following -s+[+]
or /S+[+] with a digit between 1 and 7.
8. OP_NOT now supports any UTF character, not just single-byte ones.
+9. (*MARK) control verb is now supported by the JIT compiler.
+
Version 8.30 04-February-2012
-----------------------------
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2012-02-23 17:57:01 UTC (rev 928)
+++ code/trunk/pcre_exec.c 2012-02-24 11:07:47 UTC (rev 929)
@@ -6430,11 +6430,12 @@
rc = PRIV(jit_exec)(re, extra_data->executable_jit,
(const pcre_uchar *)subject, length, start_offset, options,
((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
- ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
-
+ ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount,
+ ((extra_data->flags & PCRE_EXTRA_MARK) != 0) ? extra_data->mark : NULL);
+
/* PCRE_ERROR_NULL means that the selected normal or partial matching
mode is not compiled. In this case we simply fallback to interpreter. */
-
+
if (rc != PCRE_ERROR_NULL) return rc;
}
#endif
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2012-02-23 17:57:01 UTC (rev 928)
+++ code/trunk/pcre_internal.h 2012-02-24 11:07:47 UTC (rev 929)
@@ -2295,7 +2295,8 @@
#ifdef SUPPORT_JIT
extern void PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *, int);
extern int PRIV(jit_exec)(const REAL_PCRE *, void *,
- const pcre_uchar *, int, int, int, int, int *, int);
+ const pcre_uchar *, int, int, int, int, int *, int,
+ pcre_uchar **);
extern void PRIV(jit_free)(void *);
extern int PRIV(jit_get_size)(void *);
extern const char* PRIV(jit_get_target)(void);
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2012-02-23 17:57:01 UTC (rev 928)
+++ code/trunk/pcre_jit_compile.c 2012-02-24 11:07:47 UTC (rev 929)
@@ -152,7 +152,8 @@
const pcre_uchar *begin;
const pcre_uchar *end;
int *offsets;
- pcre_uchar *ptr;
+ pcre_uchar *uchar_ptr;
+ pcre_uchar *mark_ptr;
/* Everything else after. */
int offsetcount;
int calllimit;
@@ -287,6 +288,8 @@
int hit_start;
/* End pointer of the first line. */
int first_line_end;
+ /* Points to the marked string. */
+ int mark_ptr;
/* Other */
const pcre_uint8 *fcc;
@@ -296,6 +299,7 @@
int newline;
int bsr_nltype;
int endonly;
+ BOOL has_set_som;
sljit_w ctypes;
sljit_uw name_table;
sljit_w name_count;
@@ -370,7 +374,8 @@
enum {
frame_end = 0,
- frame_setstrbegin = -1
+ frame_setstrbegin = -1,
+ frame_setmark = -2
};
/* Undefine sljit macros. */
@@ -650,6 +655,9 @@
case OP_SCBRAPOS:
return cc + 1 + LINK_SIZE + IMM2_SIZE;
+ case OP_MARK:
+ return cc + 1 + 2 + cc[1];
+
default:
return NULL;
}
@@ -664,6 +672,11 @@
{
switch(*cc)
{
+ case OP_SET_SOM:
+ common->has_set_som = TRUE;
+ cc += 1;
+ break;
+
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
@@ -702,6 +715,15 @@
cc += 1 + LINK_SIZE;
break;
+ case OP_MARK:
+ if (common->mark_ptr == 0)
+ {
+ common->mark_ptr = common->ovector_start;
+ common->ovector_start += sizeof(sljit_w);
+ }
+ cc += 1 + 2 + cc[1];
+ break;
+
default:
cc = next_opcode(common, cc);
if (cc == NULL)
@@ -767,7 +789,8 @@
pcre_uchar *ccend = bracketend(cc);
int length = 0;
BOOL possessive = FALSE;
-BOOL setsom_found = FALSE;
+BOOL setsom_found = recursive;
+BOOL setmark_found = recursive;
if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
{
@@ -781,15 +804,39 @@
switch(*cc)
{
case OP_SET_SOM:
- case OP_RECURSE:
+ SLJIT_ASSERT(common->has_set_som);
if (!setsom_found)
{
length += 2;
setsom_found = TRUE;
}
- cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
+ cc += 1;
break;
+ case OP_MARK:
+ SLJIT_ASSERT(common->mark_ptr != 0);
+ if (!setmark_found)
+ {
+ length += 2;
+ setmark_found = TRUE;
+ }
+ cc += 1 + 2 + cc[1];
+ break;
+
+ case OP_RECURSE:
+ if (common->has_set_som && !setsom_found)
+ {
+ length += 2;
+ setsom_found = TRUE;
+ }
+ if (common->mark_ptr != 0 && !setmark_found)
+ {
+ length += 2;
+ setmark_found = TRUE;
+ }
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_CBRA:
case OP_CBRAPOS:
case OP_SCBRA:
@@ -817,7 +864,8 @@
{
DEFINE_COMPILER;
pcre_uchar *ccend = bracketend(cc);
-BOOL setsom_found = FALSE;
+BOOL setsom_found = recursive;
+BOOL setmark_found = recursive;
int offset;
/* >= 1 + shortest item size (2) */
@@ -832,7 +880,7 @@
switch(*cc)
{
case OP_SET_SOM:
- case OP_RECURSE:
+ SLJIT_ASSERT(common->has_set_som);
if (!setsom_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
@@ -842,9 +890,45 @@
stackpos += (int)sizeof(sljit_w);
setsom_found = TRUE;
}
- cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
+ cc += 1;
break;
+ case OP_MARK:
+ SLJIT_ASSERT(common->mark_ptr != 0);
+ if (!setmark_found)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
+ stackpos += (int)sizeof(sljit_w);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+ stackpos += (int)sizeof(sljit_w);
+ setmark_found = TRUE;
+ }
+ cc += 1 + 2 + cc[1];
+ break;
+
+ case OP_RECURSE:
+ if (common->has_set_som && !setsom_found)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
+ stackpos += (int)sizeof(sljit_w);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+ stackpos += (int)sizeof(sljit_w);
+ setsom_found = TRUE;
+ }
+ if (common->mark_ptr != 0 && !setmark_found)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
+ stackpos += (int)sizeof(sljit_w);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
+ stackpos += (int)sizeof(sljit_w);
+ setmark_found = TRUE;
+ }
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_CBRA:
case OP_CBRAPOS:
case OP_SCBRA:
@@ -1258,7 +1342,11 @@
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
+if (common->mark_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
+if (common->mark_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
@@ -2320,6 +2408,17 @@
JUMPTO(SLJIT_JUMP, mainloop);
JUMPHERE(jump);
+if (common->mark_ptr != 0)
+ {
+ jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
+ JUMPTO(SLJIT_JUMP, mainloop);
+
+ JUMPHERE(jump);
+ }
+
/* Unknown command. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
JUMPTO(SLJIT_JUMP, mainloop);
@@ -2612,7 +2711,7 @@
{
/* This function would be ineffective to do in JIT level. */
int c1, c2;
-const pcre_uchar *src2 = args->ptr;
+const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
while (src1 < end1)
@@ -3815,7 +3914,7 @@
/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
if (common->mode == JIT_COMPILE)
@@ -4050,9 +4149,20 @@
common->entries = entry;
}
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
-allocate_stack(common, 1);
-OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+if (common->has_set_som && common->mark_ptr != 0)
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
+ allocate_stack(common, 2);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ }
+else if (common->has_set_som || common->mark_ptr != 0)
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (OVECTOR(0)) : common->mark_ptr);
+ allocate_stack(common, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ }
if (entry->entry == NULL)
add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
@@ -5519,10 +5629,10 @@
case OP_SET_SOM:
PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
allocate_stack(common, 1);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
cc++;
break;
@@ -5687,6 +5797,19 @@
cc = compile_bracketpos_hotpath(common, cc, parent);
break;
+ case OP_MARK:
+ PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
+ SLJIT_ASSERT(common->mark_ptr != 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
+ allocate_stack(common, 1);
+ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
+ cc += 1 + 2 + cc[1];
+ break;
+
case OP_FAIL:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
@@ -5880,9 +6003,21 @@
DEFINE_COMPILER;
set_jumps(current->topfallbacks, LABEL());
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
-free_stack(common, 1);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
+
+if (common->has_set_som && common->mark_ptr != 0)
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ free_stack(common, 2);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
+ }
+else if (common->has_set_som || common->mark_ptr != 0)
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ free_stack(common, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (OVECTOR(0)) : common->mark_ptr, TMP2, 0);
+ }
}
static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
@@ -6518,6 +6653,12 @@
compile_braminzero_fallbackpath(common, current);
break;
+ case OP_MARK:
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ free_stack(common, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0);
+ break;
+
case OP_FAIL:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
@@ -6561,7 +6702,7 @@
copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0);
if (needsframe)
- init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
+ init_frame(common, cc, framesize + alternativesize - 1, alternativesize, TRUE);
if (alternativesize > 0)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
@@ -6604,11 +6745,9 @@
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head);
if (needsframe)
{
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
}
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
@@ -6799,6 +6938,8 @@
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
/* Copy the limit of allowed recursions. */
OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
+if (common->mark_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
/* Copy the beginning of the string. */
if (mode == JIT_PARTIAL_SOFT_COMPILE)
{
@@ -6866,7 +7007,7 @@
{
if (mode == JIT_COMPILE && study != NULL && study->minlength > 1)
{
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
+ OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
}
else
@@ -6877,7 +7018,7 @@
SLJIT_ASSERT(common->first_line_end != 0);
if (mode == JIT_COMPILE && study != NULL && study->minlength > 1)
{
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
+ OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength + 1));
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
@@ -7064,7 +7205,7 @@
int
PRIV(jit_exec)(const REAL_PCRE *re, void *executable_funcs,
const pcre_uchar *subject, int length, int start_offset, int options,
- int match_limit, int *offsets, int offsetcount)
+ int match_limit, int *offsets, int offsetcount, pcre_uchar **mark_ptr)
{
executable_functions *functions = (executable_functions *)executable_funcs;
union {
@@ -7089,7 +7230,9 @@
arguments.str = subject + start_offset;
arguments.begin = subject;
arguments.end = subject + length;
-arguments.calllimit = match_limit; /* JIT decreases this value less times. */
+arguments.mark_ptr = NULL;
+/* JIT decreases this value less frequently than the interpreter. */
+arguments.calllimit = match_limit;
arguments.notbol = (options & PCRE_NOTBOL) != 0;
arguments.noteol = (options & PCRE_NOTEOL) != 0;
arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
@@ -7124,6 +7267,9 @@
if (retval * 2 > offsetcount)
retval = 0;
+if (mark_ptr != NULL)
+ *mark_ptr = arguments.mark_ptr;
+
return retval;
}
Modified: code/trunk/pcre_jit_test.c
===================================================================
--- code/trunk/pcre_jit_test.c 2012-02-23 17:57:01 UTC (rev 928)
+++ code/trunk/pcre_jit_test.c 2012-02-24 11:07:47 UTC (rev 929)
@@ -663,6 +663,22 @@
{ MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
{ MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
+ /* (*MARK) verb. */
+ { MUA, 0, "a(*MARK:aa)a", "ababaa" },
+ { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
+ { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
+ { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
+ { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
+ { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
+ { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
+ { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
+ { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
+ { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
+ { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
+ { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
+ { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
+ { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
+
/* Deep recursion. */
{ MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
{ MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
@@ -876,23 +892,28 @@
int is_successful, is_ascii_pattern, is_ascii_input;
int total = 0;
int successful = 0;
+ int successful_row = 0;
int counter = 0;
int study_mode;
#ifdef SUPPORT_PCRE8
pcre *re8;
pcre_extra *extra8;
+ pcre_extra dummy_extra8;
int ovector8_1[32];
int ovector8_2[32];
int return_value8_1, return_value8_2;
+ unsigned char *mark8_1, *mark8_2;
int utf8 = 0, ucp8 = 0;
int disabled_flags8 = 0;
#endif
#ifdef SUPPORT_PCRE16
pcre16 *re16;
pcre16_extra *extra16;
+ pcre16_extra dummy_extra16;
int ovector16_1[32];
int ovector16_2[32];
int return_value16_1, return_value16_2;
+ PCRE_UCHAR16 *mark16_1, *mark16_2;
int utf16 = 0, ucp16 = 0;
int disabled_flags16 = 0;
int length16;
@@ -970,6 +991,7 @@
pcre_free(re8);
re8 = NULL;
}
+ extra8->flags |= PCRE_EXTRA_MARK;
} else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
#endif
@@ -1000,6 +1022,7 @@
pcre16_free(re16);
re16 = NULL;
}
+ extra16->flags |= PCRE_EXTRA_MARK;
} else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
#endif
@@ -1022,10 +1045,16 @@
for (i = 0; i < 32; ++i)
ovector8_2[i] = -2;
if (re8) {
+ mark8_1 = NULL;
+ mark8_2 = NULL;
setstack8(extra8);
+ extra8->mark = &mark8_1;
return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
- return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
+ memset(&dummy_extra8, 0, sizeof(pcre_extra));
+ dummy_extra8.flags = PCRE_EXTRA_MARK;
+ dummy_extra8.mark = &mark8_2;
+ return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
}
#endif
@@ -1038,14 +1067,20 @@
for (i = 0; i < 32; ++i)
ovector16_2[i] = -2;
if (re16) {
+ mark16_1 = NULL;
+ mark16_2 = NULL;
setstack16(extra16);
if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
else
length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
+ extra16->mark = &mark16_1;
return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
- return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
+ memset(&dummy_extra16, 0, sizeof(pcre16_extra));
+ dummy_extra16.flags = PCRE_EXTRA_MARK;
+ dummy_extra16.mark = &mark16_2;
+ return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
}
#endif
@@ -1173,8 +1208,22 @@
#endif
}
- if (is_successful)
- successful++;
+ if (is_successful) {
+#ifdef SUPPORT_PCRE8
+ if (mark8_1 != mark8_2) {
+ printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
+ total, current->pattern, current->input);
+ is_successful = 0;
+ }
+#endif
+#ifdef SUPPORT_PCRE16
+ if (mark16_1 != mark16_2) {
+ printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
+ total, current->pattern, current->input);
+ is_successful = 0;
+ }
+#endif
+ }
#ifdef SUPPORT_PCRE8
if (re8) {
@@ -1189,7 +1238,17 @@
}
#endif
- printf(".");
+ if (is_successful) {
+ successful++;
+ successful_row++;
+ printf(".");
+ if (successful_row >= 60) {
+ successful_row = 0;
+ printf("\n");
+ }
+ } else
+ successful_row = 0;
+
fflush(stdout);
current++;
}
Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c 2012-02-23 17:57:01 UTC (rev 928)
+++ code/trunk/pcretest.c 2012-02-24 11:07:47 UTC (rev 929)
@@ -1,4 +1,4 @@
-/*.************************************************
+/*************************************************
* PCRE testing program *
*************************************************/