Revision: 225
http://www.exim.org/viewvc/pcre2?view=rev&revision=225
Author: ph10
Date: 2015-03-14 12:20:18 +0000 (Sat, 14 Mar 2015)
Log Message:
-----------
Add string offset within the pattern to the data passed to a callout with a
string argument.
Modified Paths:
--------------
code/trunk/src/pcre2.h.in
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_dfa_match.c
code/trunk/src/pcre2_jit_compile.c
code/trunk/src/pcre2_match.c
code/trunk/src/pcre2_printint.c
code/trunk/src/pcre2test.c
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput6
Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2.h.in 2015-03-14 12:20:18 UTC (rev 225)
@@ -338,6 +338,7 @@
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 1 -------------------------- */ \
+ PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
uint32_t callout_string_length; /* Length of string compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_compile.c 2015-03-14 12:20:18 UTC (rev 225)
@@ -5652,17 +5652,19 @@
for the terminating zero. Any doubled delimiters within the string
make this an overestimate, but it is not worth bothering about. */
- (*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE);
+ (*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE);
}
/* In the real compile we can copy the string, knowing that it is
syntactically OK. The starting delimiter is included so that the
- client can discover it if they want. */
+ client can discover it if they want. We also pass the start offset to
+ help a script language give better error messages. */
else
{
- PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE);
+ PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
*callout_string++ = *ptr++;
+ PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */
for(;;)
{
if (*ptr == delimiter)
@@ -7302,7 +7304,7 @@
scode += 1 + LINK_SIZE;
if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
- else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
+ else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
switch (*scode)
{
Modified: code/trunk/src/pcre2_dfa_match.c
===================================================================
--- code/trunk/src/pcre2_dfa_match.c 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_dfa_match.c 2015-03-14 12:20:18 UTC (rev 225)
@@ -2631,15 +2631,17 @@
if (code[LINK_SIZE + 1] == OP_CALLOUT)
{
cb.callout_number = code[2 + 3*LINK_SIZE];
+ cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
- cb.callout_string = code + (2 + 4*LINK_SIZE) + 1;
+ cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE);
+ cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
cb.callout_string_length =
- callout_length - (1 + 3*LINK_SIZE) - 2;
+ callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
@@ -2997,15 +2999,17 @@
if (*code == OP_CALLOUT)
{
cb.callout_number = code[1 + 2*LINK_SIZE];
+ cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
- cb.callout_string = code + (1 + 3*LINK_SIZE) + 1;
+ cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE);
+ cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length =
- callout_length - (1 + 3*LINK_SIZE) - 2;
+ callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_jit_compile.c 2015-03-14 12:20:18 UTC (rev 225)
@@ -6346,6 +6346,7 @@
? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1;
sljit_sw value2;
+sljit_sw value3;
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@@ -6373,15 +6374,18 @@
{
value1 = 0;
value2 = 0;
+ value3 = 0;
}
else
{
- value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1);
- value2 = (callout_length - (1 + 3*LINK_SIZE + 2));
+ value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
+ value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
+ value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
}
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
+OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
/* Needed to save important temporary registers. */
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_match.c 2015-03-14 12:20:18 UTC (rev 225)
@@ -1333,15 +1333,17 @@
if (*ecode == OP_CALLOUT)
{
cb.callout_number = ecode[1 + 2*LINK_SIZE];
+ cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
- cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
+ cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
+ cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length =
- callout_length - (1 + 3*LINK_SIZE) - 2;
+ callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
@@ -1757,15 +1759,17 @@
if (*ecode == OP_CALLOUT)
{
cb.callout_number = ecode[1 + 2*LINK_SIZE];
+ cb.callout_string_offset = 0;
cb.callout_string = NULL;
cb.callout_string_length = 0;
}
else
{
cb.callout_number = 0;
- cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
+ cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
+ cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
cb.callout_string_length =
- callout_length - (1 + 3*LINK_SIZE) - 2;
+ callout_length - (1 + 4*LINK_SIZE) - 2;
}
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
Modified: code/trunk/src/pcre2_printint.c
===================================================================
--- code/trunk/src/pcre2_printint.c 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_printint.c 2015-03-14 12:20:18 UTC (rev 225)
@@ -600,18 +600,18 @@
break;
case OP_CALLOUT_STR:
- c = code[1 + 3*LINK_SIZE];
+ c = code[1 + 4*LINK_SIZE];
fprintf(f, " %s %c", OP_names[*code], c);
extra = GET(code, 1 + 2*LINK_SIZE);
- print_custring(f, code + 2 + 3*LINK_SIZE);
-
+ print_custring(f, code + 2 + 4*LINK_SIZE);
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
if (c == PRIV(callout_start_delims)[i])
{
c = PRIV(callout_end_delims)[i];
break;
}
- fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE));
+ fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
+ GET(code, 1 + LINK_SIZE));
break;
case OP_PROP:
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2test.c 2015-03-14 12:20:18 UTC (rev 225)
@@ -4546,7 +4546,8 @@
if (cb->callout_string != NULL)
{
uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
- fprintf(outfile, "Callout: %c", delimiter);
+ fprintf(outfile, "Callout (%lu): %c",
+ (unsigned long int)cb->callout_string_offset, delimiter);
PCHARSV(cb->callout_string, 0,
cb->callout_string_length, utf, outfile);
for (i = 0; callout_start_delims[i] != 0; i++)
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/testdata/testoutput2 2015-03-14 12:20:18 UTC (rev 225)
@@ -13987,7 +13987,7 @@
------------------------------------------------------------------
Bra
a
- CalloutStr "a)b"c" 13 0
+ CalloutStr "a)b"c" 5 13 0
Ket
End
------------------------------------------------------------------
@@ -13996,18 +13996,18 @@
------------------------------------------------------------------
Bra
ab
- CalloutStr " any text with spaces " 30 1
+ CalloutStr " any text with spaces " 6 30 1
cde
Ket
End
------------------------------------------------------------------
abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
--->abcde
^ ^ c
0: abcde
12abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
--->12abcde
^ ^ c
0: abcde
@@ -14021,7 +14021,7 @@
/^a(b)c(?C"AB")def/
abcdef
-Callout: "AB"
+Callout (10): "AB"
--->abcdef
^ ^ d
0: abcdef
@@ -14046,13 +14046,13 @@
b
Ket
c
- CalloutStr {AB} 14 1
+ CalloutStr {AB} 10 14 1
def
Ket
End
------------------------------------------------------------------
abcdef\=callout_capture
-Callout: {AB} last capture = 1
+Callout (10): {AB} last capture = 1
0: <unset>
1: b
--->abcdef
@@ -14063,14 +14063,14 @@
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B
------------------------------------------------------------------
Bra
- CalloutStr `a`b` 10 0
- CalloutStr 'a'b' 20 0
- CalloutStr "a"b" 30 0
- CalloutStr ^a^b^ 40 0
- CalloutStr %a%b% 50 0
- CalloutStr #a#b# 60 0
- CalloutStr $a$b$ 70 0
- CalloutStr {a}b} 80 0
+ CalloutStr `a`b` 4 10 0
+ CalloutStr 'a'b' 14 20 0
+ CalloutStr "a"b" 24 30 0
+ CalloutStr ^a^b^ 34 40 0
+ CalloutStr %a%b% 44 50 0
+ CalloutStr #a#b# 54 60 0
+ CalloutStr $a$b$ 64 70 0
+ CalloutStr {a}b} 74 80 0
Ket
End
------------------------------------------------------------------
@@ -14080,15 +14080,15 @@
Bra
Bra
a
- CalloutStr `code` 14 0
+ CalloutStr `code` 8 14 0
Ket
Bra
a
- CalloutStr `code` 14 0
+ CalloutStr `code` 8 14 0
Ket
Bra
a
- CalloutStr `code` 14 0
+ CalloutStr `code` 8 14 0
Ket
Ket
End
@@ -14124,7 +14124,7 @@
Bra
^
Cond
- CalloutStr $abc$ 12 7
+ CalloutStr $abc$ 7 12 7
Assert
abc
Ket
@@ -14136,35 +14136,35 @@
End
------------------------------------------------------------------
abcdefg
-Callout: $abc$
+Callout (7): $abc$
--->abcdefg
^ (?=abc)
0: abcd
xyz123
-Callout: $abc$
+Callout (7): $abc$
--->xyz123
^ (?=abc)
0: xyz
/^ab(?C'first')cd(?C"second")ef/
abcdefg
-Callout: 'first'
+Callout (7): 'first'
--->abcdefg
^ ^ c
-Callout: "second"
+Callout (20): "second"
--->abcdefg
^ ^ e
0: abcdef
/(?:a(?C`code`)){3}X/
aaaXY
-Callout: `code`
+Callout (8): `code`
--->aaaXY
^^ )
-Callout: `code`
+Callout (8): `code`
--->aaaXY
^ ^ )
-Callout: `code`
+Callout (8): `code`
--->aaaXY
^ ^ )
0: aaaX
Modified: code/trunk/testdata/testoutput6
===================================================================
--- code/trunk/testdata/testoutput6 2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/testdata/testoutput6 2015-03-14 12:20:18 UTC (rev 225)
@@ -7777,18 +7777,18 @@
------------------------------------------------------------------
Bra
ab
- CalloutStr " any text with spaces " 30 1
+ CalloutStr " any text with spaces " 6 30 1
cde
Ket
End
------------------------------------------------------------------
abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
--->abcde
^ ^ c
0: abcde
12abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
--->12abcde
^ ^ c
0: abcde
@@ -7801,7 +7801,7 @@
/^a(b)c(?C"AB")def/
abcdef
-Callout: "AB"
+Callout (10): "AB"
--->abcdef
^ ^ d
0: abcdef
@@ -7823,13 +7823,13 @@
b
Ket
c
- CalloutStr {AB} 14 1
+ CalloutStr {AB} 10 14 1
def
Ket
End
------------------------------------------------------------------
abcdef\=callout_capture
-Callout: {AB} last capture = 0
+Callout (10): {AB} last capture = 0
0:
--->abcdef
^ ^ d
@@ -7865,7 +7865,7 @@
Bra
^
Cond
- CalloutStr $abc$ 12 7
+ CalloutStr $abc$ 7 12 7
Assert
abc
Ket
@@ -7877,35 +7877,35 @@
End
------------------------------------------------------------------
abcdefg
-Callout: $abc$
+Callout (7): $abc$
--->abcdefg
^ (?=abc)
0: abcd
xyz123
-Callout: $abc$
+Callout (7): $abc$
--->xyz123
^ (?=abc)
0: xyz
/^ab(?C'first')cd(?C"second")ef/
abcdefg
-Callout: 'first'
+Callout (7): 'first'
--->abcdefg
^ ^ c
-Callout: "second"
+Callout (20): "second"
--->abcdefg
^ ^ e
0: abcdef
/(?:a(?C`code`)){3}X/
aaaXY
-Callout: `code`
+Callout (8): `code`
--->aaaXY
^^ )
-Callout: `code`
+Callout (8): `code`
--->aaaXY
^ ^ )
-Callout: `code`
+Callout (8): `code`
--->aaaXY
^ ^ )
0: aaaX