Revision: 220
http://www.exim.org/viewvc/pcre2?view=rev&revision=220
Author: ph10
Date: 2015-03-11 17:44:16 +0000 (Wed, 11 Mar 2015)
Log Message:
-----------
Code for callouts with string arguments. Documentation not yet updated.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/configure.ac
code/trunk/src/pcre2.h.in
code/trunk/src/pcre2_auto_possess.c
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_dfa_match.c
code/trunk/src/pcre2_error.c
code/trunk/src/pcre2_internal.h
code/trunk/src/pcre2_jit_compile.c
code/trunk/src/pcre2_match.c
code/trunk/src/pcre2_printint.c
code/trunk/src/pcre2_study.c
code/trunk/src/pcre2_tables.c
code/trunk/src/pcre2test.c
code/trunk/testdata/testinput2
code/trunk/testdata/testinput6
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput6
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/ChangeLog 2015-03-11 17:44:16 UTC (rev 220)
@@ -1,6 +1,12 @@
Change Log for PCRE2
--------------------
+Version 10.20 xx-xx-2015
+------------------------
+
+1. Callouts with string arguments have been added.
+
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/configure.ac 2015-03-11 17:44:16 UTC (rev 220)
@@ -9,9 +9,9 @@
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [10])
-m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2015-03-06])
+m4_define(pcre2_minor, [20])
+m4_define(pcre2_prerelease, [-RC1])
+m4_define(pcre2_date, [2015-03-11])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2.h.in 2015-03-11 17:44:16 UTC (rev 220)
@@ -337,6 +337,9 @@
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
+ /* ------------------- Added for Version 1 -------------------------- */ \
+ PCRE2_SPTR callout_string; /* String compiled into pattern */ \
+ uint32_t callout_string_length; /* Length of string compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
} pcre2_callout_block;
Modified: code/trunk/src/pcre2_auto_possess.c
===================================================================
--- code/trunk/src/pcre2_auto_possess.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_auto_possess.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -604,6 +604,12 @@
continue;
}
+ if (c == OP_CALLOUT_STR)
+ {
+ code += GET(code, 1 + 2*LINK_SIZE);
+ continue;
+ }
+
if (c == OP_ALT)
{
do code += GET(code, 1); while (*code == OP_ALT);
@@ -1234,6 +1240,10 @@
code += 2;
break;
+ case OP_CALLOUT_STR:
+ code += GET(code, 1 + 2*LINK_SIZE);
+ break;
+
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
code += GET(code, 1);
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_compile.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -573,7 +573,8 @@
ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
- ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80 };
+ ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
+ ERR81, ERR82 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -617,7 +618,6 @@
{ (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE }
};
-
/* This table is used when converting repeating opcodes into possessified
versions as a result of an explicit possessive quantifier such as ++. A zero
value means there is no possessified version - in those cases the item in
@@ -730,11 +730,11 @@
static PCRE2_UCHAR *
auto_callout(PCRE2_UCHAR *code, PCRE2_SPTR ptr, compile_block *cb)
{
-*code++ = OP_CALLOUT;
-*code++ = 255;
-PUT(code, 0, ptr - cb->start_pattern); /* Pattern offset */
-PUT(code, LINK_SIZE, 0); /* Default length */
-return code + 2 * LINK_SIZE;
+code[0] = OP_CALLOUT;
+PUT(code, 1, ptr - cb->start_pattern); /* Pattern offset */
+PUT(code, 1 + LINK_SIZE, 0); /* Default length */
+code[1 + 2*LINK_SIZE] = 255;
+return code + PRIV(OP_lengths)[OP_CALLOUT];
}
@@ -759,8 +759,8 @@
complete_callout(PCRE2_UCHAR *previous_callout, PCRE2_SPTR ptr,
compile_block *cb)
{
-size_t length = ptr - cb->start_pattern - GET(previous_callout, 2);
-PUT(previous_callout, 2 + LINK_SIZE, length);
+size_t length = ptr - cb->start_pattern - GET(previous_callout, 1);
+PUT(previous_callout, 1 + LINK_SIZE, length);
}
@@ -909,6 +909,10 @@
cc += PRIV(OP_lengths)[*cc];
break;
+ case OP_CALLOUT_STR:
+ cc += GET(cc, 1 + 2*LINK_SIZE);
+ break;
+
/* Handle literal characters */
case OP_CHAR:
@@ -1157,6 +1161,10 @@
code += PRIV(OP_lengths)[*code];
break;
+ case OP_CALLOUT_STR:
+ code += GET(code, 1 + 2*LINK_SIZE);
+ break;
+
default:
return code;
}
@@ -2279,11 +2287,13 @@
if (c == OP_END) return NULL;
- /* XCLASS is used for classes that cannot be represented just by a bit
- map. This includes negated single high-valued characters. The length in
- the table is zero; the actual length is stored in the compiled code. */
+ /* XCLASS is used for classes that cannot be represented just by a bit map.
+ This includes negated single high-valued characters. CALLOUT_STR is used for
+ callouts with string arguments. In both cases the length in the table is
+ zero; the actual length is stored in the compiled code. */
if (c == OP_XCLASS) code += GET(code, 1);
+ else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
/* Handle recursion */
@@ -2442,11 +2452,13 @@
if (c == OP_END) return NULL;
if (c == OP_RECURSE) return code;
- /* XCLASS is used for classes that cannot be represented just by a bit
- map. This includes negated single high-valued characters. The length in
- the table is zero; the actual length is stored in the compiled code. */
+ /* XCLASS is used for classes that cannot be represented just by a bit map.
+ This includes negated single high-valued characters. CALLOUT_STR is used for
+ callouts with string arguments. In both cases the length in the table is
+ zero; the actual length is stored in the compiled code. */
if (c == OP_XCLASS) code += GET(code, 1);
+ else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
/* Otherwise, we can get the item's length from the table, except that for
repeated character types, we have to test for \p and \P, which have an extra
@@ -5558,30 +5570,124 @@
/* ------------------------------------------------------------ */
- case CHAR_C: /* Callout - may be followed by digits; */
+ case CHAR_C: /* Callout */
previous_callout = code; /* Save for later completion */
after_manual_callout = 1; /* Skip one item before completing */
- *code++ = OP_CALLOUT;
- {
- int n = 0;
- ptr++;
- while(IS_DIGIT(*ptr))
- n = n * 10 + *ptr++ - CHAR_0;
- if (*ptr != CHAR_RIGHT_PARENTHESIS)
+ ptr++; /* Character after (?C */
+
+ /* A callout may have a string argument, delimited by one of a fixed
+ number of characters, or an undelimited numerical argument, or no
+ argument, which is the same as (?C0). Different opcodes are used for
+ the string and numerical cases (no argument counts as numerical). */
+
+ if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr))
+ {
+ uint32_t delimiter = 0;
+
+ for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
{
- *errorcodeptr = ERR39;
- goto FAILED;
+ if (*ptr == PRIV(callout_start_delims)[i])
+ {
+ delimiter = PRIV(callout_end_delims)[i];
+ break;
+ }
}
- if (n > 255)
+
+ if (delimiter == 0)
{
- *errorcodeptr = ERR38;
- goto FAILED;
+ *errorcodeptr = ERR82;
+ goto FAILED;
+ }
+
+ /* During the pre-compile phase, we parse the string and update the
+ length. There is no need to generate any code. */
+
+ if (lengthptr != NULL) /* Only check the string */
+ {
+ PCRE2_SPTR start = ptr;
+ do
+ {
+ if (++ptr >= cb->end_pattern)
+ {
+ *errorcodeptr = ERR81;
+ ptr = start; /* To give a more useful message */
+ goto FAILED;
+ }
+ if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
+ }
+ while (ptr[0] != delimiter);
+
+ /* Start points to the opening delimiter, ptr points to the
+ closing delimiter. We must allow for including the delimiter and
+ for the terminating zero. Any doubled delimiters within the string
+ make this an overestimate, but it is not worth bothering about. */
+
+ (*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE);
}
- *code++ = n;
- PUT(code, 0, (int)(ptr - cb->start_pattern + 1)); /* Pattern offset */
- PUT(code, LINK_SIZE, 0); /* Default length */
- code += 2 * LINK_SIZE;
+
+ /* In the real compile we can copy the string, knowing that it is
+ syntactically OK. The starting delimiter is included so that the
+ client can discover it if they want. */
+
+ else
+ {
+ PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE);
+ *callout_string++ = *ptr++;
+ for(;;)
+ {
+ if (*ptr == delimiter)
+ {
+ if (ptr[1] == delimiter) ptr++; else break;
+ }
+ *callout_string++ = *ptr++;
+ }
+ *callout_string++ = CHAR_NULL;
+ code[0] = OP_CALLOUT_STR;
+ PUT(code, 1, (int)(ptr + 2 - cb->start_pattern)); /* Next offset */
+ PUT(code, 1 + LINK_SIZE, 0); /* Default length */
+ PUT(code, 1 + 2*LINK_SIZE, /* Compute size */
+ (int)(callout_string - code));
+ code = callout_string;
+ }
+
+ /* Advance to what should be the closing parenthesis, which is
+ checked below. */
+
+ ptr++;
}
+
+ /* Handle a callout with an optional numerical argument, which must be
+ less than or equal to 255. A missing argument gives 0. */
+
+ else
+ {
+ int n = 0;
+ code[0] = OP_CALLOUT; /* Numerical callout */
+ while (IS_DIGIT(*ptr))
+ {
+ n = n * 10 + *ptr++ - CHAR_0;
+ if (n > 255)
+ {
+ *errorcodeptr = ERR38;
+ goto FAILED;
+ }
+ }
+ PUT(code, 1, (int)(ptr - cb->start_pattern + 1)); /* Next offset */
+ PUT(code, 1 + LINK_SIZE, 0); /* Default length */
+ code[1 + 2*LINK_SIZE] = n; /* Callout number */
+ code += PRIV(OP_lengths)[OP_CALLOUT];
+ }
+
+ /* Both formats must have a closing parenthesis */
+
+ if (*ptr != CHAR_RIGHT_PARENTHESIS)
+ {
+ *errorcodeptr = ERR39;
+ goto FAILED;
+ }
+
+ /* Callouts cannot be quantified. */
+
previous = NULL;
continue;
@@ -7164,7 +7270,10 @@
if (op == OP_COND)
{
scode += 1 + LINK_SIZE;
+
if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
+ else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
+
switch (*scode)
{
case OP_CREF:
Modified: code/trunk/src/pcre2_dfa_match.c
===================================================================
--- code/trunk/src/pcre2_dfa_match.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_dfa_match.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -161,6 +161,7 @@
0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
+ 0, /* CALLOUT_STR */
0, /* Alt */
0, /* Ket */
0, /* KetRmax */
@@ -233,6 +234,7 @@
0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
+ 0, /* CALLOUT_STR */
0, /* Alt */
0, /* Ket */
0, /* KetRmax */
@@ -2605,14 +2607,16 @@
is inserted between OP_COND and an assertion condition. This does not
happen for the other conditions. */
- if (code[LINK_SIZE+1] == OP_CALLOUT)
+ if (code[LINK_SIZE + 1] == OP_CALLOUT
+ || code[LINK_SIZE + 1] == OP_CALLOUT_STR)
{
+ unsigned int callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT)
+ ? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 2 + 3*LINK_SIZE);
rrc = 0;
if (mb->callout != NULL)
{
pcre2_callout_block cb;
- cb.version = 0;
- cb.callout_number = code[LINK_SIZE+2];
+ cb.version = 1;
cb.capture_top = 1;
cb.capture_last = 0;
cb.offset_vector = offsets;
@@ -2621,13 +2625,28 @@
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
- cb.pattern_position = GET(code, LINK_SIZE + 3);
- cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
+ cb.pattern_position = GET(code, LINK_SIZE + 2);
+ cb.next_item_length = GET(code, LINK_SIZE + 2 + LINK_SIZE);
+
+ if (code[LINK_SIZE + 1] == OP_CALLOUT)
+ {
+ cb.callout_number = code[2 + 3*LINK_SIZE];
+ cb.callout_string = NULL;
+ cb.callout_string_length = 0;
+ }
+ else
+ {
+ cb.callout_number = 0;
+ cb.callout_string = code + (2 + 4*LINK_SIZE) + 1;
+ cb.callout_string_length =
+ callout_length - (1 + 3*LINK_SIZE) - 2;
+ }
+
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
return rrc; /* Abandon */
}
if (rrc > 0) break; /* Fail this thread */
- code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
+ code += callout_length; /* Skip callout data */
}
condcode = code[LINK_SIZE+1];
@@ -2954,27 +2973,47 @@
/* Handle callouts */
case OP_CALLOUT:
- rrc = 0;
- if (mb->callout != NULL)
+ case OP_CALLOUT_STR:
{
- pcre2_callout_block cb;
- cb.version = 0;
- cb.callout_number = code[1];
- cb.capture_top = 1;
- cb.capture_last = 0;
- cb.offset_vector = offsets;
- cb.mark = NULL; /* No (*MARK) support */
- cb.subject = start_subject;
- cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
- cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
- cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
- cb.pattern_position = GET(code, 2);
- cb.next_item_length = GET(code, 2 + LINK_SIZE);
- if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
- return rrc; /* Abandon */
+ unsigned int callout_length = (*code == OP_CALLOUT)
+ ? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 1 + 2*LINK_SIZE);
+ rrc = 0;
+
+ if (mb->callout != NULL)
+ {
+ pcre2_callout_block cb;
+ cb.version = 1;
+ cb.capture_top = 1;
+ cb.capture_last = 0;
+ cb.offset_vector = offsets;
+ cb.mark = NULL; /* No (*MARK) support */
+ cb.subject = start_subject;
+ cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
+ cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
+ cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
+ cb.pattern_position = GET(code, 1);
+ cb.next_item_length = GET(code, 1 + LINK_SIZE);
+
+ if (*code == OP_CALLOUT)
+ {
+ cb.callout_number = code[1 + 2*LINK_SIZE];
+ cb.callout_string = NULL;
+ cb.callout_string_length = 0;
+ }
+ else
+ {
+ cb.callout_number = 0;
+ cb.callout_string = code + (1 + 3*LINK_SIZE) + 1;
+ cb.callout_string_length =
+ callout_length - (1 + 3*LINK_SIZE) - 2;
+ }
+
+ if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
+ return rrc; /* Abandon */
+ }
+ if (rrc == 0)
+ { ADD_ACTIVE(state_offset + callout_length, 0); }
}
- if (rrc == 0)
- { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
break;
Modified: code/trunk/src/pcre2_error.c
===================================================================
--- code/trunk/src/pcre2_error.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_error.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -161,6 +161,8 @@
"syntax error in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
+ "missing terminating delimiter for callout with string argument\0"
+ "unrecognized string delimiter follows (?C\0"
;
/* Match-time and UTF error texts are in the same format. */
Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_internal.h 2015-03-11 17:44:16 UTC (rev 220)
@@ -1477,84 +1477,85 @@
OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */
OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */
OP_CALLOUT, /* 118 Call out to external function if provided */
+ OP_CALLOUT_STR, /* 119 Call out with string argument */
- OP_ALT, /* 119 Start of alternation */
- OP_KET, /* 120 End of group that doesn't have an unbounded repeat */
- OP_KETRMAX, /* 121 These two must remain together and in this */
- OP_KETRMIN, /* 122 order. They are for groups the repeat for ever. */
- OP_KETRPOS, /* 123 Possessive unlimited repeat. */
+ OP_ALT, /* 120 Start of alternation */
+ OP_KET, /* 121 End of group that doesn't have an unbounded repeat */
+ OP_KETRMAX, /* 122 These two must remain together and in this */
+ OP_KETRMIN, /* 123 order. They are for groups that repeat for ever. */
+ OP_KETRPOS, /* 124 Possessive unlimited repeat. */
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
asserts must remain in order. */
- OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */
- OP_ASSERT, /* 125 Positive lookahead */
- OP_ASSERT_NOT, /* 126 Negative lookahead */
- OP_ASSERTBACK, /* 127 Positive lookbehind */
- OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */
+ OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */
+ OP_ASSERT, /* 126 Positive lookahead */
+ OP_ASSERT_NOT, /* 127 Negative lookahead */
+ OP_ASSERTBACK, /* 128 Positive lookbehind */
+ OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
after the assertions, with ONCE first, as there's a test for >= ONCE for a
subpattern that isn't an assertion. The POS versions must immediately follow
the non-POS versions in each case. */
- OP_ONCE, /* 129 Atomic group, contains captures */
- OP_ONCE_NC, /* 130 Atomic group containing no captures */
- OP_BRA, /* 131 Start of non-capturing bracket */
- OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 133 Start of capturing bracket */
- OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 135 Conditional group */
+ OP_ONCE, /* 130 Atomic group, contains captures */
+ OP_ONCE_NC, /* 131 Atomic group containing no captures */
+ OP_BRA, /* 132 Start of non-capturing bracket */
+ OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 134 Start of capturing bracket */
+ OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 136 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 138 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 140 Conditional group, check empty */
+ OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 139 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 141 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 141 Used to hold a capture number as condition */
- OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
- OP_RREF, /* 143 Used to hold a recursion number as condition */
- OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
- OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
- OP_TRUE, /* 146 Always true (used by VERSION) */
+ OP_CREF, /* 142 Used to hold a capture number as condition */
+ OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
+ OP_RREF, /* 144 Used to hold a recursion number as condition */
+ OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
+ OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
+ OP_TRUE, /* 147 Always true (used by VERSION) */
- OP_BRAZERO, /* 147 These two must remain together and in this */
- OP_BRAMINZERO, /* 148 order. */
- OP_BRAPOSZERO, /* 149 */
+ OP_BRAZERO, /* 148 These two must remain together and in this */
+ OP_BRAMINZERO, /* 149 order. */
+ OP_BRAPOSZERO, /* 150 */
/* These are backtracking control verbs */
- OP_MARK, /* 150 always has an argument */
- OP_PRUNE, /* 151 */
- OP_PRUNE_ARG, /* 152 same, but with argument */
- OP_SKIP, /* 153 */
- OP_SKIP_ARG, /* 154 same, but with argument */
- OP_THEN, /* 155 */
- OP_THEN_ARG, /* 156 same, but with argument */
- OP_COMMIT, /* 157 */
+ OP_MARK, /* 151 always has an argument */
+ OP_PRUNE, /* 152 */
+ OP_PRUNE_ARG, /* 153 same, but with argument */
+ OP_SKIP, /* 154 */
+ OP_SKIP_ARG, /* 155 same, but with argument */
+ OP_THEN, /* 156 */
+ OP_THEN_ARG, /* 157 same, but with argument */
+ OP_COMMIT, /* 158 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 158 */
- OP_ACCEPT, /* 159 */
- OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
- OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 159 */
+ OP_ACCEPT, /* 160 */
+ OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
+ OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 162 */
+ OP_SKIPZERO, /* 163 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
- OP_DEFINE, /* 163 */
+ OP_DEFINE, /* 164 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1598,7 +1599,7 @@
"*", "*?", "+", "+?", "?", "??", "{", "{", \
"*+","++", "?+", "{", \
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
- "Recurse", "Callout", \
+ "Recurse", "Callout", "CalloutStr", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
"Once", "Once_NC", \
@@ -1672,7 +1673,8 @@
1+2*IMM2_SIZE, /* DNREF */ \
1+2*IMM2_SIZE, /* DNREFI */ \
1+LINK_SIZE, /* RECURSE */ \
- 2+2*LINK_SIZE, /* CALLOUT */ \
+ 1+2*LINK_SIZE+1, /* CALLOUT */ \
+ 0, /* CALLOUT_STR - variable length */ \
1+LINK_SIZE, /* Alt */ \
1+LINK_SIZE, /* Ket */ \
1+LINK_SIZE, /* KetRmax */ \
@@ -1806,6 +1808,8 @@
#endif
#define _pcre2_OP_lengths PCRE2_SUFFIX(_pcre2_OP_lengths_)
+#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_)
+#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_)
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
@@ -1824,6 +1828,8 @@
#define _pcre2_utt_size PCRE2_SUFFIX(_pcre2_utt_size_)
extern const uint8_t PRIV(OP_lengths)[];
+extern const uint32_t PRIV(callout_end_delims)[];
+extern const uint32_t PRIV(callout_start_delims)[];
extern const pcre2_compile_context PRIV(default_compile_context);
extern const pcre2_match_context PRIV(default_match_context);
extern const uint8_t PRIV(default_tables)[];
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_jit_compile.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -771,6 +771,9 @@
#endif
return cc + 1;
+ case OP_CALLOUT_STR:
+ return cc + GET(cc, 1 + 2*LINK_SIZE);
+
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
case OP_XCLASS:
return cc + GET(cc, 1);
@@ -821,7 +824,7 @@
case OP_SCOND:
/* Only AUTO_CALLOUT can insert this opcode. We do
not intend to support this case. */
- if (cc[1 + LINK_SIZE] == OP_CALLOUT)
+ if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
return FALSE;
cc += 1 + LINK_SIZE;
break;
@@ -855,12 +858,13 @@
break;
case OP_CALLOUT:
+ case OP_CALLOUT_STR:
if (common->capture_last_ptr == 0)
{
common->capture_last_ptr = common->ovector_start;
common->ovector_start += sizeof(sljit_sw);
}
- cc += 2 + 2 * LINK_SIZE;
+ cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
break;
case OP_THEN_ARG:
@@ -6296,7 +6300,7 @@
if (arguments->callout == NULL)
return 0;
-callout_block->version = 0;
+callout_block->version = 1;
/* Offsets in subject. */
callout_block->subject_length = arguments->end - arguments->begin;
@@ -6333,6 +6337,10 @@
DEFINE_COMPILER;
backtrack_common *backtrack;
sljit_si mov_opcode;
+unsigned int callout_length = (*cc == OP_CALLOUT)
+ ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
+sljit_sw value1;
+sljit_sw value2;
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@@ -6341,7 +6349,8 @@
SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
-OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
+value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
+OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
/* These pointer sized fields temporarly stores internal variables. */
@@ -6352,8 +6361,22 @@
if (common->mark_ptr != 0)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_UI : SLJIT_MOV;
-OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
-OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
+OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
+OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
+
+if (*cc == OP_CALLOUT)
+ {
+ value1 = 0;
+ value2 = 0;
+ }
+else
+ {
+ value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1);
+ value2 = (callout_length - (1 + 3*LINK_SIZE + 2));
+ }
+
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
+OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
/* Needed to save important temporary registers. */
@@ -6372,7 +6395,7 @@
add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
else
JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
-return cc + 2 + 2 * LINK_SIZE;
+return cc + callout_length;
}
#undef CALLOUT_ARG_SIZE
@@ -8377,6 +8400,7 @@
break;
case OP_CALLOUT:
+ case OP_CALLOUT_STR:
cc = compile_callout_matchingpath(common, cc, parent);
break;
@@ -9561,6 +9585,7 @@
break;
case OP_CALLOUT:
+ case OP_CALLOUT_STR:
case OP_FAIL:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_match.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -1310,13 +1310,15 @@
/* Because of the way auto-callout works during compile, a callout item is
inserted between OP_COND and an assertion condition. */
- if (*ecode == OP_CALLOUT)
+ if (*ecode == OP_CALLOUT || *ecode == OP_CALLOUT_STR)
{
+ unsigned int callout_length = (*ecode == OP_CALLOUT)
+ ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
+
if (mb->callout != NULL)
{
pcre2_callout_block cb;
- cb.version = 0;
- cb.callout_number = ecode[1];
+ cb.version = 1;
cb.capture_top = offset_top/2;
cb.capture_last = mb->capture_last & CAPLMASK;
cb.offset_vector = mb->ovector;
@@ -1325,8 +1327,23 @@
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
- cb.pattern_position = GET(ecode, 2);
- cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
+ cb.pattern_position = GET(ecode, 1);
+ cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
+
+ if (*ecode == OP_CALLOUT)
+ {
+ cb.callout_number = ecode[1 + 2*LINK_SIZE];
+ cb.callout_string = NULL;
+ cb.callout_string_length = 0;
+ }
+ else
+ {
+ cb.callout_number = 0;
+ cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
+ cb.callout_string_length =
+ callout_length - (1 + 3*LINK_SIZE) - 2;
+ }
+
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
RRETURN(MATCH_NOMATCH);
if (rrc < 0) RRETURN(rrc);
@@ -1335,8 +1352,8 @@
/* Advance ecode past the callout, so it now points to the condition. We
must adjust codelink so that the value of ecode+codelink is unchanged. */
- ecode += PRIV(OP_lengths)[OP_CALLOUT];
- codelink -= PRIV(OP_lengths)[OP_CALLOUT];
+ ecode += callout_length;
+ codelink -= callout_length;
}
/* Test the various possible conditions */
@@ -1716,26 +1733,47 @@
function is able to force a failure. */
case OP_CALLOUT:
- if (mb->callout != NULL)
+ case OP_CALLOUT_STR:
{
- pcre2_callout_block cb;
- cb.version = 0;
- cb.callout_number = ecode[1];
- cb.capture_top = offset_top/2;
- cb.capture_last = mb->capture_last & CAPLMASK;
- cb.offset_vector = mb->ovector;
- cb.mark = mb->nomatch_mark;
- cb.subject = mb->start_subject;
- cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
- cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
- cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
- cb.pattern_position = GET(ecode, 2);
- cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
- if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
- RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
+ unsigned int callout_length = (*ecode == OP_CALLOUT)
+ ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
+
+ if (mb->callout != NULL)
+ {
+ pcre2_callout_block cb;
+ cb.version = 1;
+ cb.callout_number = ecode[LINK_SIZE + 1];
+ cb.capture_top = offset_top/2;
+ cb.capture_last = mb->capture_last & CAPLMASK;
+ cb.offset_vector = mb->ovector;
+ cb.mark = mb->nomatch_mark;
+ cb.subject = mb->start_subject;
+ cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
+ cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
+ cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
+ cb.pattern_position = GET(ecode, 1);
+ cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
+
+ if (*ecode == OP_CALLOUT)
+ {
+ cb.callout_number = ecode[1 + 2*LINK_SIZE];
+ cb.callout_string = NULL;
+ cb.callout_string_length = 0;
+ }
+ else
+ {
+ cb.callout_number = 0;
+ cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
+ cb.callout_string_length =
+ callout_length - (1 + 3*LINK_SIZE) - 2;
+ }
+
+ if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
+ RRETURN(MATCH_NOMATCH);
+ if (rrc < 0) RRETURN(rrc);
+ }
+ ecode += callout_length;
}
- ecode += 2 + 2*LINK_SIZE;
break;
/* Recursion either matches the current regex, or some subexpression. The
Modified: code/trunk/src/pcre2_printint.c
===================================================================
--- code/trunk/src/pcre2_printint.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_printint.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -305,6 +305,7 @@
{
PCRE2_SPTR ccode;
uint32_t c;
+ int i;
const char *flag = " ";
unsigned int extra = 0;
@@ -594,10 +595,25 @@
goto CLASS_REF_REPEAT;
case OP_CALLOUT:
- fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
- GET(code, 2 + LINK_SIZE));
+ fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
+ GET(code, 1), GET(code, 1 + LINK_SIZE));
break;
+ case OP_CALLOUT_STR:
+ c = code[1 + 3*LINK_SIZE];
+ fprintf(f, " %s %c", OP_names[*code], c);
+ extra = GET(code, 1 + 2*LINK_SIZE);
+ print_custring(f, code + 2 + 3*LINK_SIZE);
+
+ for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
+ if (c == PRIV(callout_start_delims)[i])
+ {
+ c = PRIV(callout_end_delims)[i];
+ break;
+ }
+ fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE));
+ break;
+
case OP_PROP:
case OP_NOTPROP:
print_prop(f, code, " ", "");
@@ -611,7 +627,6 @@
case OP_NCLASS:
case OP_XCLASS:
{
- int i;
unsigned int min, max;
BOOL printmap;
BOOL invertmap = FALSE;
Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_study.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -199,6 +199,10 @@
cc += PRIV(OP_lengths)[*cc];
break;
+ case OP_CALLOUT_STR:
+ cc += GET(cc, 1 + 2*LINK_SIZE);
+ break;
+
/* Skip over a subpattern that has a {0} or {0,x} quantifier */
case OP_BRAZERO:
@@ -935,9 +939,13 @@
/* Skip over callout */
case OP_CALLOUT:
- tcode += 2 + 2*LINK_SIZE;
+ tcode += PRIV(OP_lengths)[OP_CALLOUT];
break;
+ case OP_CALLOUT_STR:
+ tcode += GET(tcode, 1 + 2*LINK_SIZE);
+ break;
+
/* Skip over lookbehind and negative lookahead assertions */
case OP_ASSERT_NOT:
Modified: code/trunk/src/pcre2_tables.c
===================================================================
--- code/trunk/src/pcre2_tables.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2_tables.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -66,7 +66,21 @@
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
+/* These tables are the pairs of delimiters that are valid for callout string
+arguments. Entry i of the end table closes entry i of the start table; the two
+differ only for bracket-like delimiters. Both lists are zero-terminated. */
+const uint32_t PRIV(callout_start_delims)[] = {
+ CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
+ CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
+ CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
+
+const uint32_t PRIV(callout_end_delims)[] = {
+ CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
+ CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
+ CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
+
+
/*************************************************
* Tables for UTF-8 support *
*************************************************/
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/src/pcre2test.c 2015-03-11 17:44:16 UTC (rev 220)
@@ -4519,9 +4519,9 @@
/* Called from a PCRE2 library as a result of the (?C) item. We print out where
we are in the match. Yield zero unless more callouts than the fail count, or
the callout data is not zero. The only differences in the callout block for
-different code unit widths are that the pointers to the subject and the most
-recent MARK point to strings of the appropriate width. Casts can be used to
-deal with this.
+different code unit widths are that the pointers to the subject, the most
+recent MARK, and a callout argument string point to strings of the appropriate
+width. Casts can be used to deal with this.
Argument: a pointer to a callout block
Return:
@@ -4535,11 +4535,31 @@
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
FILE *f = (first_callout || callout_capture)? outfile : NULL;
+/* For a callout with a string argument, show the string first because there
+isn't a tidy way to fit it in the rest of the data. Use outfile; f may be NULL. */
+
+if (cb->callout_string != NULL)
+ {
+ uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
+ fprintf(outfile, "Callout: %c", delimiter);
+ PCHARSV(cb->callout_string, 0,
+ cb->callout_string_length, utf, outfile);
+ for (i = 0; callout_start_delims[i] != 0; i++)
+ if (delimiter == callout_start_delims[i])
+ {
+ delimiter = callout_end_delims[i];
+ break;
+ }
+ fprintf(outfile, "%c", delimiter);
+ if (!callout_capture) fprintf(outfile, "\n");
+ }
+
+/* Show captured strings if required */
+
if (callout_capture)
{
- fprintf(f, "Callout %d: last capture = %d\n",
- cb->callout_number, cb->capture_last);
-
+ if (cb->callout_string == NULL) fprintf(f, "Callout %d:", cb->callout_number);
+ fprintf(f, " last capture = %d\n", cb->capture_last);
for (i = 0; i < cb->capture_top * 2; i += 2)
{
fprintf(f, "%2d: ", i/2);
@@ -4553,7 +4573,7 @@
fprintf(f, "\n");
}
}
-
+
/* Re-print the subject in canonical form, the first time or if giving full
datails. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */
@@ -4572,19 +4592,22 @@
if (f != NULL) fprintf(f, "\n");
-/* Always print appropriate indicators, with callout number if not already
-shown. For automatic callouts, show the pattern offset. */
+/* For automatic callouts, show the pattern offset. Otherwise, for a numerical
+callout whose number has not already been shown with captured strings, show the
+number here. A callout with a string argument has been displayed above. */
if (cb->callout_number == 255)
{
fprintf(outfile, "%+3d ", (int)cb->pattern_position);
if (cb->pattern_position > 99) fprintf(outfile, "\n ");
}
-else
+else
{
- if (callout_capture) fprintf(outfile, " ");
+ if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
else fprintf(outfile, "%3d ", cb->callout_number);
}
+
+/* Now show position indicators */
for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/testdata/testinput2 2015-03-11 17:44:16 UTC (rev 220)
@@ -4178,4 +4178,32 @@
/((?+1)(\1))/B
+# Callouts with string arguments
+
+/a(?C"/
+
+/a(?C"a/
+
+/a(?C"a"/
+
+/a(?C"a"bcde(?C"b")xyz/
+
+/a(?C"a)b""c")/B
+
+/ab(?C" any text with spaces ")cde/B
+ abcde
+ 12abcde
+
+/^a(b)c(?C1)def/
+ abcdef
+
+/^a(b)c(?C"AB")def/
+ abcdef
+
+/^a(b)c(?C1)def/
+ abcdef\=callout_capture
+
+/^a(b)c(?C{AB})def/B
+ abcdef\=callout_capture
+
# End of testinput2
Modified: code/trunk/testdata/testinput6
===================================================================
--- code/trunk/testdata/testinput6 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/testdata/testinput6 2015-03-11 17:44:16 UTC (rev 220)
@@ -4811,4 +4811,20 @@
/a(b)c(d)/
abc\=ph,copy=0,copy=1,getall
+/ab(?C" any text with spaces ")cde/B
+ abcde
+ 12abcde
+
+/^a(b)c(?C1)def/
+ abcdef
+
+/^a(b)c(?C"AB")def/
+ abcdef
+
+/^a(b)c(?C1)def/
+ abcdef\=callout_capture
+
+/^a(b)c(?C{AB})def/B
+ abcdef\=callout_capture
+
# End of testinput6
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/testdata/testoutput2 2015-03-11 17:44:16 UTC (rev 220)
@@ -3538,7 +3538,7 @@
Failed: error 138 at offset 6: number after (?C is greater than 255
/(?Cab)xx/I
-Failed: error 139 at offset 3: closing parenthesis for (?C expected
+Failed: error 182 at offset 3: unrecognized string delimiter follows (?C
/(?C12vr)x/I
Failed: error 139 at offset 5: closing parenthesis for (?C expected
@@ -13969,4 +13969,95 @@
End
------------------------------------------------------------------
+# Callouts with string arguments
+
+/a(?C"/
+Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument
+
+/a(?C"a/
+Failed: error 181 at offset 4: missing terminating delimiter for callout with string argument
+
+/a(?C"a"/
+Failed: error 139 at offset 7: closing parenthesis for (?C expected
+
+/a(?C"a"bcde(?C"b")xyz/
+Failed: error 139 at offset 7: closing parenthesis for (?C expected
+
+/a(?C"a)b""c")/B
+------------------------------------------------------------------
+ Bra
+ a
+ CalloutStr "a)b"c" 13 0
+ Ket
+ End
+------------------------------------------------------------------
+
+/ab(?C" any text with spaces ")cde/B
+------------------------------------------------------------------
+ Bra
+ ab
+ CalloutStr " any text with spaces " 30 1
+ cde
+ Ket
+ End
+------------------------------------------------------------------
+ abcde
+Callout: " any text with spaces "
+--->abcde
+ ^ ^ c
+ 0: abcde
+ 12abcde
+Callout: " any text with spaces "
+--->12abcde
+ ^ ^ c
+ 0: abcde
+
+/^a(b)c(?C1)def/
+ abcdef
+--->abcdef
+ 1 ^ ^ d
+ 0: abcdef
+ 1: b
+
+/^a(b)c(?C"AB")def/
+ abcdef
+Callout: "AB"
+--->abcdef
+ ^ ^ d
+ 0: abcdef
+ 1: b
+
+/^a(b)c(?C1)def/
+ abcdef\=callout_capture
+Callout 1: last capture = 1
+ 0: <unset>
+ 1: b
+--->abcdef
+ ^ ^ d
+ 0: abcdef
+ 1: b
+
+/^a(b)c(?C{AB})def/B
+------------------------------------------------------------------
+ Bra
+ ^
+ a
+ CBra 1
+ b
+ Ket
+ c
+ CalloutStr {AB} 14 1
+ def
+ Ket
+ End
+------------------------------------------------------------------
+ abcdef\=callout_capture
+Callout: {AB} last capture = 1
+ 0: <unset>
+ 1: b
+--->abcdef
+ ^ ^ d
+ 0: abcdef
+ 1: b
+
# End of testinput2
Modified: code/trunk/testdata/testoutput6
===================================================================
--- code/trunk/testdata/testoutput6 2015-03-06 15:19:36 UTC (rev 219)
+++ code/trunk/testdata/testoutput6 2015-03-11 17:44:16 UTC (rev 220)
@@ -7773,4 +7773,66 @@
Copy substring 1 failed (-2): partial match
get substring list failed (-2): partial match
+/ab(?C" any text with spaces ")cde/B
+------------------------------------------------------------------
+ Bra
+ ab
+ CalloutStr " any text with spaces " 30 1
+ cde
+ Ket
+ End
+------------------------------------------------------------------
+ abcde
+Callout: " any text with spaces "
+--->abcde
+ ^ ^ c
+ 0: abcde
+ 12abcde
+Callout: " any text with spaces "
+--->12abcde
+ ^ ^ c
+ 0: abcde
+
+/^a(b)c(?C1)def/
+ abcdef
+--->abcdef
+ 1 ^ ^ d
+ 0: abcdef
+
+/^a(b)c(?C"AB")def/
+ abcdef
+Callout: "AB"
+--->abcdef
+ ^ ^ d
+ 0: abcdef
+
+/^a(b)c(?C1)def/
+ abcdef\=callout_capture
+Callout 1: last capture = 0
+ 0:
+--->abcdef
+ ^ ^ d
+ 0: abcdef
+
+/^a(b)c(?C{AB})def/B
+------------------------------------------------------------------
+ Bra
+ ^
+ a
+ CBra 1
+ b
+ Ket
+ c
+ CalloutStr {AB} 14 1
+ def
+ Ket
+ End
+------------------------------------------------------------------
+ abcdef\=callout_capture
+Callout: {AB} last capture = 0
+ 0:
+--->abcdef
+ ^ ^ d
+ 0: abcdef
+
# End of testinput6