Revision: 140
http://www.exim.org/viewvc/pcre2?view=rev&revision=140
Author: ph10
Date: 2014-11-11 10:19:23 +0000 (Tue, 11 Nov 2014)
Log Message:
-----------
Code for pcre2_substitute(), and tests.
Modified Paths:
--------------
code/trunk/doc/pcre2test.1
code/trunk/src/pcre2.h.in
code/trunk/src/pcre2_error.c
code/trunk/src/pcre2_substitute.c
code/trunk/src/pcre2test.c
code/trunk/testdata/grepoutput
code/trunk/testdata/testinput2
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput10
code/trunk/testdata/testoutput12-16
code/trunk/testdata/testoutput14
code/trunk/testdata/testoutput16
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
code/trunk/testdata/testoutput6
code/trunk/testdata/testoutput7
Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/doc/pcre2test.1 2014-11-11 10:19:23 UTC (rev 140)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "02 November 2014" "PCRE 10.00"
+.TH PCRE2TEST 1 "09 November 2014" "PCRE 10.00"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -447,7 +447,6 @@
posix use the POSIX API
stackguard=<number> test the stackguard feature
tables=[0|1|2] select internal tables
- use_length use the pattern's length
.sp
The effects of these modifiers are described in the following sections.
FIXME: Give more examples.
@@ -497,17 +496,12 @@
/ab 32 59/hex
.sp
This feature is provided as a way of creating patterns that contain binary zero
-characters. When \fBhex\fP is set, it implies \fBuse_length\fP.
+characters. By default, \fBpcre2test\fP passes patterns as zero-terminated
+strings to \fBpcre2_compile()\fP, giving the length as PCRE2_ZERO_TERMINATED.
+However, for patterns specified in hexadecimal, the length of the pattern is
+passed.
.
.
-.SS "Using the pattern's length"
-.rs
-.sp
-By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
-\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the
-length of the pattern is passed. This is implied if \fBhex\fP is set.
-.
-.
.SS "JIT compilation"
.rs
.sp
@@ -726,6 +720,7 @@
ovector=<n> set size of output vector
recursion_limit=<n> set a recursion limit
startchar show startchar when relevant
+ zero_terminate pass the subject as zero-terminated
.sp
The effects of these modifiers are described in the following sections.
FIXME: Give more examples.
@@ -931,6 +926,19 @@
offsets.)
.
.
+.SS "Passing the subject as zero-terminated"
+.rs
+.sp
+By default, the subject string is passed to a native API matching function with
+its correct length. In order to test the facility for passing a zero-terminated
+string, the \fBzero_terminate\fP modifier is provided. It causes the length to
+be passed as PCRE2_ZERO_TERMINATED. (When matching via the POSIX interface,
+this modifier has no effect, as there is no facility for passing a length.)
+.P
+When testing \fBpcre2_substitute()\fP, this modifier also has the effect of
+passing the replacement string as zero-terminated.
+.
+.
.SH "THE ALTERNATIVE MATCHING FUNCTION"
.rs
.sp
@@ -1192,6 +1200,6 @@
.rs
.sp
.nf
-Last updated: 02 November 2014
+Last updated: 09 November 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2.h.in 2014-11-11 10:19:23 UTC (rev 140)
@@ -206,24 +206,25 @@
#define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOPTION (-34)
-#define PCRE2_ERROR_BADUTFOFFSET (-35)
-#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
-#define PCRE2_ERROR_DFA_BADRESTART (-37)
-#define PCRE2_ERROR_DFA_RECURSE (-38)
-#define PCRE2_ERROR_DFA_UCOND (-39)
-#define PCRE2_ERROR_DFA_UITEM (-40)
-#define PCRE2_ERROR_DFA_WSSIZE (-41)
-#define PCRE2_ERROR_INTERNAL (-42)
-#define PCRE2_ERROR_JIT_BADOPTION (-43)
-#define PCRE2_ERROR_JIT_STACKLIMIT (-44)
-#define PCRE2_ERROR_MATCHLIMIT (-45)
-#define PCRE2_ERROR_NOMEMORY (-46)
-#define PCRE2_ERROR_NOSUBSTRING (-47)
-#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48)
-#define PCRE2_ERROR_NULL (-49)
-#define PCRE2_ERROR_RECURSELOOP (-50)
-#define PCRE2_ERROR_RECURSIONLIMIT (-51)
-#define PCRE2_ERROR_UNSET (-52)
+#define PCRE2_ERROR_BADREPLACEMENT (-35)
+#define PCRE2_ERROR_BADUTFOFFSET (-36)
+#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
+#define PCRE2_ERROR_DFA_BADRESTART (-38)
+#define PCRE2_ERROR_DFA_RECURSE (-39)
+#define PCRE2_ERROR_DFA_UCOND (-40)
+#define PCRE2_ERROR_DFA_UITEM (-41)
+#define PCRE2_ERROR_DFA_WSSIZE (-42)
+#define PCRE2_ERROR_INTERNAL (-43)
+#define PCRE2_ERROR_JIT_BADOPTION (-44)
+#define PCRE2_ERROR_JIT_STACKLIMIT (-45)
+#define PCRE2_ERROR_MATCHLIMIT (-46)
+#define PCRE2_ERROR_NOMEMORY (-47)
+#define PCRE2_ERROR_NOSUBSTRING (-48)
+#define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
+#define PCRE2_ERROR_NULL (-50)
+#define PCRE2_ERROR_RECURSELOOP (-51)
+#define PCRE2_ERROR_RECURSIONLIMIT (-52)
+#define PCRE2_ERROR_UNSET (-53)
/* Request types for pcre2_pattern_info() */
Modified: code/trunk/src/pcre2_error.c
===================================================================
--- code/trunk/src/pcre2_error.c 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2_error.c 2014-11-11 10:19:23 UTC (rev 140)
@@ -206,24 +206,25 @@
"bad offset value\0"
"bad option value\0"
/* 35 */
+ "invalid replacement string\0"
"bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0"
+ /* 40 */
"backreference condition or recursion test not supported for DFA matching\0"
- /* 40 */
"item unsupported for DFA matching\0"
"workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0"
"bad JIT option\0"
+ /* 45 */
"JIT stack limit reached\0"
- /* 45 */
"match limit exceeded\0"
"no more memory\0"
"unknown or unset substring\0"
"non-unique substring name\0"
+ /* 50 */
"NULL argument passed\0"
- /* 50 */
"nested recursion at the same subject position\0"
"recursion limit exceeded\0"
"requested value is not set\0"
Modified: code/trunk/src/pcre2_substitute.c
===================================================================
--- code/trunk/src/pcre2_substitute.c 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2_substitute.c 2014-11-11 10:19:23 UTC (rev 140)
@@ -51,7 +51,7 @@
*************************************************/
/* This function applies a compiled re to a subject string and creates a new
-string with substitutione. The first 7 arguments are the same as for
+string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
Arguments:
@@ -69,6 +69,7 @@
Returns: > 0 number of substitutions made
< 0 an error code, including PCRE2_ERROR_NOMATCH if no match
+ PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@@ -86,6 +87,11 @@
PCRE2_SIZE buff_offset, lengthleft, endlength;
PCRE2_SIZE *ovector;
+/* Partial matching is not valid. */
+
+if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
+ return PCRE2_ERROR_BADOPTION;
+
/* If no match data block is provided, create one. */
if (match_data == NULL)
@@ -129,11 +135,16 @@
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext);
+
+ /* Any error other than no match returns the error code. No match when not
+ doing the special after-empty-match global rematch, or when at the end of the
+ subject, breaks the global loop. Otherwise, advance the starting point and
+ try again. */
if (rc < 0)
{
- if (goptions == 0 || rc != PCRE2_ERROR_NOMATCH || start_offset >= length)
- break;
+ if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
+ if (goptions == 0 || start_offset >= length) break;
start_offset++;
if ((code->overall_options & PCRE2_UTF) != 0)
{
@@ -149,6 +160,8 @@
goptions = 0;
continue;
}
+
+ /* Handle a successful match. */
subs++;
if (rc == 0) rc = ovector_count;
@@ -161,29 +174,34 @@
for (i = 0; i < rlength; i++)
{
- if (replacement[i] == CHAR_DOLLAR_SIGN && i != rlength - 1)
+ if (replacement[i] == CHAR_DOLLAR_SIGN)
{
- int group = -1;
- int n = 0;
- BOOL inparens = FALSE;
- PCRE2_SIZE j = i + 1;
- PCRE2_SIZE sublength;
- PCRE2_UCHAR next = replacement[j];
- PCRE2_UCHAR name[33];
+ int group, n;
+ BOOL inparens;
+ PCRE2_SIZE sublength;
+ PCRE2_UCHAR next;
+ PCRE2_UCHAR name[33];
+
+ if (++i == rlength) goto BAD;
+ if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL;
+
+ group = -1;
+ n = 0;
+ inparens = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET)
{
- if (j == rlength - 1) goto LITERAL;
+ if (++i == rlength) goto BAD;
+ next = replacement[i];
inparens = TRUE;
- next = replacement[++j];
}
if (next >= CHAR_0 && next <= CHAR_9)
{
group = next - CHAR_0;
- while (j < rlength - 1)
+ while (i < rlength - 1)
{
- next = replacement[++j];
+ next = replacement[++i];
if (next < CHAR_0 || next > CHAR_9) break;
group = group * 10 + next - CHAR_0;
}
@@ -194,31 +212,31 @@
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
{
name[n++] = next;
- if (n > 32) goto LITERAL;
- if (j == rlength - 1) break;
- next = replacement[++j];
+ if (n > 32) goto BAD;
+ if (i == rlength) break;
+ next = replacement[++i];
}
+ if (n == 0) goto BAD;
name[n] = 0;
}
if (inparens)
{
- if (j == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto LITERAL;
+ if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD;
}
- else j--; /* Last code unit of name/number */
-
+ else i--; /* Last code unit of name/number */
+
/* Have found a syntactically correct group number or name. */
- i = j; /* Where to continue from */
-
+ sublength = lengthleft;
if (group < 0)
rc = pcre2_substring_copy_byname(match_data, name,
buffer + buff_offset, &sublength);
else
rc = pcre2_substring_copy_bynumber(match_data, group,
buffer + buff_offset, &sublength);
-
- if (rc < 0) goto EXIT;
+
+ if (rc < 0) goto EXIT;
buff_offset += sublength;
lengthleft -= sublength;
}
@@ -242,20 +260,16 @@
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
} while (global); /* Repeat "do" loop */
-/* No match is a "normal" end; copy the rest of the subject and return the
-number of substitutions. */
+/* Copy the rest of the subject and return the number of substitutions. */
-if (rc == PCRE2_ERROR_NOMATCH)
- {
- rc = subs;
- endlength = length - start_offset;
- if (endlength + 1 >= lengthleft) goto NOROOM;
- memcpy(buffer + buff_offset, subject + start_offset,
- endlength*(PCRE2_CODE_UNIT_WIDTH/8));
- buff_offset += endlength;
- buffer[buff_offset] = 0;
- *blength = buff_offset;
- }
+rc = subs;
+endlength = length - start_offset;
+if (endlength + 1 > lengthleft) goto NOROOM;
+memcpy(buffer + buff_offset, subject + start_offset,
+ endlength*(PCRE2_CODE_UNIT_WIDTH/8));
+buff_offset += endlength;
+buffer[buff_offset] = 0;
+*blength = buff_offset;
EXIT:
if (match_data_created) pcre2_match_data_free(match_data);
@@ -264,6 +278,10 @@
NOROOM:
rc = PCRE2_ERROR_NOMEMORY;
goto EXIT;
+
+BAD:
+rc = PCRE2_ERROR_BADREPLACEMENT;
+goto EXIT;
}
/* End of pcre2_substitute.c */
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2test.c 2014-11-11 10:19:23 UTC (rev 140)
@@ -165,6 +165,7 @@
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOOPREPEAT 500000 /* Default loop count for timing */
+#define REPLACE_BUFFSIZE 400 /* For replacement strings */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
/* Execution modes */
@@ -345,9 +346,9 @@
#define CTL_JITVERIFY 0x00010000u
#define CTL_MARK 0x00020000u
#define CTL_MEMORY 0x00040000u
-#define CTL_PATLEN 0x00080000u
-#define CTL_POSIX 0x00100000u
-#define CTL_STARTCHAR 0x00200000u
+#define CTL_POSIX 0x00080000u
+#define CTL_STARTCHAR 0x00100000u
+#define CTL_ZERO_TERMINATE 0x00200000u
#define CTL_BSR_SET 0x80000000u /* This is informational */
#define CTL_NL_SET 0x40000000u /* This is informational */
@@ -376,6 +377,7 @@
uint32_t stackguard_test;
uint32_t tables_id;
uint8_t locale[32];
+ uint8_t replacement[REPLACE_BUFFSIZE];
} patctl;
#define MAXCPYGET 10
@@ -485,13 +487,14 @@
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
{ "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
+ { "replace", MOD_PAT, MOD_STR, 0, PO(replacement) },
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
- { "use_length", MOD_PAT, MOD_CTL, CTL_PATLEN, PO(control) },
- { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) }
+ { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
+ { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
};
#define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct)
@@ -945,6 +948,17 @@
else \
pcre2_set_recursion_limit_32(G(a,32),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ if (test_mode == PCRE8_MODE) \
+ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
+ (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
+ else if (test_mode == PCRE16_MODE) \
+ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
+ (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
+ else \
+ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
+ (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
+
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
if (test_mode == PCRE8_MODE) \
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
@@ -1298,6 +1312,16 @@
else \
G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
+ G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \
+ (G(PCRE2_UCHAR,BITONE) *)k,l); \
+ else \
+ a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
+ G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \
+ (G(PCRE2_UCHAR,BITTWO) *)k,l)
+
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
@@ -1466,6 +1490,9 @@
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
+ (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -1544,6 +1571,9 @@
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
+ (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -1622,6 +1652,9 @@
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+ a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
+ (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -3199,9 +3232,9 @@
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
((controls & CTL_MARK) != 0)? " mark" : "",
((controls & CTL_MEMORY) != 0)? " memory" : "",
- ((controls & CTL_PATLEN) != 0)? " use_length" : "",
((controls & CTL_POSIX) != 0)? " posix" : "",
- ((controls & CTL_STARTCHAR) != 0)? " startchar" : "");
+ ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
+ ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
}
@@ -3672,6 +3705,7 @@
/* Look for modifiers and options after the final delimiter. */
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
+utf = (pat_patctl.options & PCRE2_UTF) != 0;
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
specified. */
@@ -3679,7 +3713,6 @@
if (pat_patctl.jit == 0 &&
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
pat_patctl.jit = 7;
-utf = (pat_patctl.options & PCRE2_UTF) != 0;
/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
in callouts. Convert to binary if required. */
@@ -3786,6 +3819,7 @@
/* Check for features that the POSIX interface does not support. */
if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
+ if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
if (timeit > 0) prmsg(&msg, "timing");
@@ -3863,11 +3897,11 @@
break;
}
-/* The pattern in now in pbuffer[8|16|32], with the length in patlen. By
+/* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
default, however, we pass a zero-terminated pattern. The length is passed only
-if we had a hex pattern or if use_length was set. */
+if we had a hex pattern. */
-if ((pat_patctl.control & (CTL_PATLEN|CTL_HEXPAT)) == 0) patlen = -1;
+if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
/* Compile many times when timing. */
@@ -4491,22 +4525,6 @@
len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
ulen = len/code_unit_size; /* Length in code units */
-/* If we have explicit valgrind support, mark the data from after its end to
-the end of the buffer as unaddressable, so that a read over the end of the
-buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
-building with valgrind support, at least move the data to the end of the buffer
-so that it might at least cause a crash. If we are using the POSIX interface,
-we must include the terminating zero. */
-
-pp = dbuffer;
-c = code_unit_size * ((pat_patctl.control & CTL_POSIX) != 0)? 1:0;
-
-#ifdef SUPPORT_VALGRIND
- VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
-#else
- pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
-#endif
-
/* If the string was terminated by \= we must now interpret modifiers. */
if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
@@ -4522,10 +4540,27 @@
return PR_OK;
}
-/* Now run the pattern match: len contains the byte length, ulen contains the
-code unit length, and pp points to the subject string. POSIX matching is only
-possible in 8-bit mode, and it does not support timing or other fancy features.
-Some were checked at compile time, but we need to check the match-time settings
+/* If we have explicit valgrind support, mark the data from after its end to
+the end of the buffer as unaddressable, so that a read over the end of the
+buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
+building with valgrind support, at least move the data to the end of the buffer
+so that it might at least cause a crash. If we are using the POSIX interface,
+or testing zero-termination, we must include the terminating zero. */
+
+pp = dbuffer;
+c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
+ (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
+
+#ifdef SUPPORT_VALGRIND
+ VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
+#else
+ pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
+#endif
+
+/* We now have len containing the byte length, ulen containing the code unit
+length, and pp pointing to the subject string. POSIX matching is only possible
+in 8-bit mode, and it does not support timing or other fancy features. Some
+were checked at compile time, but we need to check the match-time settings
here. */
#ifdef SUPPORT_PCRE2_8
@@ -4621,6 +4656,11 @@
dat_datctl.control &= ~CTL_ALLUSEDTEXT;
}
+/* Handle passing the subject as zero-terminated. */
+
+if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
+ ulen = PCRE2_ZERO_TERMINATED;
+
/* Enable display of malloc/free if wanted. */
show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
@@ -4676,10 +4716,135 @@
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
}
-/* Loop for global matching */
+/* If a replacement string is provided, call pcre2_substitute() instead of one
+of the matching functions. First we have to convert the replacement string to
+the appropriate width. */
-for (gmatched = 0;; gmatched++)
+if (pat_patctl.replacement[0] != 0)
{
+ int rc;
+ uint8_t *pr;
+ uint8_t rbuffer[REPLACE_BUFFSIZE];
+ uint8_t nbuffer[REPLACE_BUFFSIZE];
+ uint32_t goption;
+ PCRE2_SIZE rlen;
+ PCRE2_SIZE nsize;
+
+#ifdef SUPPORT_PCRE2_8
+ uint8_t *r8 = NULL;
+#endif
+#ifdef SUPPORT_PCRE2_16
+ uint16_t *r16 = NULL;
+#endif
+#ifdef SUPPORT_PCRE2_32
+ uint32_t *r32 = NULL;
+#endif
+
+ goption = ((pat_patctl.control & CTL_GLOBAL) == 0)? 0 :
+ PCRE2_SUBSTITUTE_GLOBAL;
+ SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
+ pr = pat_patctl.replacement;
+
+ /* If the replacement starts with '[<number>]' we interpret that as a length
+ value for the replacement buffer. */
+
+ nsize = REPLACE_BUFFSIZE/code_unit_size;
+ if (*pr == '[')
+ {
+ PCRE2_SIZE n = 0;
+ while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
+ if (*pr++ != ']')
+ {
+ fprintf(outfile, "Bad buffer size in replacement string\n");
+ return PR_OK;
+ }
+ if (n > nsize)
+ {
+ fprintf(outfile, "Replacement buffer setting (%ld) is too large "
+ "(max %ld)\n", n, nsize);
+ return PR_OK;
+ }
+ nsize = n;
+ }
+
+ /* Now copy the replacement string to a buffer of the appropriate width. */
+
+ while ((c = *pr++) != 0)
+ {
+ if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
+
+ /* At present no escape processing is provided for replacements. */
+
+#ifdef SUPPORT_PCRE2_8
+ if (test_mode == PCRE8_MODE)
+ {
+ if (utf)
+ {
+ r8 += ord2utf8(c, r8);
+ }
+ else
+ {
+ *r8++ = c;
+ }
+ }
+#endif
+#ifdef SUPPORT_PCRE2_16
+ if (test_mode == PCRE16_MODE)
+ {
+ if (utf)
+ {
+ if (c >= 0x10000u)
+ {
+ c-= 0x10000u;
+ *r16++ = 0xD800 | (c >> 10);
+ *r16++ = 0xDC00 | (c & 0x3ff);
+ }
+ else
+ *r16++ = c;
+ }
+ else
+ {
+ *r16++ = c;
+ }
+ }
+#endif
+#ifdef SUPPORT_PCRE2_32
+ if (test_mode == PCRE32_MODE)
+ {
+ *r32++ = c;
+ }
+#endif
+ }
+
+ SET(*r, 0);
+ if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
+ rlen = PCRE2_ZERO_TERMINATED;
+ else
+ rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
+ PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
+ dat_datctl.options|goption, match_data, dat_context,
+ rbuffer, rlen, nbuffer, &nsize);
+
+ if (rc < 0)
+ {
+ fprintf(outfile, "Failed: error %d: ", rc);
+ PCRE2_GET_ERROR_MESSAGE(nsize, rc, pbuffer);
+ PCHARSV(CASTVAR(void *, pbuffer), 0, nsize, FALSE, outfile);
+ }
+ else
+ {
+ fprintf(outfile, "%2d: ", rc);
+ PCHARSV(nbuffer, 0, nsize, utf, outfile);
+ }
+
+ fprintf(outfile, "\n");
+ } /* End of substitution handling */
+
+/* When a replacement string is not provided, run a loop for global matching
+with one of the basic matching functions. */
+
+else for (gmatched = 0;; gmatched++)
+ {
PCRE2_SIZE j;
int capcount;
PCRE2_SIZE *ovector;
@@ -4689,7 +4854,7 @@
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
-
+
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
/* When matching is via pcre2_match(), we will detect the use of JIT via the
@@ -4787,7 +4952,7 @@
{
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
}
-
+
/* Run a single DFA or NFA match. */
if ((dat_datctl.control & CTL_DFA) != 0)
@@ -4888,7 +5053,7 @@
fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n");
}
-
+
fprintf(outfile, "%2d: ", i/2);
/* Check for an unset group */
@@ -4900,15 +5065,15 @@
}
/* Check for silly offsets, in particular, values that have not been
- set when they should have been. */
-
+ set when they should have been. */
+
if (start > ulen || end > ulen)
{
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
start, end);
- continue;
- }
-
+ continue;
+ }
+
/* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with
JIT, it is disabled above, with a comment.) When the match is done by the
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testinput2 2014-11-11 10:19:23 UTC (rev 140)
@@ -4008,4 +4008,65 @@
/(((((a)))))/parens_nest_limit=2
+# Tests for pcre2_substitute()
+
+/abc/replace=XYZ
+ 123123
+ 123abc123
+ 123abc123abc123
+ 123123\=zero_terminate
+ 123abc123\=zero_terminate
+ 123abc123abc123\=zero_terminate
+
+/abc/g,replace=XYZ
+ 123abc123
+ 123abc123abc123
+
+/abc/replace=X$$Z
+ 123abc123
+
+/abc/g,replace=X$$Z
+ 123abc123abc123
+
+/a(b)c(d)e/replace=X$1Y${2}Z
+ "abcde"
+
+/a(b)c(d)e/replace=X$1Y${2}Z,global
+ "abcde-abcde"
+
+/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
+ "abcde"
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
+ "abcde-abcde-"
+
+/abc/replace=a$++
+ 123abc
+
+/abc/replace=a$bad
+ 123abc
+
+/abc/replace=a${A234567890123456789_123456789012}z
+ 123abc
+
+/abc/replace=a${A23456789012345678901234567890123}z
+ 123abc
+
+/abc/replace=a${bcd
+ 123abc
+
+/abc/replace=a${b+d}z
+ 123abc
+
+/abc/replace=[10]XYZ
+ 123abc123
+
+/abc/replace=[9]XYZ
+ 123abc123
+
+/abc/replace=xyz
+ 1abc2\=partial_hard
+
+# End of substitute tests
+
# End of testinput2
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testinput5 2014-11-11 10:19:23 UTC (rev 140)
@@ -1629,5 +1629,10 @@
/\x{100}\x{200}\K\x{300}/utf,startchar
\x{100}\x{200}\x{300}
+
+# Test UTF characters in a substitution
+/ábc/utf,replace=XሴZ
+ 123ábc123
+
# End of testinput5
Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput10 2014-11-11 10:19:23 UTC (rev 140)
@@ -888,7 +888,7 @@
a\x{123}aa\=offset=1
0: aa
a\x{123}aa\=offset=2
-Error -35 (bad UTF-8 offset)
+Error -36 (bad UTF-8 offset)
a\x{123}aa\=offset=3
0: aa
a\x{123}aa\=offset=4
Modified: code/trunk/testdata/testoutput12-16
===================================================================
--- code/trunk/testdata/testoutput12-16 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput12-16 2014-11-11 10:19:23 UTC (rev 140)
@@ -851,9 +851,9 @@
/a/utf
\x{10000}\=offset=1
-Error -35 (bad UTF-16 offset)
+Error -36 (bad UTF-16 offset)
\x{10000}ab\=offset=1
-Error -35 (bad UTF-16 offset)
+Error -36 (bad UTF-16 offset)
\x{10000}ab\=offset=2
0: a
\x{10000}ab\=offset=3
Modified: code/trunk/testdata/testoutput14
===================================================================
--- code/trunk/testdata/testoutput14 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput14 2014-11-11 10:19:23 UTC (rev 140)
@@ -114,11 +114,11 @@
aaaaaaaaaaaaaz
No match
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(a+)*zz/
aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -127,9 +127,9 @@
Last code unit = 'z'
Subject length lower bound = 2
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -138,7 +138,7 @@
Last code unit = 'z'
Subject length lower bound = 2
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I
Capturing subpattern count = 1
@@ -149,7 +149,7 @@
aaaaaaaaaaaaaz
No match
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_RECURSION=10)(a+)*zz/I
Capturing subpattern count = 1
@@ -158,9 +158,9 @@
Last code unit = 'z'
Subject length lower bound = 2
aaaaaaaaaaaaaz
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
aaaaaaaaaaaaaz\=recursion_limit=1000
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
Capturing subpattern count = 1
@@ -180,21 +180,21 @@
aaaaaaaaaaaaaz
No match
aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
# These three have infinitely nested recursions.
/((?2))((?1))/
abc
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
/((?(R2)a+|(?1)b))/
aaaabcde
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
/(?(R)a*(?1)|((?R))b)/
aaaabcde
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position
# The allusedtext modifier does not work with JIT, which does not maintain
# the leftchar/rightchar data.
Modified: code/trunk/testdata/testoutput16
===================================================================
--- code/trunk/testdata/testoutput16 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput16 2014-11-11 10:19:23 UTC (rev 140)
@@ -15,7 +15,7 @@
/(?(R)a*(?1)|((?R))b)/
aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
/abcd/I
Capturing subpattern count = 0
@@ -64,13 +64,13 @@
abcd
0: abcd (JIT)
ab\=ps
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
ab\=ph
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
xyz
No match (JIT)
xyz\=ps
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
/abcd/jit=2
abcd
@@ -84,13 +84,13 @@
/abcd/jit=2,jitfast
abcd
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
ab\=ps
Partial match: ab (JIT)
ab\=ph
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
xyz
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
/abcd/jit=3
abcd
@@ -256,7 +256,7 @@
aaaaaaaaaaaaaz
No match (JIT)
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -266,9 +266,9 @@
Subject length lower bound = 2
JIT compilation was successful
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1
@@ -278,7 +278,7 @@
Subject length lower bound = 2
JIT compilation was successful
aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I
Capturing subpattern count = 1
@@ -290,21 +290,21 @@
aaaaaaaaaaaaaz
No match (JIT)
aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
# These three have infinitely nested recursions.
/((?2))((?1))/
abc
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
/((?(R2)a+|(?1)b))/
aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
/(?(R)a*(?1)|((?R))b)/
aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached
# Invalid options disable JIT when called via pcre2_match(), causing the
# match to happen via the interpreter, but for fast JIT invalid options are
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput2 2014-11-11 10:19:23 UTC (rev 140)
@@ -993,7 +993,7 @@
0: abcd
1: a
2: d
-Copy substring 5 failed (-47): unknown or unset substring
+Copy substring 5 failed (-48): unknown or unset substring
/(.{20})/I
Capturing subpattern count = 1
@@ -1047,9 +1047,9 @@
2: <unset>
3: f
1G a (1)
-Get substring 2 failed (-47): unknown or unset substring
+Get substring 2 failed (-48): unknown or unset substring
3G f (1)
-Get substring 4 failed (-47): unknown or unset substring
+Get substring 4 failed (-48): unknown or unset substring
0L adef
1L a
2L
@@ -1062,7 +1062,7 @@
1G bc (2)
2G bc (2)
3G f (1)
-Get substring 4 failed (-47): unknown or unset substring
+Get substring 4 failed (-48): unknown or unset substring
0L bcdef
1L bc
2L bc
@@ -4363,7 +4363,7 @@
1: cd
2: gh
Number not found for group 'three'
-Copy substring 'three' failed (-47): unknown or unset substring
+Copy substring 'three' failed (-48): unknown or unset substring
/(?P<Tes>)(?P<Test>)/IB
------------------------------------------------------------------
@@ -5731,7 +5731,7 @@
1: a1
2: a1
Number not found for group 'Z'
-Copy substring 'Z' failed (-47): unknown or unset substring
+Copy substring 'Z' failed (-48): unknown or unset substring
C a1 (2) A (non-unique)
/(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
@@ -5772,7 +5772,7 @@
C a (1) A (non-unique)
cd\=copy=A
0: cd
-Copy substring 'A' failed (-47): unknown or unset substring
+Copy substring 'A' failed (-48): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4
@@ -5817,7 +5817,7 @@
1: a1
2: a1
Number not found for group 'Z'
-Get substring 'Z' failed (-47): unknown or unset substring
+Get substring 'Z' failed (-48): unknown or unset substring
G a1 (2) A (non-unique)
/^(?P<A>a)(?P<A>b)/I,dupnames
@@ -5848,7 +5848,7 @@
G a (1) A (non-unique)
cd\=get=A
0: cd
-Get substring 'A' failed (-47): unknown or unset substring
+Get substring 'A' failed (-48): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4
@@ -13607,4 +13607,88 @@
/(((((a)))))/parens_nest_limit=2
Failed: error 119 at offset 3: parentheses are too deeply nested
+# Tests for pcre2_substitute()
+
+/abc/replace=XYZ
+ 123123
+ 0: 123123
+ 123abc123
+ 1: 123XYZ123
+ 123abc123abc123
+ 1: 123XYZ123abc123
+ 123123\=zero_terminate
+ 0: 123123
+ 123abc123\=zero_terminate
+ 1: 123XYZ123
+ 123abc123abc123\=zero_terminate
+ 1: 123XYZ123abc123
+
+/abc/g,replace=XYZ
+ 123abc123
+ 1: 123XYZ123
+ 123abc123abc123
+ 2: 123XYZ123XYZ123
+
+/abc/replace=X$$Z
+ 123abc123
+ 1: 123X$Z123
+
+/abc/g,replace=X$$Z
+ 123abc123abc123
+ 2: 123X$Z123X$Z123
+
+/a(b)c(d)e/replace=X$1Y${2}Z
+ "abcde"
+ 1: "XbYdZ"
+
+/a(b)c(d)e/replace=X$1Y${2}Z,global
+ "abcde-abcde"
+ 2: "XbYdZ-XbYdZ"
+
+/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
+ "abcde"
+ 1: "Xb+dZ"
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
+ "abcde-abcde-"
+ 2: "Xb+dZ-Xb+dZ-"
+
+/abc/replace=a$++
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a$bad
+ 123abc
+Failed: error -48: unknown or unset substring
+
+/abc/replace=a${A234567890123456789_123456789012}z
+ 123abc
+Failed: error -48: unknown or unset substring
+
+/abc/replace=a${A23456789012345678901234567890123}z
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a${bcd
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a${b+d}z
+ 123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=[10]XYZ
+ 123abc123
+ 1: 123XYZ123
+
+/abc/replace=[9]XYZ
+ 123abc123
+Failed: error -47: no more memory
+
+/abc/replace=xyz
+ 1abc2\=partial_hard
+Failed: error -34: bad option value
+
+# End of substitute tests
+
# End of testinput2
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput5 2014-11-11 10:19:23 UTC (rev 140)
@@ -3997,5 +3997,11 @@
\x{100}\x{200}\x{300}
0: \x{100}\x{200}\x{300}
^^^^^^^^^^^^^^
+
+# Test UTF characters in a substitution
+/ábc/utf,replace=XሴZ
+ 123ábc123
+ 1: 123X\x{1234}Z123
+
# End of testinput5
Modified: code/trunk/testdata/testoutput6
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7 2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput7 2014-11-11 10:19:23 UTC (rev 140)
@@ -1218,7 +1218,7 @@
/ab\Cde/utf
abXde
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching
/(?<=ab\Cde)X/utf
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion