[Pcre-svn] [140] code/trunk: Code for pcre2_substitute(), an…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [140] code/trunk: Code for pcre2_substitute(), and tests.
Revision: 140
          http://www.exim.org/viewvc/pcre2?view=rev&revision=140
Author:   ph10
Date:     2014-11-11 10:19:23 +0000 (Tue, 11 Nov 2014)


Log Message:
-----------
Code for pcre2_substitute(), and tests.

Modified Paths:
--------------
    code/trunk/doc/pcre2test.1
    code/trunk/src/pcre2.h.in
    code/trunk/src/pcre2_error.c
    code/trunk/src/pcre2_substitute.c
    code/trunk/src/pcre2test.c
    code/trunk/testdata/grepoutput
    code/trunk/testdata/testinput2
    code/trunk/testdata/testinput5
    code/trunk/testdata/testoutput10
    code/trunk/testdata/testoutput12-16
    code/trunk/testdata/testoutput14
    code/trunk/testdata/testoutput16
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput5
    code/trunk/testdata/testoutput6
    code/trunk/testdata/testoutput7


Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/doc/pcre2test.1    2014-11-11 10:19:23 UTC (rev 140)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "02 November 2014" "PCRE 10.00"
+.TH PCRE2TEST 1 "09 November 2014" "PCRE 10.00"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -447,7 +447,6 @@
       posix                     use the POSIX API
       stackguard=<number>       test the stackguard feature
       tables=[0|1|2]            select internal tables
-      use_length                use the pattern's length
 .sp
 The effects of these modifiers are described in the following sections.
 FIXME: Give more examples.
@@ -497,17 +496,12 @@
   /ab 32 59/hex
 .sp
 This feature is provided as a way of creating patterns that contain binary zero
-characters. When \fBhex\fP is set, it implies \fBuse_length\fP.
+characters. By default, \fBpcre2test\fP passes patterns as zero-terminated
+strings to \fBpcre2_compile()\fP, giving the length as PCRE2_ZERO_TERMINATED.
+However, for patterns specified in hexadecimal, the length of the pattern is
+passed.
 .
 .
-.SS "Using the pattern's length"
-.rs
-.sp
-By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
-\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the
-length of the pattern is passed. This is implied if \fBhex\fP is set.
-.
-.
 .SS "JIT compilation"
 .rs
 .sp
@@ -726,6 +720,7 @@
       ovector=<n>               set size of output vector
       recursion_limit=<n>       set a recursion limit
       startchar                 show startchar when relevant
+      zero_terminate            pass the subject as zero-terminated 
 .sp
 The effects of these modifiers are described in the following sections.
 FIXME: Give more examples.
@@ -931,6 +926,19 @@
 offsets.)
 .
 .
+.SS "Passing the subject as zero-terminated"
+.rs
+.sp
+By default, the subject string is passed to a native API matching function with
+its correct length. In order to test the facility for passing a zero-terminated
+string, the \fBzero_terminate\fP modifier is provided. It causes the length to
+be passed as PCRE2_ZERO_TERMINATED. (When matching via the POSIX interface, 
+this modifier has no effect, as there is no facility for passing a length.)
+.P
+When testing \fBpcre2_substitute\fP, this modifier also has the effect of
+passing the replacement string as zero-terminated.
+.
+.
 .SH "THE ALTERNATIVE MATCHING FUNCTION"
 .rs
 .sp
@@ -1192,6 +1200,6 @@
 .rs
 .sp
 .nf
-Last updated: 02 November 2014
+Last updated: 09 November 2014
 Copyright (c) 1997-2014 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2.h.in    2014-11-11 10:19:23 UTC (rev 140)
@@ -206,24 +206,25 @@
 #define PCRE2_ERROR_BADMODE           (-32)
 #define PCRE2_ERROR_BADOFFSET         (-33)
 #define PCRE2_ERROR_BADOPTION         (-34)
-#define PCRE2_ERROR_BADUTFOFFSET      (-35)
-#define PCRE2_ERROR_CALLOUT           (-36)  /* Never used by PCRE2 itself */
-#define PCRE2_ERROR_DFA_BADRESTART    (-37)
-#define PCRE2_ERROR_DFA_RECURSE       (-38)
-#define PCRE2_ERROR_DFA_UCOND         (-39)
-#define PCRE2_ERROR_DFA_UITEM         (-40)
-#define PCRE2_ERROR_DFA_WSSIZE        (-41)
-#define PCRE2_ERROR_INTERNAL          (-42)
-#define PCRE2_ERROR_JIT_BADOPTION     (-43)
-#define PCRE2_ERROR_JIT_STACKLIMIT    (-44)
-#define PCRE2_ERROR_MATCHLIMIT        (-45)
-#define PCRE2_ERROR_NOMEMORY          (-46)
-#define PCRE2_ERROR_NOSUBSTRING       (-47)
-#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48)
-#define PCRE2_ERROR_NULL              (-49)
-#define PCRE2_ERROR_RECURSELOOP       (-50)
-#define PCRE2_ERROR_RECURSIONLIMIT    (-51)
-#define PCRE2_ERROR_UNSET             (-52)
+#define PCRE2_ERROR_BADREPLACEMENT    (-35)
+#define PCRE2_ERROR_BADUTFOFFSET      (-36)
+#define PCRE2_ERROR_CALLOUT           (-37)  /* Never used by PCRE2 itself */
+#define PCRE2_ERROR_DFA_BADRESTART    (-38)
+#define PCRE2_ERROR_DFA_RECURSE       (-39)
+#define PCRE2_ERROR_DFA_UCOND         (-40)
+#define PCRE2_ERROR_DFA_UITEM         (-41)
+#define PCRE2_ERROR_DFA_WSSIZE        (-42)
+#define PCRE2_ERROR_INTERNAL          (-43)
+#define PCRE2_ERROR_JIT_BADOPTION     (-44)
+#define PCRE2_ERROR_JIT_STACKLIMIT    (-45)
+#define PCRE2_ERROR_MATCHLIMIT        (-46)
+#define PCRE2_ERROR_NOMEMORY          (-47)
+#define PCRE2_ERROR_NOSUBSTRING       (-48)
+#define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
+#define PCRE2_ERROR_NULL              (-50)
+#define PCRE2_ERROR_RECURSELOOP       (-51)
+#define PCRE2_ERROR_RECURSIONLIMIT    (-52)
+#define PCRE2_ERROR_UNSET             (-53)


/* Request types for pcre2_pattern_info() */


Modified: code/trunk/src/pcre2_error.c
===================================================================
--- code/trunk/src/pcre2_error.c    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2_error.c    2014-11-11 10:19:23 UTC (rev 140)
@@ -206,24 +206,25 @@
   "bad offset value\0"
   "bad option value\0"
   /* 35 */
+  "invalid replacement string\0"
   "bad offset into UTF string\0"
   "callout error code\0"              /* Never returned by PCRE2 itself */
   "invalid data in workspace for DFA restart\0"
   "too much recursion for DFA matching\0"
+  /* 40 */
   "backreference condition or recursion test not supported for DFA matching\0"
-  /* 40 */
   "item unsupported for DFA matching\0"
   "workspace size exceeded in DFA matching\0"
   "internal error - pattern overwritten?\0"
   "bad JIT option\0"
+  /* 45 */
   "JIT stack limit reached\0"
-  /* 45 */
   "match limit exceeded\0"
   "no more memory\0"
   "unknown or unset substring\0"
   "non-unique substring name\0"
+  /* 50 */
   "NULL argument passed\0"
-  /* 50 */
   "nested recursion at the same subject position\0"
   "recursion limit exceeded\0"
   "requested value is not set\0"


Modified: code/trunk/src/pcre2_substitute.c
===================================================================
--- code/trunk/src/pcre2_substitute.c    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2_substitute.c    2014-11-11 10:19:23 UTC (rev 140)
@@ -51,7 +51,7 @@
 *************************************************/


/* This function applies a compiled re to a subject string and creates a new
-string with substitutione. The first 7 arguments are the same as for
+string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.

Arguments:
@@ -69,6 +69,7 @@

 Returns:        > 0 number of substitutions made
                 < 0 an error code, including PCRE2_ERROR_NOMATCH if no match
+                  PCRE2_ERROR_BADREPLACEMENT means invalid use of $ 
 */


PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@@ -86,6 +87,11 @@
PCRE2_SIZE buff_offset, lengthleft, endlength;
PCRE2_SIZE *ovector;

+/* Partial matching is not valid. */
+
+if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
+ return PCRE2_ERROR_BADOPTION;
+
/* If no match data block is provided, create one. */

if (match_data == NULL)
@@ -129,11 +135,16 @@

   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
     match_data, mcontext);
+    
+  /* Any error other than no match returns the error code. No match when not 
+  doing the special after-empty-match global rematch, or when at the end of the 
+  subject, breaks the global loop. Otherwise, advance the starting point and 
+  try again. */ 


   if (rc < 0)
     {
-    if (goptions == 0 || rc != PCRE2_ERROR_NOMATCH || start_offset >= length)
-      break;
+    if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
+    if (goptions == 0 || start_offset >= length) break;
     start_offset++;
     if ((code->overall_options & PCRE2_UTF) != 0)
       {
@@ -149,6 +160,8 @@
     goptions = 0;
     continue;
     }
+    
+  /* Handle a successful match. */


subs++;
if (rc == 0) rc = ovector_count;
@@ -161,29 +174,34 @@

   for (i = 0; i < rlength; i++)
     {
-    if (replacement[i] == CHAR_DOLLAR_SIGN && i != rlength - 1)
+    if (replacement[i] == CHAR_DOLLAR_SIGN)
       {
-      int group = -1;
-      int n = 0;
-      BOOL inparens = FALSE;
-      PCRE2_SIZE j = i + 1;
-      PCRE2_SIZE sublength; 
-      PCRE2_UCHAR next = replacement[j];
-      PCRE2_UCHAR name[33];
+      int group, n;
+      BOOL inparens;
+      PCRE2_SIZE sublength;
+      PCRE2_UCHAR next;
+      PCRE2_UCHAR name[33];    
+ 
+      if (++i == rlength) goto BAD;
+      if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL;
+ 
+      group = -1;
+      n = 0;
+      inparens = FALSE;


       if (next == CHAR_LEFT_CURLY_BRACKET)
         {
-        if (j == rlength - 1) goto LITERAL;
+        if (++i == rlength) goto BAD;
+        next = replacement[i];
         inparens = TRUE;
-        next = replacement[++j];
         }


       if (next >= CHAR_0 && next <= CHAR_9)
         {
         group = next - CHAR_0;
-        while (j < rlength - 1)
+        while (i < rlength - 1)
           {
-          next = replacement[++j];
+          next = replacement[++i];
           if (next < CHAR_0 || next > CHAR_9) break;
           group = group * 10 + next - CHAR_0;
           }
@@ -194,31 +212,31 @@
         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
           {
           name[n++] = next;
-          if (n > 32) goto LITERAL;
-          if (j == rlength - 1) break;
-          next = replacement[++j];
+          if (n > 32) goto BAD;
+          if (i == rlength) break;
+          next = replacement[++i];
           }
+        if (n == 0) goto BAD;   
         name[n] = 0;
         }


       if (inparens)
         {
-        if (j == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto LITERAL;
+        if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD;
         }
-      else j--;   /* Last code unit of name/number */
-
+      else i--;   /* Last code unit of name/number */
+      
       /* Have found a syntactically correct group number or name. */


-      i = j;   /* Where to continue from */
-
+      sublength = lengthleft;
       if (group < 0)
         rc = pcre2_substring_copy_byname(match_data, name,
           buffer + buff_offset, &sublength);
       else
         rc = pcre2_substring_copy_bynumber(match_data, group,
           buffer + buff_offset, &sublength);
-
-      if (rc < 0) goto EXIT;
+          
+      if (rc < 0) goto EXIT;    
       buff_offset += sublength;
       lengthleft -= sublength;
       }
@@ -242,20 +260,16 @@
     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
   } while (global);  /* Repeat "do" loop */


-/* No match is a "normal" end; copy the rest of the subject and return the
-number of substitutions. */
+/* Copy the rest of the subject and return the number of substitutions. */

-if (rc == PCRE2_ERROR_NOMATCH)
-  {
-  rc = subs;
-  endlength = length - start_offset;
-  if (endlength + 1 >= lengthleft) goto NOROOM;
-  memcpy(buffer + buff_offset, subject + start_offset,
-    endlength*(PCRE2_CODE_UNIT_WIDTH/8));
-  buff_offset += endlength;
-  buffer[buff_offset] = 0;
-  *blength = buff_offset;
-  }
+rc = subs;
+endlength = length - start_offset;
+if (endlength + 1 > lengthleft) goto NOROOM;
+memcpy(buffer + buff_offset, subject + start_offset,
+  endlength*(PCRE2_CODE_UNIT_WIDTH/8));
+buff_offset += endlength;
+buffer[buff_offset] = 0;
+*blength = buff_offset;


EXIT:
if (match_data_created) pcre2_match_data_free(match_data);
@@ -264,6 +278,10 @@
NOROOM:
rc = PCRE2_ERROR_NOMEMORY;
goto EXIT;
+
+BAD:
+rc = PCRE2_ERROR_BADREPLACEMENT;
+goto EXIT;
}

/* End of pcre2_substitute.c */

Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/src/pcre2test.c    2014-11-11 10:19:23 UTC (rev 140)
@@ -165,6 +165,7 @@
 #define DEFAULT_OVECCOUNT 15    /* Default ovector count */
 #define JUNK_OFFSET 0xdeadbeef  /* For initializing ovector */
 #define LOOPREPEAT 500000       /* Default loop count for timing */
+#define REPLACE_BUFFSIZE 400    /* For replacement strings */
 #define VERSION_SIZE 64         /* Size of buffer for the version strings */


 /* Execution modes */
@@ -345,9 +346,9 @@
 #define CTL_JITVERIFY          0x00010000u
 #define CTL_MARK               0x00020000u
 #define CTL_MEMORY             0x00040000u
-#define CTL_PATLEN             0x00080000u
-#define CTL_POSIX              0x00100000u
-#define CTL_STARTCHAR          0x00200000u
+#define CTL_POSIX              0x00080000u
+#define CTL_STARTCHAR          0x00100000u
+#define CTL_ZERO_TERMINATE     0x00200000u


 #define CTL_BSR_SET          0x80000000u  /* This is informational */
 #define CTL_NL_SET           0x40000000u  /* This is informational */
@@ -376,6 +377,7 @@
   uint32_t  stackguard_test;
   uint32_t  tables_id;
   uint8_t   locale[32];
+  uint8_t   replacement[REPLACE_BUFFSIZE];
 } patctl;


 #define MAXCPYGET 10
@@ -485,13 +487,14 @@
   { "posix",               MOD_PAT,  MOD_CTL, CTL_POSIX,                 PO(control) },
   { "ps",                  MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,        DO(options) },
   { "recursion_limit",     MOD_CTM,  MOD_INT, 0,                         MO(recursion_limit) },
+  { "replace",             MOD_PAT,  MOD_STR, 0,                         PO(replacement) },
   { "stackguard",          MOD_PAT,  MOD_INT, 0,                         PO(stackguard_test) },
   { "startchar",           MOD_PND,  MOD_CTL, CTL_STARTCHAR,             PO(control) },
   { "tables",              MOD_PAT,  MOD_INT, 0,                         PO(tables_id) },
   { "ucp",                 MOD_PATP, MOD_OPT, PCRE2_UCP,                 PO(options) },
   { "ungreedy",            MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,            PO(options) },
-  { "use_length",          MOD_PAT,  MOD_CTL, CTL_PATLEN,                PO(control) },
-  { "utf",                 MOD_PATP, MOD_OPT, PCRE2_UTF,                 PO(options) }
+  { "utf",                 MOD_PATP, MOD_OPT, PCRE2_UTF,                 PO(options) },
+  { "zero_terminate",      MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,        DO(control) }
 };


 #define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct)
@@ -945,6 +948,17 @@
   else \
     pcre2_set_recursion_limit_32(G(a,32),b)


+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+  if (test_mode == PCRE8_MODE) \
+    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
+      (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
+  else if (test_mode == PCRE16_MODE) \
+    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
+      (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
+  else \
+    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
+      (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
+
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
   if (test_mode == PCRE8_MODE) \
     a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
@@ -1298,6 +1312,16 @@
   else \
     G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b)


+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+    a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
+      G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \
+      (G(PCRE2_UCHAR,BITONE) *)k,l); \
+  else \
+    a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
+      G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \
+      (G(PCRE2_UCHAR,BITTWO) *)k,l)
+
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
   if (test_mode == G(G(PCRE,BITONE),_MODE)) \
     a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
@@ -1466,6 +1490,9 @@
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+  a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
+    (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
   a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -1544,6 +1571,9 @@
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+  a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
+    (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
   a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -1622,6 +1652,9 @@
 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
 #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
+#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
+  a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
+    (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
   a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@@ -3199,9 +3232,9 @@
   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
   ((controls & CTL_MARK) != 0)? " mark" : "",
   ((controls & CTL_MEMORY) != 0)? " memory" : "",
-  ((controls & CTL_PATLEN) != 0)? " use_length" : "",
   ((controls & CTL_POSIX) != 0)? " posix" : "",
-  ((controls & CTL_STARTCHAR) != 0)? " startchar" : "");
+  ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
+  ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
 }



@@ -3672,6 +3705,7 @@
/* Look for modifiers and options after the final delimiter. */

if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
+utf = (pat_patctl.options & PCRE2_UTF) != 0;

 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
 specified. */
@@ -3679,7 +3713,6 @@
 if (pat_patctl.jit == 0 &&
     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
   pat_patctl.jit = 7;
-utf = (pat_patctl.options & PCRE2_UTF) != 0;


/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
in callouts. Convert to binary if required. */
@@ -3786,6 +3819,7 @@
/* Check for features that the POSIX interface does not support. */

if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
+ if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
if (timeit > 0) prmsg(&msg, "timing");
@@ -3863,11 +3897,11 @@
break;
}

-/* The pattern in now in pbuffer[8|16|32], with the length in patlen. By
+/* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
default, however, we pass a zero-terminated pattern. The length is passed only
-if we had a hex pattern or if use_length was set. */
+if we had a hex pattern. */

-if ((pat_patctl.control & (CTL_PATLEN|CTL_HEXPAT)) == 0) patlen = -1;
+if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;

/* Compile many times when timing. */

@@ -4491,22 +4525,6 @@
 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
 ulen = len/code_unit_size;                /* Length in code units */


-/* If we have explicit valgrind support, mark the data from after its end to
-the end of the buffer as unaddressable, so that a read over the end of the
-buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
-building with valgrind support, at least move the data to the end of the buffer
-so that it might at least cause a crash. If we are using the POSIX interface,
-we must include the terminating zero. */
-
-pp = dbuffer;
-c = code_unit_size * ((pat_patctl.control & CTL_POSIX) != 0)? 1:0;
-
-#ifdef SUPPORT_VALGRIND
- VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
-#else
- pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
-#endif
-
/* If the string was terminated by \= we must now interpret modifiers. */

if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
@@ -4522,10 +4540,27 @@
return PR_OK;
}

-/* Now run the pattern match: len contains the byte length, ulen contains the
-code unit length, and pp points to the subject string. POSIX matching is only
-possible in 8-bit mode, and it does not support timing or other fancy features.
-Some were checked at compile time, but we need to check the match-time settings
+/* If we have explicit valgrind support, mark the data from after its end to
+the end of the buffer as unaddressable, so that a read over the end of the
+buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
+building with valgrind support, at least move the data to the end of the buffer
+so that it might at least cause a crash. If we are using the POSIX interface,
+or testing zero-termination, we must include the terminating zero. */
+
+pp = dbuffer;
+c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
+                       (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
+
+#ifdef SUPPORT_VALGRIND
+  VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
+#else
+  pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
+#endif
+
+/* We now have len containing the byte length, ulen containing the code unit
+length, and pp pointing to the subject string. POSIX matching is only possible
+in 8-bit mode, and it does not support timing or other fancy features. Some
+were checked at compile time, but we need to check the match-time settings
 here. */


#ifdef SUPPORT_PCRE2_8
@@ -4621,6 +4656,11 @@
dat_datctl.control &= ~CTL_ALLUSEDTEXT;
}

+/* Handle passing the subject as zero-terminated. */
+
+if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
+ ulen = PCRE2_ZERO_TERMINATED;
+
/* Enable display of malloc/free if wanted. */

show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
@@ -4676,10 +4716,135 @@
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
}

-/* Loop for global matching */
+/* If a replacement string is provided, call pcre2_substitute() instead of one
+of the matching functions. First we have to convert the replacement string to
+the appropriate width. */

-for (gmatched = 0;; gmatched++)
+if (pat_patctl.replacement[0] != 0)
   {
+  int rc;
+  uint8_t *pr;
+  uint8_t rbuffer[REPLACE_BUFFSIZE];
+  uint8_t nbuffer[REPLACE_BUFFSIZE];
+  uint32_t goption;
+  PCRE2_SIZE rlen;
+  PCRE2_SIZE nsize;
+
+#ifdef SUPPORT_PCRE2_8
+  uint8_t *r8 = NULL;
+#endif
+#ifdef SUPPORT_PCRE2_16
+  uint16_t *r16 = NULL;
+#endif
+#ifdef SUPPORT_PCRE2_32
+  uint32_t *r32 = NULL;
+#endif
+
+  goption = ((pat_patctl.control & CTL_GLOBAL) == 0)? 0 :
+    PCRE2_SUBSTITUTE_GLOBAL;
+  SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
+  pr = pat_patctl.replacement;
+
+  /* If the replacement starts with '[<number>]' we interpret that as a length
+  value for the replacement buffer. */
+
+  nsize = REPLACE_BUFFSIZE/code_unit_size;
+  if (*pr == '[')
+    {
+    PCRE2_SIZE n = 0;
+    while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
+    if (*pr++ != ']')
+      {
+      fprintf(outfile, "Bad buffer size in replacement string\n");
+      return PR_OK;
+      }
+    if (n > nsize)
+      {
+      fprintf(outfile, "Replacement buffer setting (%ld) is too large "
+        "(max %ld)\n", n, nsize);
+      return PR_OK;
+      }
+    nsize = n;
+    }
+
+  /* Now copy the replacement string to a buffer of the appropriate width. */
+
+  while ((c = *pr++) != 0)
+    {
+    if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
+
+    /* At present no escape processing is provided for replacements. */
+
+#ifdef SUPPORT_PCRE2_8
+    if (test_mode == PCRE8_MODE)
+      {
+      if (utf)
+        {
+        r8 += ord2utf8(c, r8);
+        }
+      else
+        {
+        *r8++ = c;
+        }
+      }
+#endif
+#ifdef SUPPORT_PCRE2_16
+    if (test_mode == PCRE16_MODE)
+      {
+      if (utf)
+        {
+        if (c >= 0x10000u)
+          {
+          c-= 0x10000u;
+          *r16++ = 0xD800 | (c >> 10);
+          *r16++ = 0xDC00 | (c & 0x3ff);
+          }
+        else
+          *r16++ = c;
+        }
+      else
+        {
+        *r16++ = c;
+        }
+      }
+#endif
+#ifdef SUPPORT_PCRE2_32
+    if (test_mode == PCRE32_MODE)
+      {
+      *r32++ = c;
+      }
+#endif
+    }
+
+  SET(*r, 0);
+  if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
+    rlen = PCRE2_ZERO_TERMINATED;
+  else
+    rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
+  PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
+    dat_datctl.options|goption, match_data, dat_context,
+    rbuffer, rlen, nbuffer, &nsize);
+
+  if (rc < 0)
+    {
+    fprintf(outfile, "Failed: error %d: ", rc);
+    PCRE2_GET_ERROR_MESSAGE(nsize, rc, pbuffer);
+    PCHARSV(CASTVAR(void *, pbuffer), 0, nsize, FALSE, outfile);
+    }
+  else
+    {
+    fprintf(outfile, "%2d: ", rc);
+    PCHARSV(nbuffer, 0, nsize, utf, outfile);
+    }
+
+  fprintf(outfile, "\n");
+  }   /* End of substitution handling */
+
+/* When a replacement string is not provided, run a loop for global matching
+with one of the basic matching functions. */
+
+else for (gmatched = 0;; gmatched++)
+  {
   PCRE2_SIZE j;
   int capcount;
   PCRE2_SIZE *ovector;
@@ -4689,7 +4854,7 @@


   /* Fill the ovector with junk to detect elements that do not get set
   when they should be. */
-    
+
   for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;


   /* When matching is via pcre2_match(), we will detect the use of JIT via the
@@ -4787,7 +4952,7 @@
       {
       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
       }
-      
+
     /* Run a single DFA or NFA match. */


     if ((dat_datctl.control & CTL_DFA) != 0)
@@ -4888,7 +5053,7 @@
         fprintf(outfile, "Start of matched string is beyond its end - "
           "displaying from end to start.\n");
         }
-        
+
       fprintf(outfile, "%2d: ", i/2);


       /* Check for an unset group */
@@ -4900,15 +5065,15 @@
         }


       /* Check for silly offsets, in particular, values that have not been
-      set when they should have been. */ 
-        
+      set when they should have been. */
+
       if (start > ulen || end > ulen)
         {
         fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
           start, end);
-        continue;    
-        }  
- 
+        continue;
+        }
+
       /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with
       JIT, it is disabled above, with a comment.) When the match is done by the
       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is


Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testinput2    2014-11-11 10:19:23 UTC (rev 140)
@@ -4008,4 +4008,65 @@


/(((((a)))))/parens_nest_limit=2

+# Tests for pcre2_substitute()
+
+/abc/replace=XYZ
+    123123
+    123abc123
+    123abc123abc123
+    123123\=zero_terminate
+    123abc123\=zero_terminate
+    123abc123abc123\=zero_terminate
+
+/abc/g,replace=XYZ
+    123abc123
+    123abc123abc123
+
+/abc/replace=X$$Z
+    123abc123
+
+/abc/g,replace=X$$Z
+    123abc123abc123
+
+/a(b)c(d)e/replace=X$1Y${2}Z
+    "abcde"
+
+/a(b)c(d)e/replace=X$1Y${2}Z,global
+    "abcde-abcde"
+
+/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
+    "abcde"
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
+    "abcde-abcde-"
+
+/abc/replace=a$++
+    123abc
+
+/abc/replace=a$bad
+    123abc
+
+/abc/replace=a${A234567890123456789_123456789012}z
+    123abc
+
+/abc/replace=a${A23456789012345678901234567890123}z
+    123abc
+
+/abc/replace=a${bcd
+    123abc
+
+/abc/replace=a${b+d}z
+    123abc
+
+/abc/replace=[10]XYZ
+    123abc123
+
+/abc/replace=[9]XYZ
+    123abc123
+    
+/abc/replace=xyz
+    1abc2\=partial_hard
+
+# End of substitute tests 
+
 # End of testinput2 


Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testinput5    2014-11-11 10:19:23 UTC (rev 140)
@@ -1629,5 +1629,10 @@


 /\x{100}\x{200}\K\x{300}/utf,startchar
     \x{100}\x{200}\x{300}
+    
+# Test UTF characters in a substitution


+/ábc/utf,replace=XሴZ
+    123ábc123
+
 # End of testinput5 


Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput10    2014-11-11 10:19:23 UTC (rev 140)
@@ -888,7 +888,7 @@
     a\x{123}aa\=offset=1
  0: aa
     a\x{123}aa\=offset=2
-Error -35 (bad UTF-8 offset)
+Error -36 (bad UTF-8 offset)
     a\x{123}aa\=offset=3
  0: aa
     a\x{123}aa\=offset=4


Modified: code/trunk/testdata/testoutput12-16
===================================================================
--- code/trunk/testdata/testoutput12-16    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput12-16    2014-11-11 10:19:23 UTC (rev 140)
@@ -851,9 +851,9 @@


 /a/utf
     \x{10000}\=offset=1
-Error -35 (bad UTF-16 offset)
+Error -36 (bad UTF-16 offset)
     \x{10000}ab\=offset=1
-Error -35 (bad UTF-16 offset)
+Error -36 (bad UTF-16 offset)
     \x{10000}ab\=offset=2
  0: a
     \x{10000}ab\=offset=3


Modified: code/trunk/testdata/testoutput14
===================================================================
--- code/trunk/testdata/testoutput14    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput14    2014-11-11 10:19:23 UTC (rev 140)
@@ -114,11 +114,11 @@
     aaaaaaaaaaaaaz
 No match
     aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


 /(a+)*zz/
     aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded


 /(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
@@ -127,9 +127,9 @@
 Last code unit = 'z'
 Subject length lower bound = 2
     aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
     aaaaaaaaaaaaaz\=match_limit=60000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


 /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
@@ -138,7 +138,7 @@
 Last code unit = 'z'
 Subject length lower bound = 2
     aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


 /(*LIMIT_MATCH=60000)(a+)*zz/I
 Capturing subpattern count = 1
@@ -149,7 +149,7 @@
     aaaaaaaaaaaaaz
 No match
     aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


 /(*LIMIT_RECURSION=10)(a+)*zz/I
 Capturing subpattern count = 1
@@ -158,9 +158,9 @@
 Last code unit = 'z'
 Subject length lower bound = 2
     aaaaaaaaaaaaaz
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded
     aaaaaaaaaaaaaz\=recursion_limit=1000
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded


 /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
 Capturing subpattern count = 1
@@ -180,21 +180,21 @@
     aaaaaaaaaaaaaz
 No match
     aaaaaaaaaaaaaz\=recursion_limit=10
-Failed: error -51: recursion limit exceeded
+Failed: error -52: recursion limit exceeded


# These three have infinitely nested recursions.

 /((?2))((?1))/
     abc
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position


 /((?(R2)a+|(?1)b))/
     aaaabcde
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position


 /(?(R)a*(?1)|((?R))b)/
     aaaabcde
-Failed: error -50: nested recursion at the same subject position
+Failed: error -51: nested recursion at the same subject position


# The allusedtext modifier does not work with JIT, which does not maintain
# the leftchar/rightchar data.

Modified: code/trunk/testdata/testoutput16
===================================================================
--- code/trunk/testdata/testoutput16    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput16    2014-11-11 10:19:23 UTC (rev 140)
@@ -15,7 +15,7 @@


 /(?(R)a*(?1)|((?R))b)/
     aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached


 /abcd/I
 Capturing subpattern count = 0
@@ -64,13 +64,13 @@
     abcd
  0: abcd (JIT)
     ab\=ps
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
     ab\=ph
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
     xyz
 No match (JIT)
     xyz\=ps
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option


 /abcd/jit=2
     abcd
@@ -84,13 +84,13 @@


 /abcd/jit=2,jitfast
     abcd
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
     ab\=ps
 Partial match: ab (JIT)
     ab\=ph
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option
     xyz
-Failed: error -43: bad JIT option
+Failed: error -44: bad JIT option


 /abcd/jit=3
     abcd
@@ -256,7 +256,7 @@
     aaaaaaaaaaaaaz
 No match (JIT)
     aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


 /(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
@@ -266,9 +266,9 @@
 Subject length lower bound = 2
 JIT compilation was successful
     aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded
     aaaaaaaaaaaaaz\=match_limit=60000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


 /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
 Capturing subpattern count = 1
@@ -278,7 +278,7 @@
 Subject length lower bound = 2
 JIT compilation was successful
     aaaaaaaaaaaaaz
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


 /(*LIMIT_MATCH=60000)(a+)*zz/I
 Capturing subpattern count = 1
@@ -290,21 +290,21 @@
     aaaaaaaaaaaaaz
 No match (JIT)
     aaaaaaaaaaaaaz\=match_limit=3000
-Failed: error -45: match limit exceeded
+Failed: error -46: match limit exceeded


# These three have infinitely nested recursions.

 /((?2))((?1))/
     abc
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached


 /((?(R2)a+|(?1)b))/
     aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached


 /(?(R)a*(?1)|((?R))b)/
     aaaabcde
-Failed: error -44: JIT stack limit reached
+Failed: error -45: JIT stack limit reached


# Invalid options disable JIT when called via pcre2_match(), causing the
# match to happen via the interpreter, but for fast JIT invalid options are

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput2    2014-11-11 10:19:23 UTC (rev 140)
@@ -993,7 +993,7 @@
  0: abcd
  1: a
  2: d
-Copy substring 5 failed (-47): unknown or unset substring
+Copy substring 5 failed (-48): unknown or unset substring


/(.{20})/I
Capturing subpattern count = 1
@@ -1047,9 +1047,9 @@
2: <unset>
3: f
1G a (1)
-Get substring 2 failed (-47): unknown or unset substring
+Get substring 2 failed (-48): unknown or unset substring
3G f (1)
-Get substring 4 failed (-47): unknown or unset substring
+Get substring 4 failed (-48): unknown or unset substring
0L adef
1L a
2L
@@ -1062,7 +1062,7 @@
1G bc (2)
2G bc (2)
3G f (1)
-Get substring 4 failed (-47): unknown or unset substring
+Get substring 4 failed (-48): unknown or unset substring
0L bcdef
1L bc
2L bc
@@ -4363,7 +4363,7 @@
1: cd
2: gh
Number not found for group 'three'
-Copy substring 'three' failed (-47): unknown or unset substring
+Copy substring 'three' failed (-48): unknown or unset substring

/(?P<Tes>)(?P<Test>)/IB
------------------------------------------------------------------
@@ -5731,7 +5731,7 @@
1: a1
2: a1
Number not found for group 'Z'
-Copy substring 'Z' failed (-47): unknown or unset substring
+Copy substring 'Z' failed (-48): unknown or unset substring
C a1 (2) A (non-unique)

 /(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
@@ -5772,7 +5772,7 @@
   C a (1) A (non-unique)
     cd\=copy=A
  0: cd
-Copy substring 'A' failed (-47): unknown or unset substring
+Copy substring 'A' failed (-48): unknown or unset substring


/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4
@@ -5817,7 +5817,7 @@
1: a1
2: a1
Number not found for group 'Z'
-Get substring 'Z' failed (-47): unknown or unset substring
+Get substring 'Z' failed (-48): unknown or unset substring
G a1 (2) A (non-unique)

 /^(?P<A>a)(?P<A>b)/I,dupnames
@@ -5848,7 +5848,7 @@
   G a (1) A (non-unique)
     cd\=get=A
  0: cd
-Get substring 'A' failed (-47): unknown or unset substring
+Get substring 'A' failed (-48): unknown or unset substring


/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4
@@ -13607,4 +13607,88 @@
/(((((a)))))/parens_nest_limit=2
Failed: error 119 at offset 3: parentheses are too deeply nested

+# Tests for pcre2_substitute()
+
+/abc/replace=XYZ
+    123123
+ 0: 123123
+    123abc123
+ 1: 123XYZ123
+    123abc123abc123
+ 1: 123XYZ123abc123
+    123123\=zero_terminate
+ 0: 123123
+    123abc123\=zero_terminate
+ 1: 123XYZ123
+    123abc123abc123\=zero_terminate
+ 1: 123XYZ123abc123
+
+/abc/g,replace=XYZ
+    123abc123
+ 1: 123XYZ123
+    123abc123abc123
+ 2: 123XYZ123XYZ123
+
+/abc/replace=X$$Z
+    123abc123
+ 1: 123X$Z123
+
+/abc/g,replace=X$$Z
+    123abc123abc123
+ 2: 123X$Z123X$Z123
+
+/a(b)c(d)e/replace=X$1Y${2}Z
+    "abcde"
+ 1: "XbYdZ"
+
+/a(b)c(d)e/replace=X$1Y${2}Z,global
+    "abcde-abcde"
+ 2: "XbYdZ-XbYdZ"
+
+/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
+    "abcde"
+ 1: "Xb+dZ"
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
+    "abcde-abcde-"
+ 2: "Xb+dZ-Xb+dZ-"
+
+/abc/replace=a$++
+    123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a$bad
+    123abc
+Failed: error -48: unknown or unset substring
+
+/abc/replace=a${A234567890123456789_123456789012}z
+    123abc
+Failed: error -48: unknown or unset substring
+
+/abc/replace=a${A23456789012345678901234567890123}z
+    123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a${bcd
+    123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=a${b+d}z
+    123abc
+Failed: error -35: invalid replacement string
+
+/abc/replace=[10]XYZ
+    123abc123
+ 1: 123XYZ123
+
+/abc/replace=[9]XYZ
+    123abc123
+Failed: error -47: no more memory
+    
+/abc/replace=xyz
+    1abc2\=partial_hard
+Failed: error -34: bad option value
+
+# End of substitute tests 
+
 # End of testinput2 


Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput5    2014-11-11 10:19:23 UTC (rev 140)
@@ -3997,5 +3997,11 @@
     \x{100}\x{200}\x{300}
  0: \x{100}\x{200}\x{300}
     ^^^^^^^^^^^^^^
+    
+# Test UTF characters in a substitution


+/ábc/utf,replace=XሴZ
+    123ábc123
+ 1: 123X\x{1234}Z123
+
 # End of testinput5 


Modified: code/trunk/testdata/testoutput6
===================================================================
(Binary files differ)

Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7    2014-11-09 07:23:55 UTC (rev 139)
+++ code/trunk/testdata/testoutput7    2014-11-11 10:19:23 UTC (rev 140)
@@ -1218,7 +1218,7 @@


 /ab\Cde/utf
     abXde
-Failed: error -40: item unsupported for DFA matching
+Failed: error -41: item unsupported for DFA matching


/(?<=ab\Cde)X/utf
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion