Revision: 357
http://www.exim.org/viewvc/pcre2?view=rev&revision=357
Author: ph10
Date: 2015-08-29 18:13:09 +0100 (Sat, 29 Aug 2015)
Log Message:
-----------
Add ${*MARK} feature to pcre2_substitute().
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcre2api.3
code/trunk/src/pcre2_internal.h
code/trunk/src/pcre2_substitute.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/ChangeLog 2015-08-29 17:13:09 UTC (rev 357)
@@ -162,7 +162,9 @@
45. Fixed a corner case of range optimization in JIT.
+46. Add the ${*MARK} facility to pcre2_substitute().
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3 2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/doc/pcre2api.3 2015-08-29 17:13:09 UTC (rev 357)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "18 August 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "29 August 2015" "PCRE2 10.21"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -2614,12 +2614,12 @@
In the replacement string, which is interpreted as a UTF string in UTF mode,
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
dollar character is an escape character that can specify the insertion of
-characters from capturing groups in the pattern. The following forms are
-recognized:
+characters from capturing groups or (*MARK) items in the pattern. The following
+forms are recognized:
.sp
- $$ insert a dollar character
- $<n> insert the contents of group <n>
- ${<n>} insert the contents of group <n>
+ $$ insert a dollar character
+ $<n> or ${<n>} insert the contents of group <n>
+ $*MARK or ${*MARK} insert the name of the last (*MARK) encountered
.sp
Either a group number or a group name can be given for <n>. Curly brackets are
required only if the following character would be interpreted as part of the
@@ -2629,6 +2629,13 @@
calling \fBpcre2_copy_byname()\fP or \fBpcre2_copy_bynumber()\fP as
appropriate.
.P
+The facility for inserting a (*MARK) name can be used to perform simple
+simultaneous substitutions, as this \fBpcre2test\fP example shows:
+.sp
+ /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
+ apple lemon
+ 2: pear orange
+.P
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
\fBpcre2_match()\fP, except that the partial matching options are not
permitted, and \fImatch_data\fP may be passed as NULL, in which case a match
@@ -2946,6 +2953,6 @@
.rs
.sp
.nf
-Last updated: 18 August 2015
+Last updated: 29 August 2015
Copyright (c) 1997-2015 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h 2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/src/pcre2_internal.h 2015-08-29 17:13:09 UTC (rev 357)
@@ -918,6 +918,7 @@
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
+#define STRING_MARK "MARK"
#else /* SUPPORT_UNICODE */
@@ -1190,6 +1191,7 @@
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
+#define STRING_MARK STR_M STR_A STR_R STR_K
#endif /* SUPPORT_UNICODE */
Modified: code/trunk/src/pcre2_substitute.c
===================================================================
--- code/trunk/src/pcre2_substitute.c 2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/src/pcre2_substitute.c 2015-08-29 17:13:09 UTC (rev 357)
@@ -205,6 +205,7 @@
{
int group, n;
BOOL inparens;
+ BOOL star;
PCRE2_SIZE sublength;
PCRE2_UCHAR next;
PCRE2_UCHAR name[33];
@@ -215,6 +216,7 @@
group = -1;
n = 0;
inparens = FALSE;
+ star = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET)
{
@@ -223,8 +225,15 @@
inparens = TRUE;
}
- if (next >= CHAR_0 && next <= CHAR_9)
+ if (next == CHAR_ASTERISK)
{
+ if (++i == rlength) goto BAD;
+ next = replacement[i];
+ star = TRUE;
+ }
+
+ if (!star && next >= CHAR_0 && next <= CHAR_9)
+ {
group = next - CHAR_0;
while (++i < rlength)
{
@@ -253,19 +262,42 @@
}
else i--; /* Last code unit of name/number */
- /* Have found a syntactically correct group number or name. */
+ /* Have found a syntactically correct group number or name, or
+ *name. Only *MARK is currently recognized. */
- sublength = lengthleft;
- if (group < 0)
- rc = pcre2_substring_copy_byname(match_data, name,
- buffer + buff_offset, &sublength);
+ if (star)
+ {
+ if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
+ {
+ PCRE2_SPTR mark = pcre2_get_mark(match_data);
+ if (mark != NULL)
+ {
+ while (*mark != 0)
+ {
+ if (lengthleft-- < 1) goto NOROOM;
+ buffer[buff_offset++] = *mark++;
+ }
+ }
+ }
+ else goto BAD;
+ }
+
+ /* Substitute the contents of a group. */
+
else
- rc = pcre2_substring_copy_bynumber(match_data, group,
- buffer + buff_offset, &sublength);
+ {
+ sublength = lengthleft;
+ if (group < 0)
+ rc = pcre2_substring_copy_byname(match_data, name,
+ buffer + buff_offset, &sublength);
+ else
+ rc = pcre2_substring_copy_bynumber(match_data, group,
+ buffer + buff_offset, &sublength);
+ if (rc < 0) goto EXIT;
- if (rc < 0) goto EXIT;
- buff_offset += sublength;
- lengthleft -= sublength;
+ buff_offset += sublength;
+ lengthleft -= sublength;
+ }
}
/* Handle a literal code unit */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/testdata/testinput2 2015-08-29 17:13:09 UTC (rev 357)
@@ -4073,7 +4073,38 @@
/(.)(.)/g,replace=$2$1
abcdefgh
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
+ apple lemon blackberry
+ apple strudel
+ fruitless
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
+ apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
+ apple lemon blackberry
+ apple strudel
+ fruitless
+
+/(*:pear)apple/g,replace=${*MARKING}
+ apple lemon blackberry
+
+/(*:pear)apple/g,replace=${*MARK-time
+ apple lemon blackberry
+
+/(*:pear)apple/g,replace=${*mark}
+ apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
+ apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
+ apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
+ apple lemon blackberry
+
# End of substitute tests
"((?=(?(?=(?(?=(?(?=()))))))))"
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/testdata/testoutput2 2015-08-29 17:13:09 UTC (rev 357)
@@ -13731,7 +13731,51 @@
/(.)(.)/g,replace=$2$1
abcdefgh
4: badcfehg
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
+ apple lemon blackberry
+ 3: pear orange strawberry
+ apple strudel
+ 1: pear strudel
+ fruitless
+ 0: fruitless
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
+ apple lemon blackberry
+ 1: pear lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
+ apple lemon blackberry
+ 3: <pear> <orange> <strawberry>
+ apple strudel
+ 1: <pear> strudel
+ fruitless
+ 0: fruitless
+
+/(*:pear)apple/g,replace=${*MARKING}
+ apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple/g,replace=${*MARK-time
+ apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple/g,replace=${*mark}
+ apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
+ apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
+ apple lemon blackberry
+Failed: error -48: no more memory
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
+ apple lemon blackberry
+ 3: pear orange strawberry
+
# End of substitute tests
"((?=(?(?=(?(?=(?(?=()))))))))"