[Pcre-svn] [357] code/trunk: Add ${*MARK} feature to pcre2_substitute().

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [357] code/trunk: Add ${*MARK} feature to pcre2_substitute().
Revision: 357
          http://www.exim.org/viewvc/pcre2?view=rev&revision=357
Author:   ph10
Date:     2015-08-29 18:13:09 +0100 (Sat, 29 Aug 2015)
Log Message:
-----------
Add ${*MARK} feature to pcre2_substitute().


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcre2api.3
    code/trunk/src/pcre2_internal.h
    code/trunk/src/pcre2_substitute.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/ChangeLog    2015-08-29 17:13:09 UTC (rev 357)
@@ -162,7 +162,9 @@


45. Fixed a corner case of range optimization in JIT.

+46. Add the ${*MARK} facility to pcre2_substitute().

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3    2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/doc/pcre2api.3    2015-08-29 17:13:09 UTC (rev 357)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "18 August 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "29 August 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -2614,12 +2614,12 @@
 In the replacement string, which is interpreted as a UTF string in UTF mode,
 and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
 dollar character is an escape character that can specify the insertion of
-characters from capturing groups in the pattern. The following forms are
-recognized:
+characters from capturing groups or (*MARK) items in the pattern. The following
+forms are recognized:
 .sp
-  $$      insert a dollar character
-  $<n>    insert the contents of group <n>
-  ${<n>}  insert the contents of group <n>
+  $$                  insert a dollar character
+  $<n> or ${<n>}      insert the contents of group <n>
+  $*MARK or ${*MARK}  insert the name of the last (*MARK) encountered 
 .sp
 Either a group number or a group name can be given for <n>. Curly brackets are
 required only if the following character would be interpreted as part of the
@@ -2629,6 +2629,13 @@
 calling \fBpcre2_copy_byname()\fP or \fBpcre2_copy_bynumber()\fP as
 appropriate.
 .P
+The facility for inserting a (*MARK) name can be used to perform simple 
+simultaneous substitutions, as this \fBpcre2test\fP example shows:
+.sp
+  /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
+      apple lemon
+   2: pear orange
+.P
 The first seven arguments of \fBpcre2_substitute()\fP are the same as for
 \fBpcre2_match()\fP, except that the partial matching options are not
 permitted, and \fImatch_data\fP may be passed as NULL, in which case a match
@@ -2946,6 +2953,6 @@
 .rs
 .sp
 .nf
-Last updated: 18 August 2015
+Last updated: 29 August 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h    2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/src/pcre2_internal.h    2015-08-29 17:13:09 UTC (rev 357)
@@ -918,6 +918,7 @@
 #define STRING_NOTEMPTY_ATSTART_RIGHTPAR  "NOTEMPTY_ATSTART)"
 #define STRING_LIMIT_MATCH_EQ             "LIMIT_MATCH="
 #define STRING_LIMIT_RECURSION_EQ         "LIMIT_RECURSION="
+#define STRING_MARK                       "MARK"


#else /* SUPPORT_UNICODE */

@@ -1190,6 +1191,7 @@
 #define STRING_NOTEMPTY_ATSTART_RIGHTPAR  STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
 #define STRING_LIMIT_MATCH_EQ             STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
 #define STRING_LIMIT_RECURSION_EQ         STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
+#define STRING_MARK                       STR_M STR_A STR_R STR_K


#endif /* SUPPORT_UNICODE */


Modified: code/trunk/src/pcre2_substitute.c
===================================================================
--- code/trunk/src/pcre2_substitute.c    2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/src/pcre2_substitute.c    2015-08-29 17:13:09 UTC (rev 357)
@@ -205,6 +205,7 @@
       {
       int group, n;
       BOOL inparens;
+      BOOL star;
       PCRE2_SIZE sublength;
       PCRE2_UCHAR next;
       PCRE2_UCHAR name[33];
@@ -215,6 +216,7 @@
       group = -1;
       n = 0;
       inparens = FALSE;
+      star = FALSE;


       if (next == CHAR_LEFT_CURLY_BRACKET)
         {
@@ -223,8 +225,15 @@
         inparens = TRUE;
         }


-      if (next >= CHAR_0 && next <= CHAR_9)
+      if (next == CHAR_ASTERISK)
         {
+        if (++i == rlength) goto BAD;
+        next = replacement[i];
+        star = TRUE;
+        }
+
+      if (!star && next >= CHAR_0 && next <= CHAR_9)
+        {
         group = next - CHAR_0;
         while (++i < rlength)
           {
@@ -253,19 +262,42 @@
         }
       else i--;   /* Last code unit of name/number */


-      /* Have found a syntactically correct group number or name. */
+      /* Have found a syntactically correct group number or name, or
+      *name. Only *MARK is currently recognized. */


-      sublength = lengthleft;
-      if (group < 0)
-        rc = pcre2_substring_copy_byname(match_data, name,
-          buffer + buff_offset, &sublength);
+      if (star)
+        {
+        if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
+          {
+          PCRE2_SPTR mark = pcre2_get_mark(match_data);
+          if (mark != NULL)
+            {
+            while (*mark != 0)
+              {
+              if (lengthleft-- < 1) goto NOROOM;
+              buffer[buff_offset++] = *mark++;
+              }
+            }
+          }
+        else goto BAD;
+        }
+
+      /* Substitute the contents of a group. */
+
       else
-        rc = pcre2_substring_copy_bynumber(match_data, group,
-          buffer + buff_offset, &sublength);
+        {
+        sublength = lengthleft;
+        if (group < 0)
+          rc = pcre2_substring_copy_byname(match_data, name,
+            buffer + buff_offset, &sublength);
+        else
+          rc = pcre2_substring_copy_bynumber(match_data, group,
+            buffer + buff_offset, &sublength);
+        if (rc < 0) goto EXIT;


-      if (rc < 0) goto EXIT;
-      buff_offset += sublength;
-      lengthleft -= sublength;
+        buff_offset += sublength;
+        lengthleft -= sublength;
+        }
       }


    /* Handle a literal code unit */


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/testdata/testinput2    2015-08-29 17:13:09 UTC (rev 357)
@@ -4073,7 +4073,38 @@


 /(.)(.)/g,replace=$2$1
     abcdefgh  
+    
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
+    apple lemon blackberry
+    apple strudel
+    fruitless  


+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
+    apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
+    apple lemon blackberry
+    apple strudel
+    fruitless  
+    
+/(*:pear)apple/g,replace=${*MARKING} 
+    apple lemon blackberry
+
+/(*:pear)apple/g,replace=${*MARK-time
+    apple lemon blackberry
+
+/(*:pear)apple/g,replace=${*mark} 
+    apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
+    apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
+    apple lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
+    apple lemon blackberry
+
 # End of substitute tests 


"((?=(?(?=(?(?=(?(?=()))))))))"

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-08-26 13:35:58 UTC (rev 356)
+++ code/trunk/testdata/testoutput2    2015-08-29 17:13:09 UTC (rev 357)
@@ -13731,7 +13731,51 @@
 /(.)(.)/g,replace=$2$1
     abcdefgh  
  4: badcfehg
+    
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
+    apple lemon blackberry
+ 3: pear orange strawberry
+    apple strudel
+ 1: pear strudel
+    fruitless  
+ 0: fruitless


+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
+    apple lemon blackberry
+ 1: pear lemon blackberry
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
+    apple lemon blackberry
+ 3: <pear> <orange> <strawberry>
+    apple strudel
+ 1: <pear> strudel
+    fruitless  
+ 0: fruitless
+    
+/(*:pear)apple/g,replace=${*MARKING} 
+    apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple/g,replace=${*MARK-time
+    apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple/g,replace=${*mark} 
+    apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
+    apple lemon blackberry
+Failed: error -35: invalid replacement string
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
+    apple lemon blackberry
+Failed: error -48: no more memory
+
+/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
+    apple lemon blackberry
+ 3: pear orange strawberry
+
 # End of substitute tests 


"((?=(?(?=(?(?=(?(?=()))))))))"