Revision: 760
http://www.exim.org/viewvc/pcre2?view=rev&revision=760
Author: ph10
Date: 2017-04-18 17:21:50 +0100 (Tue, 18 Apr 2017)
Log Message:
-----------
Implement Perl's /n option, which is the same as PCRE2_NO_AUTO_CAPTURE.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcre2api.3
code/trunk/doc/pcre2pattern.3
code/trunk/doc/pcre2syntax.3
code/trunk/doc/pcre2test.1
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2test.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/ChangeLog 2017-04-18 16:21:50 UTC (rev 760)
@@ -151,7 +151,9 @@
29. Implemented PCRE2_EXTENDED_MORE and related /xx and (?xx) features.
+30. Implemented (?n) and (?n:...) for PCRE2_NO_AUTO_CAPTURE, as Perl now has it.
+
Version 10.23 14-February-2017
------------------------------
Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/doc/pcre2api.3 2017-04-18 16:21:50 UTC (rev 760)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "17 April 2017" "PCRE2 10.30"
+.TH PCRE2API 3 "18 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -1426,8 +1426,8 @@
If this option is set, it disables the use of numbered capturing parentheses in
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
were followed by ?: but named parentheses can still be used for capturing (and
-they acquire numbers in the usual way). There is no equivalent of this option
-in Perl. Note that, if this option is set, references to capturing groups (back
+they acquire numbers in the usual way). This is the same as Perl's /n option.
+Note that, when this option is set, references to capturing groups (back
references or recursion/subroutine calls) may only refer to named groups,
though the reference can be by name or by number.
.sp
@@ -3402,6 +3402,6 @@
.rs
.sp
.nf
-Last updated: 17 April 2017
+Last updated: 18 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi
Modified: code/trunk/doc/pcre2pattern.3
===================================================================
--- code/trunk/doc/pcre2pattern.3 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/doc/pcre2pattern.3 2017-04-18 16:21:50 UTC (rev 760)
@@ -1543,12 +1543,13 @@
.rs
.sp
The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
-PCRE2_EXTENDED, and PCRE2_EXTENDED_MORE options (which are Perl-compatible) can
-be changed from within the pattern by a sequence of Perl option letters
-enclosed between "(?" and ")". The option letters are
+PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options (which
+are Perl-compatible) can be changed from within the pattern by a sequence of
+Perl option letters enclosed between "(?" and ")". The option letters are
.sp
i for PCRE2_CASELESS
m for PCRE2_MULTILINE
+ n for PCRE2_NO_AUTO_CAPTURE
s for PCRE2_DOTALL
x for PCRE2_EXTENDED
xx for PCRE2_EXTENDED_MORE
Modified: code/trunk/doc/pcre2syntax.3
===================================================================
--- code/trunk/doc/pcre2syntax.3 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/doc/pcre2syntax.3 2017-04-18 16:21:50 UTC (rev 760)
@@ -407,6 +407,7 @@
(?i) caseless
(?J) allow duplicate names
(?m) multiline
+ (?n) no auto capture
(?s) single line (dotall)
(?U) default ungreedy (lazy)
(?x) extended: ignore white space except in classes
Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/doc/pcre2test.1 2017-04-18 16:21:50 UTC (rev 760)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "17 April 2017" "PCRE 10.30"
+.TH PCRE2TEST 1 "18 April 2017" "PCRE2 10.30"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -519,10 +519,11 @@
.SS "Setting compilation options"
.rs
.sp
-The following modifiers set options for \fBpcre2_compile()\fP. The most common
-ones have single-letter abbreviations, with special handling for /x (to make
-it like Perl). If a second x is present, PCRE2_EXTENDED is converted into
-PCRE2_EXTENDED_MORE. A third appearance adds PCRE2_EXTENDED as well. See
+The following modifiers set options for \fBpcre2_compile()\fP. There are some
+single-letter abbreviations that are the same as Perl options. There is special
+handling for /x: if a second x is present, PCRE2_EXTENDED is converted into
+PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well,
+though this makes no difference to the way \fBpcre2_compile()\fP behaves. See
.\" HREF
\fBpcre2api\fP
.\"
@@ -547,7 +548,7 @@
never_backslash_c set PCRE2_NEVER_BACKSLASH_C
never_ucp set PCRE2_NEVER_UCP
never_utf set PCRE2_NEVER_UTF
- no_auto_capture set PCRE2_NO_AUTO_CAPTURE
+ /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE
no_auto_possess set PCRE2_NO_AUTO_POSSESS
no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR
no_start_optimize set PCRE2_NO_START_OPTIMIZE
@@ -570,7 +571,8 @@
.rs
.sp
The following modifiers affect the compilation process or request information
-about the pattern:
+about the pattern. There are single-letter abbreviations for some that are
+heavily used in the test files.
.sp
bsr=[anycrlf|unicode] specify \eR handling
/B bincode show binary code without lengths
@@ -1786,6 +1788,6 @@
.rs
.sp
.nf
-Last updated: 17 April 2017
+Last updated: 18 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/src/pcre2_compile.c 2017-04-18 16:21:50 UTC (rev 760)
@@ -2233,11 +2233,11 @@
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
-/* These options (changeable within the pattern) are tracked during parsing.
-The rest are put into META_OPTIONS items and used when compiling. */
+/* Of the options that are changeable within the pattern, these are tracked
+during parsing. The rest are used from META_OPTIONS items when compiling. */
#define PARSE_TRACKED_OPTIONS \
- (PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_DUPNAMES)
+ (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@@ -3422,9 +3422,7 @@
ptr++;
}
- /* Scan for options imsxJU. Some of them are tracked during parsing (see
- PARSE_TRACKED_OPTIONS) as they are local to groups. Others are not needed
- till compile time. */
+ /* Scan for options imnsxJU to be set or unset. */
else
{
@@ -3447,6 +3445,7 @@
case CHAR_i: *optset |= PCRE2_CASELESS; break;
case CHAR_m: *optset |= PCRE2_MULTILINE; break;
+ case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
case CHAR_s: *optset |= PCRE2_DOTALL; break;
case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/src/pcre2test.c 2017-04-18 16:21:50 UTC (rev 760)
@@ -720,13 +720,14 @@
} c1modstruct;
static c1modstruct c1modlist[] = {
- { "bincode", 'B', -1 },
- { "info", 'I', -1 },
- { "global", 'g', -1 },
- { "caseless", 'i', -1 },
- { "multiline", 'm', -1 },
- { "dotall", 's', -1 },
- { "extended", 'x', -1 }
+ { "bincode", 'B', -1 },
+ { "info", 'I', -1 },
+ { "global", 'g', -1 },
+ { "caseless", 'i', -1 },
+ { "multiline", 'm', -1 },
+ { "no_auto_capture", 'n', -1 },
+ { "dotall", 's', -1 },
+ { "extended", 'x', -1 }
};
#define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct)
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/testdata/testinput2 2017-04-18 16:21:50 UTC (rev 760)
@@ -5259,4 +5259,6 @@
/[a b](?xx: [ 12 ] (?-x:[ 34 ]) )y z/B
+/(a)(?-n:(b))(c)/nB
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2017-04-18 14:37:01 UTC (rev 759)
+++ code/trunk/testdata/testoutput2 2017-04-18 16:21:50 UTC (rev 760)
@@ -15945,6 +15945,24 @@
End
------------------------------------------------------------------
+/(a)(?-n:(b))(c)/nB
+------------------------------------------------------------------
+ Bra
+ Bra
+ a
+ Ket
+ Bra
+ CBra 1
+ b
+ Ket
+ Ket
+ Bra
+ c
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2
Error -64: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data