Revision: 1266
http://vcs.pcre.org/viewvc?view=rev&revision=1266
Author: ph10
Date: 2013-03-03 11:14:26 +0000 (Sun, 03 Mar 2013)
Log Message:
-----------
Allow callout before assertion condition in a conditional group.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcrecallout.3
code/trunk/doc/pcrepattern.3
code/trunk/pcre_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-03-03 10:42:46 UTC (rev 1265)
+++ code/trunk/ChangeLog 2013-03-03 11:14:26 UTC (rev 1266)
@@ -89,6 +89,10 @@
there was a conditional group that depended on an assertion, if the
assertion was false, the callout that immediately followed the alternation
in the condition was skipped when pcre_exec() was used for matching.
+
+23. Allow an explicit callout to be inserted before an assertion that is the
+ condition for a conditional group, for compatibility with automatic
+ callouts, which always insert a callout at this point.
Version 8.32 30-November-2012
Modified: code/trunk/doc/pcrecallout.3
===================================================================
--- code/trunk/doc/pcrecallout.3 2013-03-03 10:42:46 UTC (rev 1265)
+++ code/trunk/doc/pcrecallout.3 2013-03-03 11:14:26 UTC (rev 1266)
@@ -1,4 +1,4 @@
-.TH PCRECALLOUT 3 "13 January 2013" "PCRE 8.33"
+.TH PCRECALLOUT 3 "03 March 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -41,8 +41,17 @@
(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
.sp
Notice that there is a callout before and after each parenthesis and
-alternation bar. Automatic callouts can be used for tracking the progress of
-pattern matching. The
+alternation bar. If the pattern contains a conditional group whose condition is
+an assertion, an automatic callout is inserted immediately before the
+condition. Such a callout may also be inserted explicitly, for example:
+.sp
+ (?(?C9)(?=a)ab|de)
+.sp
+This applies only to conditions that are assertions, because an assertion is
+itself an independent group; it does not apply to other types of condition.
+.P
+Automatic callouts can be used for tracking the progress of pattern matching.
+The
.\" HREF
\fBpcretest\fP
.\"
@@ -115,10 +124,10 @@
.P
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
passed by the caller to the matching function. When \fBpcre_exec()\fP or
-\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to extract
-substrings that have been matched so far, in the same way as for extracting
-substrings after a match has completed. For the DFA matching functions, this
-field is not useful.
+\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to
+extract substrings that have been matched so far, in the same way as for
+extracting substrings after a match has completed. For the DFA matching
+functions, this field is not useful.
.P
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
that were passed to the matching function.
@@ -171,11 +180,12 @@
same callout number. However, they are set for all callouts.
.P
The \fImark\fP field is present from version 2 of the callout structure. In
-callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a pointer to
-the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
-(*THEN) item in the match, or NULL if no such items have been passed. Instances
-of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
-callouts from the DFA matching functions this field always contains NULL.
+callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a
+pointer to the zero-terminated name of the most recently passed (*MARK),
+(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
+passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
+previous (*MARK). In callouts from the DFA matching functions this field always
+contains NULL.
.
.
.SH "RETURN VALUES"
@@ -207,6 +217,6 @@
.rs
.sp
.nf
-Last updated: 13 January 2013
+Last updated: 03 March 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3 2013-03-03 10:42:46 UTC (rev 1265)
+++ code/trunk/doc/pcrepattern.3 2013-03-03 11:14:26 UTC (rev 1266)
@@ -1,4 +1,4 @@
-.TH PCREPATTERN 3 "27 February 2013" "PCRE 8.33"
+.TH PCREPATTERN 3 "03 March 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
@@ -2640,7 +2640,14 @@
.sp
If the PCRE_AUTO_CALLOUT flag is passed to a compiling function, callouts are
automatically installed before each item in the pattern. They are all numbered
-255.
+255. If there is a conditional group in the pattern whose condition is an
+assertion, an additional callout is inserted just before the condition. An
+explicit callout may also be set at this position, as in this example:
+.sp
+ (?(?C9)(?=a)abc|def)
+.sp
+Note that this applies only to assertion conditions, not to other types of
+condition.
.P
During matching, when PCRE reaches a callout point, the external function is
called. It is provided with the number of the callout, the position in the
@@ -2989,6 +2996,6 @@
.rs
.sp
.nf
-Last updated: 27 February 2013
+Last updated: 03 March 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-03-03 10:42:46 UTC (rev 1265)
+++ code/trunk/pcre_compile.c 2013-03-03 11:14:26 UTC (rev 1266)
@@ -5729,6 +5729,7 @@
/* ------------------------------------------------------------ */
case CHAR_LEFT_PARENTHESIS:
bravalue = OP_COND; /* Conditional group */
+ tempptr = ptr;
/* A condition can be an assertion, a number (referring to a numbered
group), a name (referring to a named group), or 'R', referring to
@@ -5741,14 +5742,28 @@
be the recursive thing or the name 'R' (and similarly for 'R' followed
by digits), and (b) a number could be a name that consists of digits.
In both cases, we look for a name first; if not found, we try the other
- cases. */
+ cases.
+
+ For compatibility with auto-callouts, we allow a callout to be
+ specified before a condition that is an assertion. First, check for the
+ syntax of a callout; if found, adjust the temporary pointer that is
+ used to check for an assertion condition. That's all that is needed! */
+
+ if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
+ {
+ for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
+ if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
+ tempptr += i + 1;
+ }
/* For conditions that are assertions, check the syntax, and then exit
the switch. This will take control down to where bracketed groups,
including assertions, are processed. */
- if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
- ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
+ if (tempptr[1] == CHAR_QUESTION_MARK &&
+ (tempptr[2] == CHAR_EQUALS_SIGN ||
+ tempptr[2] == CHAR_EXCLAMATION_MARK ||
+ tempptr[2] == CHAR_LESS_THAN_SIGN))
break;
/* Most other conditions use OP_CREF (a couple change to OP_RREF
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2013-03-03 10:42:46 UTC (rev 1265)
+++ code/trunk/testdata/testinput2 2013-03-03 11:14:26 UTC (rev 1266)
@@ -3845,4 +3845,10 @@
xxxx123a\P\P
xxxx123a\P
+/^(?(?=a)aa|bb)/C
+ bb
+
+/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/
+ bb
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-03-03 10:42:46 UTC (rev 1265)
+++ code/trunk/testdata/testoutput2 2013-03-03 11:14:26 UTC (rev 1266)
@@ -12616,4 +12616,30 @@
xxxx123a\P
Partial match: 123a
+/^(?(?=a)aa|bb)/C
+ bb
+--->bb
+ +0 ^ ^
+ +1 ^ (?(?=a)aa|bb)
+ +3 ^ (?=a)
+ +6 ^ a
++11 ^ b
++12 ^^ b
++13 ^ ^ )
++14 ^ ^
+ 0: bb
+
+/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/
+ bb
+--->bb
+ 1 ^ ^
+ 2 ^ (?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))
+ 99 ^ (?=(?C3)a(?C4))
+ 3 ^ a
+ 8 ^ b
+ 9 ^^ b
+ 10 ^ ^ )
+ 11 ^ ^
+ 0: bb
+
/-- End of testinput2 --/