Revision: 409
http://www.exim.org/viewvc/pcre2?view=rev&revision=409
Author: ph10
Date: 2015-11-03 17:38:00 +0000 (Tue, 03 Nov 2015)
Log Message:
-----------
Forbid \K patterns that end before they start in pcre2_substitute().
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcre2api.3
code/trunk/src/pcre2.h
code/trunk/src/pcre2.h.in
code/trunk/src/pcre2_error.c
code/trunk/src/pcre2_substitute.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/ChangeLog 2015-11-03 17:38:00 UTC (rev 409)
@@ -257,7 +257,10 @@
74. Give an error if a lookbehind assertion is longer than 65535 code units.
+75. Give an error in pcre2_substitute() if a match ends before it starts (as a
+result of the use of \K).
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/doc/pcre2api.3 2015-11-03 17:38:00 UTC (rev 409)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "16 October 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -2666,7 +2666,9 @@
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
string in \fIoutputbuffer\fP, replacing the part that was matched with the
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
-be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
+be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
+which a \eK item in a lookahead in the pattern causes the match to end before
+it starts are not supported, and give rise to an error return.
.P
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
\fBpcre2_match()\fP, except that the partial matching options are not
@@ -2769,8 +2771,9 @@
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
errors in the replacement string, with more particular errors being
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
-PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
-PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all
+PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket not found),
+PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution), and
+PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started). As for all
PCRE2 errors, a text message that describes the error can be obtained by
calling \fBpcre2_get_error_message()\fP.
.
@@ -3066,6 +3069,6 @@
.rs
.sp
.nf
-Last updated: 16 October 2015
+Last updated: 03 November 2015
Copyright (c) 1997-2015 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2.h
===================================================================
--- code/trunk/src/pcre2.h 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2.h 2015-11-03 17:38:00 UTC (rev 409)
@@ -240,6 +240,7 @@
#define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
+#define PCRE2_ERROR_BADSUBSPATTERN (-60)
/* Request types for pcre2_pattern_info() */
Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2.h.in 2015-11-03 17:38:00 UTC (rev 409)
@@ -240,6 +240,7 @@
#define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
+#define PCRE2_ERROR_BADSUBSPATTERN (-60)
/* Request types for pcre2_pattern_info() */
Modified: code/trunk/src/pcre2_error.c
===================================================================
--- code/trunk/src/pcre2_error.c 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2_error.c 2015-11-03 17:38:00 UTC (rev 409)
@@ -170,8 +170,8 @@
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
/* 85 */
"using \\C is disabled in this PCRE2 library\0"
- "regular expression is too complicated\0"
- "lookbehind assertion is too long\0"
+ "regular expression is too complicated\0"
+ "lookbehind assertion is too long\0"
;
/* Match-time and UTF error texts are in the same format. */
@@ -247,7 +247,9 @@
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
"bad escape sequence in replacement string\0"
"expected closing curly bracket in replacement string\0"
- "bad substitution in replacement string\0"
+ "bad substitution in replacement string\0"
+ /* 60 */
+ "match with end before start is not supported\0"
;
Modified: code/trunk/src/pcre2_substitute.c
===================================================================
--- code/trunk/src/pcre2_substitute.c 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/src/pcre2_substitute.c 2015-11-03 17:38:00 UTC (rev 409)
@@ -55,7 +55,7 @@
/* In extended mode, we recognize ${name:+set text:unset text} and similar
constructions. This requires the identification of unescaped : and }
characters. This function scans for such. It must deal with nested ${
-constructions. The pointer to the text is updated, either to the required end
+constructions. The pointer to the text is updated, either to the required end
character, or to where an error was detected.
Arguments:
@@ -107,7 +107,7 @@
else if (*ptr == CHAR_BACKSLASH)
{
- int erc;
+ int erc;
int errorcode = 0;
uint32_t ch;
@@ -279,10 +279,10 @@
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext);
-
+
#ifdef SUPPORT_UNICODE
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
-#endif
+#endif
/* Any error other than no match returns the error code. No match when not
doing the special after-empty-match global rematch, or when at the end of the
@@ -320,8 +320,15 @@
continue;
}
- /* Handle a successful match. */
+ /* Handle a successful match. Matches that use \K to end before they start
+ are not supported. */
+ if (ovector[1] < ovector[0])
+ {
+ rc = PCRE2_ERROR_BADSUBSPATTERN;
+ goto EXIT;
+ }
+
subs++;
if (rc == 0) rc = ovector_count;
fraglength = ovector[0] - start_offset;
@@ -409,14 +416,14 @@
next = *ptr;
if (next < CHAR_0 || next > CHAR_9) break;
group = group * 10 + next - CHAR_0;
-
+
/* A check for a number greater than the highest captured group
is sufficient here; no need for a separate overflow check. */
-
+
if (group > code->top_bracket)
{
rc = PCRE2_ERROR_NOSUBSTRING;
- goto PTREXIT;
+ goto PTREXIT;
}
}
}
@@ -439,7 +446,7 @@
if (inparens)
{
-
+
if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
{
special = *(++ptr);
@@ -501,8 +508,8 @@
else
{
PCRE2_SPTR subptr, subptrend;
-
- /* Find a number for a named group. In case there are duplicate names,
+
+ /* Find a number for a named group. In case there are duplicate names,
search for the first one that is set. */
if (group < 0)
@@ -516,18 +523,18 @@
if (ng < ovector_count)
{
if (group < 0) group = ng; /* First in ovector */
- if (ovector[ng*2] != PCRE2_UNSET)
+ if (ovector[ng*2] != PCRE2_UNSET)
{
group = ng; /* First that is set */
break;
- }
+ }
}
}
-
- /* If group is still negative, it means we did not find a group that
+
+ /* If group is still negative, it means we did not find a group that
is in the ovector. Just set the first group. */
-
- if (group < 0) group = GET2(first, 0);
+
+ if (group < 0) group = GET2(first, 0);
}
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/testdata/testinput2 2015-11-03 17:38:00 UTC (rev 409)
@@ -4596,4 +4596,7 @@
/(?<!a{65535})x/I
+/(?=a\K)/replace=z
+ BaCaD
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-11-03 10:39:27 UTC (rev 408)
+++ code/trunk/testdata/testoutput2 2015-11-03 17:38:00 UTC (rev 409)
@@ -14690,4 +14690,8 @@
First code unit = 'x'
Subject length lower bound = 1
+/(?=a\K)/replace=z
+ BaCaD
+Failed: error -60: match with end before start is not supported
+
# End of testinput2