Revision: 184
http://www.exim.org/viewvc/pcre2?view=rev&revision=184
Author: ph10
Date: 2015-01-13 16:01:24 +0000 (Tue, 13 Jan 2015)
Log Message:
-----------
Fix conditional group backreference bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/configure.ac
code/trunk/doc/pcre2api.3
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-01-05 16:11:28 UTC (rev 183)
+++ code/trunk/ChangeLog 2015-01-13 16:01:24 UTC (rev 184)
@@ -1,9 +1,25 @@
Change Log for PCRE2
--------------------
-Version 10.00 05-January-2015
+Version 10.10 13-January-2015
-----------------------------
+1. When a pattern is compiled, it remembers the highest back reference so that
+when matching, if the ovector is too small, extra memory can be obtained to
+use instead. A conditional subpattern whose condition is a check on a capture
+having happened, as for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is
+another kind of back reference, but it was not setting the highest
+backreference number. This mattered only if pcre2_match() was called with an
+ovector that was too small to hold the capture, and there was no other kind of
+back reference (a situation which is probably quite rare). The effect of the
+bug was that the condition was always treated as FALSE when the capture could
+not be consulted, leading to incorrect behaviour by pcre2_match(). This bug
+has been fixed.
+
+
+Version 10.00 05-January-2015
+-----------------------------
+
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
API, up to item 20 for release 8.36.
Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac 2015-01-05 16:11:28 UTC (rev 183)
+++ code/trunk/configure.ac 2015-01-13 16:01:24 UTC (rev 184)
@@ -9,9 +9,9 @@
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [00])
-m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2014-01-05])
+m4_define(pcre2_minor, [10])
+m4_define(pcre2_prerelease, [-RC1])
+m4_define(pcre2_date, [2015-01-13])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3 2015-01-05 16:11:28 UTC (rev 183)
+++ code/trunk/doc/pcre2api.3 2015-01-13 16:01:24 UTC (rev 184)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "02 January 2015" "PCRE2 10.00"
+.TH PCRE2API 3 "13 January 2015" "PCRE2 10.10"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -1474,8 +1474,12 @@
PCRE2_INFO_BACKREFMAX
.sp
Return the number of the highest back reference in the pattern. The third
-argument should point to an \fBuint32_t\fP variable. Zero is returned if there
-are no back references.
+argument should point to an \fBuint32_t\fP variable. Named subpatterns acquire
+numbers as well as names, and these count towards the highest back reference.
+Back references such as \e4 or \eg{12} match the captured characters of the
+given group, but in addition, the check that a capturing group is set in a
+conditional subpattern such as (?(3)a|b) is also a back reference. Zero is
+returned if there are no back references.
.sp
PCRE2_INFO_BSR
.sp
@@ -2849,6 +2853,6 @@
.rs
.sp
.nf
-Last updated: 02 January 2015
+Last updated: 13 January 2015
Copyright (c) 1997-2015 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-01-05 16:11:28 UTC (rev 183)
+++ code/trunk/src/pcre2_compile.c 2015-01-13 16:01:24 UTC (rev 184)
@@ -5336,6 +5336,7 @@
goto FAILED;
}
PUT2(code, 2+LINK_SIZE, recno);
+ if (recno > cb->top_backref) cb->top_backref = recno;
break;
}
@@ -5355,15 +5356,18 @@
if (i < cb->names_found)
{
- int offset = i++;
- int count = 1;
- recno = GET2(slot, 0); /* Number from first found */
- for (; i < cb->names_found; i++)
+ int offset = i; /* Offset of first name found */
+ int count = 0;
+
+ for (;;)
{
+ recno = GET2(slot, 0); /* Number for last found */
+ if (recno > cb->top_backref) cb->top_backref = recno;
+ count++;
+ if (++i >= cb->names_found) break;
slot += cb->name_entry_size;
if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 ||
(slot+IMM2_SIZE)[namelen] != 0) break;
- count++;
}
if (count > 1)
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-01-05 16:11:28 UTC (rev 183)
+++ code/trunk/testdata/testinput2 2015-01-13 16:01:24 UTC (rev 184)
@@ -4116,4 +4116,22 @@
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
+'^(?:(a)|b)(?(1)A|B)'
+ aA123\=ovector=1
+ aA123\=ovector=2
+
+'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
+ aA123\=ovector=1
+ aA123\=ovector=2
+
+'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames
+ aA123\=ovector=1
+ aA123\=ovector=2
+ aA123\=ovector=3
+
+'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames
+ aa123\=ovector=1
+ aa123\=ovector=2
+ aa123\=ovector=3
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-01-05 16:11:28 UTC (rev 183)
+++ code/trunk/testdata/testoutput2 2015-01-13 16:01:24 UTC (rev 184)
@@ -1535,28 +1535,33 @@
/a(?(1)b)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'a'
Subject length lower bound = 2
/a(?(1)bag|big)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'a'
Last code unit = 'g'
Subject length lower bound = 5
/a(?(1)bag|big)*(.)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'a'
Subject length lower bound = 2
/a(?(1)bag|big)+(.)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'a'
Last code unit = 'g'
Subject length lower bound = 5
/a(?(1)b..|b..)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 5
@@ -3345,21 +3350,25 @@
/(?(1)ab|ac)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'a'
Subject length lower bound = 3
/(?(1)abz|acz)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'a'
Last code unit = 'z'
Subject length lower bound = 4
/(?(1)abz)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
Subject length lower bound = 1
/(?(1)abz)(1)23/I
Capturing subpattern count = 1
+Max back reference = 1
Last code unit = '3'
Subject length lower bound = 3
@@ -5923,6 +5932,7 @@
/^(?P<A>a)?(?(A)a|b)/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
A 1
Compile options: <none>
@@ -5940,6 +5950,7 @@
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
ZZ 1
Last code unit = 'X'
@@ -5956,6 +5967,7 @@
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
ZZ 1
Last code unit = 'X'
@@ -5966,6 +5978,7 @@
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
ZZ 1
Last code unit = 'X'
@@ -9713,6 +9726,7 @@
(?(1)|.) # check that there was an empty component
/Iix
Capturing subpattern count = 1
+Max back reference = 1
Compile options: caseless extended
Overall options: anchored caseless extended
Last code unit = ':'
@@ -9740,6 +9754,7 @@
b(?<quote> (?<apostrophe>')|(?<realquote>")) )
(?('quote')[a-z]+|[0-9]+)/Ix,dupnames
Capturing subpattern count = 6
+Max back reference = 4
Named capturing subpatterns:
apostrophe 2
apostrophe 5
@@ -9802,6 +9817,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 4
+Max back reference = 4
Named capturing subpatterns:
D 4
D 1
@@ -9849,6 +9865,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 4
+Max back reference = 4
Named capturing subpatterns:
A 1
A 4
@@ -9964,6 +9981,7 @@
/()i(?(1)a)/I
Capturing subpattern count = 1
+Max back reference = 1
First code unit = 'i'
Subject length lower bound = 1
ia
@@ -13540,6 +13558,7 @@
/(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
VERSION 1
Starting code units: a x
@@ -13832,4 +13851,46 @@
Overall options: dotall no_dotstar_anchor
Subject length lower bound = 1
+'^(?:(a)|b)(?(1)A|B)'
+ aA123\=ovector=1
+Matched, but too many substrings
+ 0: aA
+ aA123\=ovector=2
+ 0: aA
+ 1: a
+
+'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
+ aA123\=ovector=1
+Matched, but too many substrings
+ 0: aA
+ aA123\=ovector=2
+ 0: aA
+ 1: a
+
+'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames
+ aA123\=ovector=1
+Matched, but too many substrings
+ 0: aA
+ aA123\=ovector=2
+Matched, but too many substrings
+ 0: aA
+ 1:
+ aA123\=ovector=3
+ 0: aA
+ 1:
+ 2: a
+
+'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames
+ aa123\=ovector=1
+Matched, but too many substrings
+ 0: aa
+ aa123\=ovector=2
+Matched, but too many substrings
+ 0: aa
+ 1: <unset>
+ aa123\=ovector=3
+ 0: aa
+ 1: <unset>
+ 2: a
+
# End of testinput2