Revision: 1520
http://vcs.pcre.org/viewvc?view=rev&revision=1520
Author: ph10
Date: 2015-02-08 16:29:23 +0000 (Sun, 08 Feb 2015)
Log Message:
-----------
Fix conditional test not setting highest back reference.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-02-06 17:55:54 UTC (rev 1519)
+++ code/trunk/ChangeLog 2015-02-08 16:29:23 UTC (rev 1520)
@@ -31,7 +31,19 @@
7. A UTF pattern containing a "not" match of a non-ASCII character and a
subroutine reference could loop at compile time. Example: /[^\xff]((?1))/.
+8. When a pattern is compiled, it remembers the highest back reference so that
+ when matching, if the ovector is too small, extra memory can be obtained to
+ use instead. A conditional subpattern whose condition is a check on a
+ capture having happened, such as, for example, in the pattern
+ /^(?:(a)|b)(?(1)A|B)/, is another kind of back reference, but it was not
+ setting the highest back reference number. This mattered only if pcre_exec()
+ was called with an ovector that was too small to hold the capture, and there
+ was no other kind of back reference (a situation which is probably quite
+ rare). The effect of the bug was that the condition was always treated as
+ FALSE when the capture could not be consulted, leading to incorrect
+ behaviour by pcre_exec(). This bug has been fixed.
+
Version 8.36 26-September-2014
------------------------------
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2015-02-06 17:55:54 UTC (rev 1519)
+++ code/trunk/pcre_compile.c 2015-02-08 16:29:23 UTC (rev 1520)
@@ -6771,6 +6771,7 @@
goto FAILED;
}
PUT2(code, 2+LINK_SIZE, recno);
+ if (recno > cd->top_backref) cd->top_backref = recno;
break;
}
@@ -6793,6 +6794,7 @@
int offset = i++;
int count = 1;
recno = GET2(slot, 0); /* Number from first found */
+ if (recno > cd->top_backref) cd->top_backref = recno;
for (; i < cd->names_found; i++)
{
slot += cd->name_entry_size;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-02-06 17:55:54 UTC (rev 1519)
+++ code/trunk/testdata/testinput2 2015-02-08 16:29:23 UTC (rev 1520)
@@ -4084,4 +4084,20 @@
"(?(?=)?==)(((((((((?=)))))))))"
a
+/^(?:(a)|b)(?(1)A|B)/I
+ aA123\O3
+ aA123\O6
+
+'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
+ aA123\O3
+ aA123\O6
+
+'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'J
+ aA123\O3
+ aA123\O6
+
+'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'J
+ aa123\O3
+ aa123\O6
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-02-06 17:55:54 UTC (rev 1519)
+++ code/trunk/testdata/testoutput2 2015-02-08 16:29:23 UTC (rev 1520)
@@ -1566,30 +1566,35 @@
/a(?(1)b)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
First char = 'a'
No need char
/a(?(1)bag|big)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
First char = 'a'
Need char = 'g'
/a(?(1)bag|big)*(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
First char = 'a'
No need char
/a(?(1)bag|big)+(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
First char = 'a'
Need char = 'g'
/a(?(1)b..|b..)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
First char = 'a'
Need char = 'b'
@@ -3379,24 +3384,28 @@
/(?(1)ab|ac)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
First char = 'a'
No need char
/(?(1)abz|acz)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
First char = 'a'
Need char = 'z'
/(?(1)abz)(.)/I
Capturing subpattern count = 1
+Max back reference = 1
No options
No first char
No need char
/(?(1)abz)(1)23/I
Capturing subpattern count = 1
+Max back reference = 1
No options
No first char
Need char = '3'
@@ -6336,6 +6345,7 @@
/^(?P<A>a)?(?(A)a|b)/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
A 1
Options: anchored
@@ -6353,6 +6363,7 @@
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
ZZ 1
No options
@@ -6370,6 +6381,7 @@
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
ZZ 1
No options
@@ -6381,6 +6393,7 @@
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/I
Capturing subpattern count = 1
+Max back reference = 1
Named capturing subpatterns:
ZZ 1
No options
@@ -10226,6 +10239,7 @@
(?(1)|.) # check that there was an empty component
/xiIS
Capturing subpattern count = 1
+Max back reference = 1
Options: anchored caseless extended
No first char
Need char = ':'
@@ -10255,6 +10269,7 @@
b(?<quote> (?<apostrophe>')|(?<realquote>")) )
(?('quote')[a-z]+|[0-9]+)/JIx
Capturing subpattern count = 6
+Max back reference = 1
Named capturing subpatterns:
apostrophe 2
apostrophe 5
@@ -10317,6 +10332,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 4
+Max back reference = 4
Named capturing subpatterns:
D 4
D 1
@@ -10364,6 +10380,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 4
+Max back reference = 1
Named capturing subpatterns:
A 1
A 4
@@ -10486,6 +10503,7 @@
/()i(?(1)a)/SI
Capturing subpattern count = 1
+Max back reference = 1
No options
No first char
Need char = 'i'
@@ -14216,4 +14234,43 @@
a
No match
+/^(?:(a)|b)(?(1)A|B)/I
+Capturing subpattern count = 1
+Max back reference = 1
+Options: anchored
+No first char
+No need char
+ aA123\O3
+Matched, but too many substrings
+ 0: aA
+ aA123\O6
+ 0: aA
+ 1: a
+
+'^(?:(?<AA>a)|b)(?(<AA>)A|B)'
+ aA123\O3
+Matched, but too many substrings
+ 0: aA
+ aA123\O6
+ 0: aA
+ 1: a
+
+'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'J
+ aA123\O3
+Matched, but too many substrings
+ 0: aA
+ aA123\O6
+Matched, but too many substrings
+ 0: aA
+ 1:
+
+'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'J
+ aa123\O3
+Matched, but too many substrings
+ 0: aa
+ aa123\O6
+Matched, but too many substrings
+ 0: aa
+ 1: <unset>
+
/-- End of testinput2 --/