Revision: 103
http://www.exim.org/viewvc/pcre2?view=rev&revision=103
Author: ph10
Date: 2014-10-10 17:42:03 +0100 (Fri, 10 Oct 2014)
Log Message:
-----------
Fix match_unset_backref bug.
Modified Paths:
--------------
code/trunk/src/pcre2_match.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2014-10-10 16:16:44 UTC (rev 102)
+++ code/trunk/src/pcre2_match.c 2014-10-10 16:42:03 UTC (rev 103)
@@ -2763,19 +2763,20 @@
continue; /* With the main loop */
}
- /* Handle repeated back references. If a set group has length zero, just
- continue with the main loop, because it matches however many times. For an
- unset reference, in non-match-unset-backref mode, if the minimum is
- zero, we can continue at the same level without recursion. For any other
- minimum, carrying on will result in NOMATCH. */
+ /* Handle repeated back references. If a set group has length zero, just
+ continue with the main loop, because it matches however many times. For an
+ unset reference, if the minimum is zero, we can also just continue. We can
+ also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset
+ group behave as a zero-length group. For any other unset cases, carrying
+ on will result in NOMATCH. */
if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
{
if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
}
- else
+ else /* Group is not set */
{
- if (min == 0 && (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) == 0)
+ if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
continue;
}
@@ -2856,7 +2857,7 @@
eptr += slength;
}
- /* If the length matched for each repetiaion is the same as the length of
+ /* If the length matched for each repetition is the same as the length of
the captured group, we can easily work backwards. This is the normal
case. However, in caseless UTF-8 mode there are pairs of case-equivalent
characters whose lengths (in terms of code units) differ. However, this
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2014-10-10 16:16:44 UTC (rev 102)
+++ code/trunk/testdata/testinput2 2014-10-10 16:42:03 UTC (rev 103)
@@ -4003,4 +4003,7 @@
abd
xyd
+/\k<A>*(?<A>aa)(?<A>bb)/match_unset_backref,dupnames
+ aabb
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2014-10-10 16:16:44 UTC (rev 102)
+++ code/trunk/testdata/testoutput2 2014-10-10 16:42:03 UTC (rev 103)
@@ -13576,4 +13576,10 @@
xyd
0: d
+/\k<A>*(?<A>aa)(?<A>bb)/match_unset_backref,dupnames
+ aabb
+ 0: aabb
+ 1: aa
+ 2: bb
+
# End of testinput2