Revision: 600
http://vcs.pcre.org/viewvc?view=rev&revision=600
Author: ph10
Date: 2011-05-09 09:54:11 +0100 (Mon, 09 May 2011)
Log Message:
-----------
Fix backup bug for \R with greedy quantifier.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_exec.c
code/trunk/testdata/testinput2
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-05-07 16:09:06 UTC (rev 599)
+++ code/trunk/ChangeLog 2011-05-09 08:54:11 UTC (rev 600)
@@ -36,6 +36,13 @@
pcre_exec() or pcre_dfa_exec() fails; if the error is a UTF-8 check
failure, the offset and reason code are output.
+8. When \R was used with a maximizing quantifier it failed to skip backwards
+ over a \r\n pair if the subsequent match failed. Instead, it just skipped
+ back over a single character (\n). This seems wrong (because it treated the
+ two characters as a single entity when going forwards), conflicts with the
+ documentation that \R is equivalent to (?>\r\n|\n|...etc), and makes the
+ behaviour of \R* different to (\R)*, which also seems wrong. The behaviour
+ has been changed: when backing up, a \r\n pair is now skipped as one unit.
Version 8.12 15-Jan-2011
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2011-05-07 16:09:06 UTC (rev 599)
+++ code/trunk/pcre_exec.c 2011-05-09 08:54:11 UTC (rev 600)
@@ -2017,6 +2017,7 @@
switch(c)
{
default: MRRETURN(MATCH_NOMATCH);
+
case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
@@ -3791,6 +3792,7 @@
switch(c)
{
default: MRRETURN(MATCH_NOMATCH);
+
case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
@@ -4067,9 +4069,11 @@
switch(*eptr++)
{
default: MRRETURN(MATCH_NOMATCH);
+
case 0x000d:
if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
break;
+
case 0x000a:
break;
@@ -5258,7 +5262,11 @@
RRETURN(PCRE_ERROR_INTERNAL);
}
- /* eptr is now past the end of the maximum run */
+ /* eptr is now past the end of the maximum run. If possessive, we are
+ done (no backing up). Otherwise, match at this position; anything other
+ than no match is immediately returned. For nomatch, back up one
+ character, unless we are matching \R and the last thing matched was
+ \r\n, in which case, back up two bytes. */
if (possessive) continue;
for(;;)
@@ -5267,6 +5275,8 @@
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- == pp) break; /* Stop if tried at original pos */
BACKCHAR(eptr);
+ if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
+ eptr[-1] == '\r') eptr--;
}
}
else
@@ -5465,14 +5475,20 @@
RRETURN(PCRE_ERROR_INTERNAL);
}
- /* eptr is now past the end of the maximum run */
+ /* eptr is now past the end of the maximum run. If possessive, we are
+ done (no backing up). Otherwise, match at this position; anything other
+ than no match is immediately returned. For nomatch, back up one
+ character (byte), unless we are matching \R and the last thing matched
+ was \r\n, in which case, back up two bytes. */
if (possessive) continue;
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (ctype == OP_ANYNL && eptr > pp && *eptr == '\n' &&
+ eptr[-1] == '\r') eptr--;
}
}
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-05-07 16:09:06 UTC (rev 599)
+++ code/trunk/testdata/testinput2 2011-05-09 08:54:11 UTC (rev 600)
@@ -3570,4 +3570,23 @@
/(?P<abn>(?P=axn)xxx)(?<axn>yy)/BZ
+/-- These tests are here because Perl gets the first one wrong. --/
+
+/(\R*)(.)/s
+ \r\n
+ \r\r\n\n\r
+ \r\r\n\n\r\n
+
+/(\R)*(.)/s
+ \r\n
+ \r\r\n\n\r
+ \r\r\n\n\r\n
+
+/((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s
+ \r\n
+ \r\r\n\n\r
+ \r\r\n\n\r\n
+
+/-- --/
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2011-05-07 16:09:06 UTC (rev 599)
+++ code/trunk/testdata/testinput5 2011-05-09 08:54:11 UTC (rev 600)
@@ -844,4 +844,17 @@
/^\cģ/8
+/(\R*)(.)/s8
+ \r\n
+ \r\r\n\n\r
+ \r\r\n\n\r\n
+
+/(\R)*(.)/s8
+ \r\n
+ \r\r\n\n\r
+ \r\r\n\n\r\n
+
+/(\X*)(.)/s8
+ A\x{300}
+
/-- End of testinput5 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-05-07 16:09:06 UTC (rev 599)
+++ code/trunk/testdata/testoutput2 2011-05-09 08:54:11 UTC (rev 600)
@@ -11305,4 +11305,50 @@
End
------------------------------------------------------------------
+/-- These tests are here because Perl gets the first one wrong. --/
+
+/(\R*)(.)/s
+ \r\n
+ 0: \x0d
+ 1:
+ 2: \x0d
+ \r\r\n\n\r
+ 0: \x0d\x0d\x0a\x0a\x0d
+ 1: \x0d\x0d\x0a\x0a
+ 2: \x0d
+ \r\r\n\n\r\n
+ 0: \x0d\x0d\x0a\x0a\x0d
+ 1: \x0d\x0d\x0a\x0a
+ 2: \x0d
+
+/(\R)*(.)/s
+ \r\n
+ 0: \x0d
+ 1: <unset>
+ 2: \x0d
+ \r\r\n\n\r
+ 0: \x0d\x0d\x0a\x0a\x0d
+ 1: \x0a
+ 2: \x0d
+ \r\r\n\n\r\n
+ 0: \x0d\x0d\x0a\x0a\x0d
+ 1: \x0a
+ 2: \x0d
+
+/((?>\r\n|\n|\x0b|\f|\r|\x85)*)(.)/s
+ \r\n
+ 0: \x0d
+ 1:
+ 2: \x0d
+ \r\r\n\n\r
+ 0: \x0d\x0d\x0a\x0a\x0d
+ 1: \x0d\x0d\x0a\x0a
+ 2: \x0d
+ \r\r\n\n\r\n
+ 0: \x0d\x0d\x0a\x0a\x0d
+ 1: \x0d\x0d\x0a\x0a
+ 2: \x0d
+
+/-- --/
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2011-05-07 16:09:06 UTC (rev 599)
+++ code/trunk/testdata/testoutput5 2011-05-09 08:54:11 UTC (rev 600)
@@ -2343,4 +2343,38 @@
/^\cģ/8
Failed: \c must be followed by an ASCII character at offset 3
+/(\R*)(.)/s8
+ \r\n
+ 0: \x{0d}
+ 1:
+ 2: \x{0d}
+ \r\r\n\n\r
+ 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
+ 1: \x{0d}\x{0d}\x{0a}\x{0a}
+ 2: \x{0d}
+ \r\r\n\n\r\n
+ 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
+ 1: \x{0d}\x{0d}\x{0a}\x{0a}
+ 2: \x{0d}
+
+/(\R)*(.)/s8
+ \r\n
+ 0: \x{0d}
+ 1: <unset>
+ 2: \x{0d}
+ \r\r\n\n\r
+ 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
+ 1: \x{0a}
+ 2: \x{0d}
+ \r\r\n\n\r\n
+ 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d}
+ 1: \x{0a}
+ 2: \x{0d}
+
+/(\X*)(.)/s8
+ A\x{300}
+ 0: A
+ 1:
+ 2: A
+
/-- End of testinput5 --/