Revision: 243
http://www.exim.org/viewvc/pcre2?view=rev&revision=243
Author: ph10
Date: 2015-04-06 13:16:36 +0100 (Mon, 06 Apr 2015)
Log Message:
-----------
Fix handling of global matching in pcre2test when a lookbehind assertion
contains \K.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2test.c
code/trunk/testdata/testinput2
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-04-03 11:14:19 UTC (rev 242)
+++ code/trunk/ChangeLog 2015-04-06 12:16:36 UTC (rev 243)
@@ -70,7 +70,10 @@
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
+18. A problem similar to 17 existed in pcre2test for global matches, though
+the code there did detect the loop and stop instead of running forever.
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2015-04-03 11:14:19 UTC (rev 242)
+++ code/trunk/src/pcre2test.c 2015-04-06 12:16:36 UTC (rev 243)
@@ -3557,14 +3557,14 @@
callout argument string point to strings of the appropriate width. Casts can be
used to deal with this.
-Argument:
+Argument:
cb pointer to enumerate block
callout_data user data
-Returns: 0
+Returns: 0
*/
-static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
+static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
void *callout_data)
{
uint32_t i;
@@ -3587,13 +3587,13 @@
}
fprintf(outfile, "%c ", delimiter);
}
-else fprintf(outfile, "%d ", cb->callout_number);
+else fprintf(outfile, "%d ", cb->callout_number);
fprintf(outfile, "%.*s\n",
(int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
pbuffer8 + cb->pattern_position);
-
-return 0;
+
+return 0;
}
@@ -3879,10 +3879,10 @@
int len;
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
if (errorcode < 0)
- {
+ {
PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
- }
+ }
fprintf(outfile, "\n");
return PR_SKIP;
}
@@ -5684,20 +5684,20 @@
ovector = FLD(match_data, ovector);
- /* After the first time round a global loop, save the current ovector[0,1] so
- that we can check that they do change each time. Otherwise a matching bug
- that returns the same string causes an infinite loop. It has happened! */
+ /* After the first time round a global loop, for a normal global (/g)
+ iteration, save the current ovector[0,1] so that we can check that they do
+ change each time. Otherwise a matching bug that returns the same string
+ causes an infinite loop. It has happened! */
- if (gmatched > 0)
+ if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
{
ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1];
}
- /* Set the variables on the first iteration, just to stop a compiler warning
- when ovecsave[] is referenced below. */
+ /* For altglobal (or first time round the loop), set an "unset" value. */
- else ovecsave[0] = ovecsave[1] = 0;
+ else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
@@ -6169,14 +6169,49 @@
if (end_offset == ulen) break; /* End of subject */
g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
- else g_notempty = 0;
- /* For /g, update the start offset, leaving the rest alone */
+ /* However, even after matching a non-empty string, there is still one
+ tricky case. If a pattern contains \K within a lookbehind assertion at the
+ start, the end of the matched string can be at the offset where the match
+ started. In the case of a normal /g iteration without special action, this
+ leads to a loop that keeps on returning the same substring. The loop would
+ be caught above, but we really want to move on to the next match. */
- if ((dat_datctl.control & CTL_GLOBAL) != 0) dat_datctl.offset = end_offset;
+ else
+ {
+ g_notempty = 0; /* Set for a "normal" repeat */
+ if ((dat_datctl.control & CTL_GLOBAL) != 0)
+ {
+ PCRE2_SIZE startchar;
+ PCRE2_GET_STARTCHAR(startchar, match_data);
+ if (end_offset <= startchar)
+ {
+ if (startchar >= ulen) break; /* End of subject */
+ end_offset = startchar + 1;
+ if (utf && test_mode != PCRE32_MODE)
+ {
+ if (test_mode == PCRE8_MODE)
+ {
+ for (; end_offset < ulen; end_offset++)
+ if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
+ }
+ else /* 16-bit mode */
+ {
+ for (; end_offset < ulen; end_offset++)
+ if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
+ }
+ }
+ }
+ }
+ }
- /* For /G, update the pointer and length */
+ /* For /g (global), update the start offset, leaving the rest alone. */
+ if ((dat_datctl.control & CTL_GLOBAL) != 0)
+ dat_datctl.offset = end_offset;
+
+ /* For altglobal, just update the pointer and length. */
+
else
{
pp += end_offset * code_unit_size;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-04-03 11:14:19 UTC (rev 242)
+++ code/trunk/testdata/testinput2 2015-04-06 12:16:36 UTC (rev 243)
@@ -4255,4 +4255,12 @@
";(?<=()((?3))((?2)))"
+# Perl loops on this (PCRE2 used to!)
+
+/(?<=\Ka)/g,aftertext
+ aaaaa
+
+/(?<=\Ka)/altglobal,aftertext
+ aaaaa
+
# End of testinput2
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2015-04-03 11:14:19 UTC (rev 242)
+++ code/trunk/testdata/testinput5 2015-04-06 12:16:36 UTC (rev 243)
@@ -1643,4 +1643,10 @@
/[A-`]/iB,utf
abcdefghijklmno
+/(?<=\K\x{17f})/g,utf,aftertext
+ \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
+
+/(?<=\K\x{17f})/altglobal,utf,aftertext
+ \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
+
# End of testinput5
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-04-03 11:14:19 UTC (rev 242)
+++ code/trunk/testdata/testoutput2 2015-04-06 12:16:36 UTC (rev 243)
@@ -14260,4 +14260,32 @@
";(?<=()((?3))((?2)))"
Failed: error 125 at offset 20: lookbehind assertion is not fixed length
+# Perl loops on this (PCRE2 used to!)
+
+/(?<=\Ka)/g,aftertext
+ aaaaa
+ 0: a
+ 0+ aaaa
+ 0: a
+ 0+ aaa
+ 0: a
+ 0+ aa
+ 0: a
+ 0+ a
+ 0: a
+ 0+
+
+/(?<=\Ka)/altglobal,aftertext
+ aaaaa
+ 0: a
+ 0+ aaaa
+ 0: a
+ 0+ aaa
+ 0: a
+ 0+ aa
+ 0: a
+ 0+ a
+ 0: a
+ 0+
+
# End of testinput2
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2015-04-03 11:14:19 UTC (rev 242)
+++ code/trunk/testdata/testoutput5 2015-04-06 12:16:36 UTC (rev 243)
@@ -4021,4 +4021,30 @@
abcdefghijklmno
0: a
+/(?<=\K\x{17f})/g,utf,aftertext
+ \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}\x{17f}\x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}\x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}
+ 0: \x{17f}
+ 0+
+
+/(?<=\K\x{17f})/altglobal,utf,aftertext
+ \x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}\x{17f}\x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}\x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}\x{17f}
+ 0: \x{17f}
+ 0+ \x{17f}
+ 0: \x{17f}
+ 0+
+
# End of testinput5