Revision: 889
http://www.exim.org/viewvc/pcre2?view=rev&revision=889
Author: ph10
Date: 2017-12-12 16:23:01 +0000 (Tue, 12 Dec 2017)
Log Message:
-----------
Fix an infelicity whereby a first character inside a non-assertive group
within a positive assertion was not found.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2017-12-12 15:01:51 UTC (rev 888)
+++ code/trunk/ChangeLog 2017-12-12 16:23:01 UTC (rev 889)
@@ -70,7 +70,15 @@
recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set
as the first character of a match.
+18. Characters in a leading positive assertion are considered for recording a
+first character of a match when the rest of the pattern does not provide one.
+However, a character in a non-assertive group within a leading assertion, such
+as in the pattern /(?=(a))\1?b/, caused this process to fail. This was an
+infelicity rather than an outright bug, because it did not affect the result of
+a match, just its speed. (In fact, in this case, the starting 'a' was
+subsequently picked up in the study.)
+
Version 10.30 14-August-2017
----------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2017-12-12 15:01:51 UTC (rev 888)
+++ code/trunk/src/pcre2_compile.c 2017-12-12 16:23:01 UTC (rev 889)
@@ -8106,13 +8106,13 @@
Arguments:
code points to start of compiled pattern
flags points to the first code unit flags
- inassert TRUE if in an assertion
+ inassert non-zero if in an assertion (incremented for each enclosing OP_ASSERT; 0 at top level)
Returns: the fixed first code unit, or 0 with REQ_NONE in flags
*/
static uint32_t
-find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert)
+find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
{
uint32_t c = 0;
int cflags = REQ_NONE;
@@ -8139,7 +8139,7 @@
case OP_SCBRAPOS:
case OP_ASSERT:
case OP_ONCE:
- d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT);
+ d = find_firstassertedcu(scode, &dflags, inassert + ((op==OP_ASSERT)?1:0));
if (dflags < 0)
return 0;
if (cflags < 0) { c = d; cflags = dflags; }
@@ -8154,7 +8154,7 @@
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
- if (!inassert) return 0;
+ if (inassert == 0) return 0;
if (cflags < 0) { c = scode[1]; cflags = 0; }
else if (c != scode[1]) return 0;
break;
@@ -8167,7 +8167,7 @@
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
- if (!inassert) return 0;
+ if (inassert == 0) return 0;
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0;
break;
@@ -9674,7 +9674,7 @@
actual literals that follow). */
if (firstcuflags < 0)
- firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE);
+ firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
/* Save the data for a first code unit. */
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2017-12-12 15:01:51 UTC (rev 888)
+++ code/trunk/testdata/testoutput2 2017-12-12 16:23:01 UTC (rev 889)
@@ -16358,7 +16358,7 @@
"(?=(a))\1?b"I
Capturing subpattern count = 1
Max back reference = 1
-Starting code units: a
+First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 1
ab