Revision: 1480
http://vcs.pcre.org/viewvc?view=rev&revision=1480
Author: ph10
Date: 2014-05-27 19:02:51 +0100 (Tue, 27 May 2014)
Log Message:
-----------
Fix auto-possessification bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput1
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/ChangeLog 2014-05-27 18:02:51 UTC (rev 1480)
@@ -40,6 +40,9 @@
level, when possessive repeated groups should always return to a higher
level as they have no backtrack points in them. The empty string test now
occurs at the outer level.
+
+8. Fixed a bug that was incorrectly auto-possessifying \w+ in the pattern
+ ^\w+(?>\s*)(?<=\w) which caused it not to match "test test".
Version 8.35 04-April-2014
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/pcre_compile.c 2014-05-27 18:02:51 UTC (rev 1480)
@@ -3076,6 +3076,7 @@
const pcre_uint8 *set1, *set2, *set_end;
pcre_uint32 chr;
BOOL accepted, invert_bits;
+BOOL entered_a_group = FALSE;
/* Note: base_list[1] records whether the current opcode has a greedy
quantifier (represented by a non-zero value). This is different from
@@ -3088,7 +3089,7 @@
Therefore infinite recursions are not possible. */
c = *code;
-
+
/* Skip over callouts */
if (c == OP_CALLOUT)
@@ -3117,7 +3118,7 @@
/* If the bracket is capturing and referenced by an OP_RECURSE, or if it is
an atomic sub-pattern (assert, once, etc.), the non-greedy case cannot be
converted to a possessive form. */
-
+
if (base_list[1] == 0) return FALSE;
switch(*(code - GET(code, 1)))
@@ -3129,8 +3130,10 @@
case OP_ONCE:
case OP_ONCE_NC:
/* Atomic sub-patterns and assertions can always auto-possessify their
- last iterator. */
- return TRUE;
+ last iterator. However, if the group was entered as a result of checking
+ a previous iterator, this is not possible. */
+
+ return !entered_a_group;
}
code += PRIV(OP_lengths)[c];
@@ -3149,6 +3152,8 @@
code = next_code + 1 + LINK_SIZE;
next_code += GET(next_code, 1);
}
+
+ entered_a_group = TRUE;
continue;
case OP_BRAZERO:
@@ -3168,13 +3173,16 @@
code += PRIV(OP_lengths)[c];
continue;
+
+ default:
+ break;
}
/* Check for a supported opcode, and load its properties. */
code = get_chr_property_list(code, utf, cd->fcc, list);
if (code == NULL) return FALSE; /* Unsupported */
-
+
/* If either opcode is a small character list, set pointers for comparing
characters from that list with another list, or with a property. */
@@ -3406,9 +3414,8 @@
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
- if (!accepted)
- return FALSE;
-
+ if (!accepted) return FALSE;
+
if (list[1] == 0) return TRUE;
/* Might be an empty repeat. */
continue;
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testinput1 2014-05-27 18:02:51 UTC (rev 1480)
@@ -5681,4 +5681,7 @@
'\A([^\"1]++|[\"2]([^\"3]*+|[\"4][\"5])*+[\"6])++'
NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+/^\w+(?>\s*)(?<=\w)/
+ test test
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testinput2 2014-05-27 18:02:51 UTC (rev 1480)
@@ -4062,4 +4062,6 @@
/(((((a)))))/Q
+/^\w+(?>\s*)(?<=\w)/BZ
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testoutput1 2014-05-27 18:02:51 UTC (rev 1480)
@@ -9335,4 +9335,8 @@
1: AFTER
2:
+/^\w+(?>\s*)(?<=\w)/
+ test test
+ 0: tes
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testoutput2 2014-05-27 18:02:51 UTC (rev 1480)
@@ -14149,4 +14149,20 @@
/(((((a)))))/Q
** Missing 0 or 1 after /Q
+/^\w+(?>\s*)(?<=\w)/BZ
+------------------------------------------------------------------
+ Bra
+ ^
+ \w+
+ Once_NC
+ \s*+
+ Ket
+ AssertB
+ Reverse
+ \w
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput2 --/