Revision: 1381
http://vcs.pcre.org/viewvc?view=rev&revision=1381
Author: zherczeg
Date: 2013-10-16 07:23:00 +0100 (Wed, 16 Oct 2013)
Log Message:
-----------
Auto-possessifying now supports property comparison with zero repeat quantifiers.
Modified Paths:
--------------
code/trunk/pcre_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testinput6
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput6
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-10-15 16:49:12 UTC (rev 1380)
+++ code/trunk/pcre_compile.c 2013-10-16 06:23:00 UTC (rev 1381)
@@ -3041,6 +3041,7 @@
const pcre_uchar *next_code;
const pcre_uint8 *class_bits;
pcre_uint32 chr;
+BOOL accepted;
/* Note: the base_list[1] contains whether the current opcode has greedy
(represented by a non-zero value) quantifier. This is a different from
@@ -3169,15 +3170,16 @@
{
pcre_uint32 leftop, rightop;
- if (list[1] != 0) return FALSE; /* Must match at least one character */
leftop = base_list[0];
rightop = list[0];
#ifdef SUPPORT_UCP
+ accepted = FALSE; /* Always set in non-unicode case. */
if (leftop == OP_PROP || leftop == OP_NOTPROP)
{
- if (rightop == OP_EOD) return TRUE;
- if (rightop == OP_PROP || rightop == OP_NOTPROP)
+ if (rightop == OP_EOD)
+ accepted = TRUE;
+ else if (rightop == OP_PROP || rightop == OP_NOTPROP)
{
int n;
const pcre_uint8 *p;
@@ -3198,16 +3200,18 @@
n = propposstab[base_list[2]][list[2]];
switch(n)
{
- case 0: return FALSE;
- case 1: return bothprop;
- case 2: return (base_list[3] == list[3]) != same;
- case 3: return !same;
+ case 0: break;
+ case 1: accepted = bothprop; break;
+ case 2: accepted = (base_list[3] == list[3]) != same; break;
+ case 3: accepted = !same; break;
case 4: /* Left general category, right particular category */
- return risprop && catposstab[base_list[3]][list[3]] == same;
+ accepted = risprop && catposstab[base_list[3]][list[3]] == same;
+ break;
case 5: /* Right general category, left particular category */
- return lisprop && catposstab[list[3]][base_list[3]] == same;
+ accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
+ break;
/* This code is logically tricky. Think hard before fiddling with it.
The posspropstab table has four entries per row. Each row relates to
@@ -3232,48 +3236,58 @@
case 7: /* Left space vs right general category */
case 8: /* Left word vs right general category */
p = posspropstab[n-6];
- return risprop && lisprop ==
+ accepted = risprop && lisprop ==
(list[3] != p[0] &&
list[3] != p[1] &&
(list[3] != p[2] || !lisprop));
+ break;
case 9: /* Right alphanum vs left general category */
case 10: /* Right space vs left general category */
case 11: /* Right word vs left general category */
p = posspropstab[n-9];
- return lisprop && risprop ==
+ accepted = lisprop && risprop ==
(base_list[3] != p[0] &&
base_list[3] != p[1] &&
(base_list[3] != p[2] || !risprop));
+ break;
case 12: /* Left alphanum vs right particular category */
case 13: /* Left space vs right particular category */
case 14: /* Left word vs right particular category */
p = posspropstab[n-12];
- return risprop && lisprop ==
+ accepted = risprop && lisprop ==
(catposstab[p[0]][list[3]] &&
catposstab[p[1]][list[3]] &&
(list[3] != p[3] || !lisprop));
+ break;
case 15: /* Right alphanum vs left particular category */
case 16: /* Right space vs left particular category */
case 17: /* Right word vs left particular category */
p = posspropstab[n-15];
- return lisprop && risprop ==
+ accepted = lisprop && risprop ==
(catposstab[p[0]][base_list[3]] &&
catposstab[p[1]][base_list[3]] &&
(base_list[3] != p[3] || !risprop));
+ break;
}
}
- return FALSE;
}
else
#endif /* SUPPORT_UCP */
- return leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
+ accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
+
+ if (!accepted)
+ return FALSE;
+
+ if (list[1] == 0) return TRUE;
+ /* Might be an empty repeat. */
+ continue;
}
/* Control reaches here only if one of the items is a small character list.
@@ -3449,7 +3463,7 @@
static void
auto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd)
{
-register pcre_uchar c, d;
+register pcre_uchar c;
const pcre_uchar *end;
pcre_uchar *repeat_code;
pcre_uint32 list[8];
@@ -3513,18 +3527,17 @@
#endif
repeat_code = code + 1 + (32 / sizeof(pcre_uchar));
- d = *repeat_code;
- if (d >= OP_CRSTAR && d <= OP_CRMINRANGE)
+ c = *repeat_code;
+ if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
{
/* end must not be NULL. */
end = get_chr_property_list(code, utf, cd->fcc, list);
- list[1] = d == OP_CRSTAR || d == OP_CRPLUS || d == OP_CRQUERY ||
- d == OP_CRRANGE;
+ list[1] = (c & 1) == 0;
if (compare_opcodes(end, utf, cd, list, end))
{
- switch (d)
+ switch (c)
{
case OP_CRSTAR:
*repeat_code = OP_CRPOSSTAR;
@@ -3544,6 +3557,7 @@
}
}
}
+ c = *code;
}
switch(c)
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2013-10-15 16:49:12 UTC (rev 1380)
+++ code/trunk/testdata/testinput2 2013-10-16 06:23:00 UTC (rev 1381)
@@ -3909,31 +3909,33 @@
/[ab]*?/BZ
aaaa
-
+
/[ab]?/BZ
aaaa
-
+
/[ab]??/BZ
aaaa
-
+
/[ab]+/BZ
aaaa
-
+
/[ab]+?/BZ
aaaa
-
+
/[ab]{2,3}/BZ
aaaa
-
+
/[ab]{2,3}?/BZ
aaaa
-
+
/[ab]{2,}/BZ
- aaaa
+ aaaa
/[ab]{2,}?/BZ
- aaaa
+ aaaa
+/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/BZ
+
/-- End of special auto-possessive tests --/
/^A\o{1239}B/
Modified: code/trunk/testdata/testinput6
===================================================================
--- code/trunk/testdata/testinput6 2013-10-15 16:49:12 UTC (rev 1380)
+++ code/trunk/testdata/testinput6 2013-10-16 06:23:00 UTC (rev 1381)
@@ -1298,7 +1298,9 @@
/\x{1f80}+/8i
\x{1f88}\x{1f80}
+/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ
+
/-- Perl 5.12.4 gets these wrong, but 5.15.3 is OK --/
/\x{004b}+/8i
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-10-15 16:49:12 UTC (rev 1380)
+++ code/trunk/testdata/testoutput2 2013-10-16 06:23:00 UTC (rev 1381)
@@ -13685,7 +13685,7 @@
------------------------------------------------------------------
aaaa
0:
-
+
/[ab]?/BZ
------------------------------------------------------------------
Bra
@@ -13695,7 +13695,7 @@
------------------------------------------------------------------
aaaa
0: a
-
+
/[ab]??/BZ
------------------------------------------------------------------
Bra
@@ -13705,7 +13705,7 @@
------------------------------------------------------------------
aaaa
0:
-
+
/[ab]+/BZ
------------------------------------------------------------------
Bra
@@ -13715,7 +13715,7 @@
------------------------------------------------------------------
aaaa
0: aaaa
-
+
/[ab]+?/BZ
------------------------------------------------------------------
Bra
@@ -13725,7 +13725,7 @@
------------------------------------------------------------------
aaaa
0: a
-
+
/[ab]{2,3}/BZ
------------------------------------------------------------------
Bra
@@ -13735,7 +13735,7 @@
------------------------------------------------------------------
aaaa
0: aaa
-
+
/[ab]{2,3}?/BZ
------------------------------------------------------------------
Bra
@@ -13745,7 +13745,7 @@
------------------------------------------------------------------
aaaa
0: aa
-
+
/[ab]{2,}/BZ
------------------------------------------------------------------
Bra
@@ -13753,7 +13753,7 @@
Ket
End
------------------------------------------------------------------
- aaaa
+ aaaa
0: aaaa
/[ab]{2,}?/BZ
@@ -13763,9 +13763,24 @@
Ket
End
------------------------------------------------------------------
- aaaa
+ aaaa
0: aa
+/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/BZ
+------------------------------------------------------------------
+ Bra
+ \d++
+ \s{0,5}+
+ =
+ \s*+
+ \S?
+ =
+ \w{0,4}+
+ \W*+
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of special auto-possessive tests --/
/^A\o{1239}B/
Modified: code/trunk/testdata/testoutput6
===================================================================
--- code/trunk/testdata/testoutput6 2013-10-15 16:49:12 UTC (rev 1380)
+++ code/trunk/testdata/testoutput6 2013-10-16 06:23:00 UTC (rev 1381)
@@ -2111,7 +2111,22 @@
\x{1f88}\x{1f80}
0: \x{1f88}\x{1f80}
+/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ
+------------------------------------------------------------------
+ Bra
+ prop Nd ++
+ prop Xsp {0,5}+
+ =
+ prop Xsp *+
+ notprop Xsp ?
+ =
+ prop Xwd {0,4}+
+ notprop Xwd *+
+ Ket
+ End
+------------------------------------------------------------------
+
/-- Perl 5.12.4 gets these wrong, but 5.15.3 is OK --/
/\x{004b}+/8i