Revision: 1383
http://vcs.pcre.org/viewvc?view=rev&revision=1383
Author: zherczeg
Date: 2013-10-18 18:50:06 +0100 (Fri, 18 Oct 2013)
Log Message:
-----------
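Allow auto-possessify to check more complex bracketed expressions: repeated brackets are no longer rejected, and atomic groups (OP_ONCE, OP_ONCE_NC) are now accepted after OP_BRAZERO/OP_BRAMINZERO.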
Modified Paths:
--------------
code/trunk/pcre_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-10-18 07:55:07 UTC (rev 1382)
+++ code/trunk/pcre_compile.c 2013-10-18 17:50:06 UTC (rev 1383)
@@ -3051,6 +3051,9 @@
for(;;)
{
+ /* All operations move the code pointer forward.
+ Therefore infinite recursions are not possible. */
+
c = *code;
/* Skip over callouts */
@@ -3104,14 +3107,6 @@
case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
- next_code = code;
- do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
-
- /* We do not support repeated brackets, because they can lead to
- infinite recursion. */
-
- if (*next_code != OP_KET) return FALSE;
-
next_code = code + GET(code, 1);
code += PRIV(OP_lengths)[c];
@@ -3127,19 +3122,16 @@
case OP_BRAMINZERO:
next_code = code + 1;
- if (*next_code != OP_BRA && *next_code != OP_CBRA)
- return FALSE;
+ if (*next_code != OP_BRA && *next_code != OP_CBRA
+ && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
- /* We do not support repeated brackets, because they can lead to
- infinite recursion. */
- if (*next_code != OP_KET) return FALSE;
-
/* The bracket content will be checked by the
OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE;
- if (!compare_opcodes(next_code, utf, cd, base_list, base_end)) return FALSE;
+ if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
+ return FALSE;
code += PRIV(OP_lengths)[c];
continue;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2013-10-18 07:55:07 UTC (rev 1382)
+++ code/trunk/testdata/testinput2 2013-10-18 17:50:06 UTC (rev 1383)
@@ -3940,6 +3940,22 @@
/[a-z]*\s#[ \t]?\S#[a-c]*\S#[C-G]+?\d#[4-8]*\D#[4-9,]*\D#[!$]{0,5}\w#[M-Xf-l]+\W#[a-c,]?\W/BZ
+/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/BZ
+
+/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/BZ
+
+/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/BZ
+
+/[a-c]*d/DZS
+
+/[a-c]+d/DZS
+
+/[a-c]?d/DZS
+
+/[a-c]{4,6}d/DZS
+
+/[a-c]{0,6}d/DZS
+
/-- End of special auto-possessive tests --/
/^A\o{1239}B/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-10-18 07:55:07 UTC (rev 1382)
+++ code/trunk/testdata/testoutput2 2013-10-18 17:50:06 UTC (rev 1383)
@@ -13558,7 +13558,7 @@
Alt
cc
Ket
- a+
+ a++
Bra
bb
Alt
@@ -13607,7 +13607,7 @@
Ket
Ket
#
- a+
+ a++
Brazero
Bra
bb
@@ -13829,6 +13829,188 @@
End
------------------------------------------------------------------
+/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/BZ
+------------------------------------------------------------------
+ Bra
+ a+
+ Brazero
+ CBra 1
+ aa
+ Alt
+ bb
+ KetRmax
+ c#
+ a*
+ Brazero
+ CBra 2
+ bb
+ Alt
+ cc
+ KetRmax
+ a#
+ a?+
+ Brazero
+ CBra 3
+ bb
+ Alt
+ cc
+ KetRmax
+ d#
+ [a-f]*
+ Brazero
+ CBra 4
+ g
+ Alt
+ hh
+ KetRmax
+ f
+ Ket
+ End
+------------------------------------------------------------------
+
+/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/BZ
+------------------------------------------------------------------
+ Bra
+ [a-f]*+
+ Brazero
+ CBra 1
+ g
+ Alt
+ hh
+ Alt
+ i
+ KetRmax
+ i#
+ [a-x]{4,}
+ Brazero
+ SCBra 2
+ y{0,6}
+ KetRmax
+ y#
+ [a-k]++
+ CBra 3
+ ll
+ Alt
+ mm
+ KetRmax
+ n
+ Ket
+ End
+------------------------------------------------------------------
+
+/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/BZ
+------------------------------------------------------------------
+ Bra
+ [a-f]*+
+ Once_NC
+ gg
+ Alt
+ hh
+ KetRmax
+ #
+ [a-f]*+
+ Brazero
+ Once_NC
+ gg
+ Alt
+ hh
+ Ket
+ #
+ [a-f]*
+ Brazero
+ Once_NC
+ gg
+ Alt
+ hh
+ KetRmax
+ a#
+ [a-f]*+
+ Brazero
+ Once_NC
+ gg
+ Alt
+ hh
+ KetRmax
+ h
+ Ket
+ End
+------------------------------------------------------------------
+
+/[a-c]*d/DZS
+------------------------------------------------------------------
+ Bra
+ [a-c]*+
+ d
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 1
+Starting byte set: a b c d
+
+/[a-c]+d/DZS
+------------------------------------------------------------------
+ Bra
+ [a-c]++
+ d
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 2
+Starting byte set: a b c
+
+/[a-c]?d/DZS
+------------------------------------------------------------------
+ Bra
+ [a-c]?+
+ d
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 1
+Starting byte set: a b c d
+
+/[a-c]{4,6}d/DZS
+------------------------------------------------------------------
+ Bra
+ [a-c]{4,6}+
+ d
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 5
+Starting byte set: a b c
+
+/[a-c]{0,6}d/DZS
+------------------------------------------------------------------
+ Bra
+ [a-c]{0,6}+
+ d
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 1
+Starting byte set: a b c d
+
/-- End of special auto-possessive tests --/
/^A\o{1239}B/