[Pcre-svn] [1383] code/trunk: Allow auto-possessify to check more complex bracketed expressions.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1383] code/trunk: Allow auto-possessify to check more complex bracketed expressions.
Revision: 1383
          http://vcs.pcre.org/viewvc?view=rev&revision=1383
Author:   zherczeg
Date:     2013-10-18 18:50:06 +0100 (Fri, 18 Oct 2013)


Log Message:
-----------
Allow auto-possessify to check more complex bracketed expressions.

Modified Paths:
--------------
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2013-10-18 07:55:07 UTC (rev 1382)
+++ code/trunk/pcre_compile.c    2013-10-18 17:50:06 UTC (rev 1383)
@@ -3051,6 +3051,9 @@


for(;;)
{
+ /* All operations move the code pointer forward.
+ Therefore infinite recursions are not possible. */
+
c = *code;

   /* Skip over callouts */
@@ -3104,14 +3107,6 @@
     case OP_ONCE_NC:
     case OP_BRA:
     case OP_CBRA:
-    next_code = code;
-    do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
-
-    /* We do not support repeated brackets, because they can lead to
-    infinite recursion. */
-
-    if (*next_code != OP_KET) return FALSE;
-
     next_code = code + GET(code, 1);
     code += PRIV(OP_lengths)[c];


@@ -3127,19 +3122,16 @@
     case OP_BRAMINZERO:


     next_code = code + 1;
-    if (*next_code != OP_BRA && *next_code != OP_CBRA)
-      return FALSE;
+    if (*next_code != OP_BRA && *next_code != OP_CBRA
+        && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;


     do next_code += GET(next_code, 1); while (*next_code == OP_ALT);


-    /* We do not support repeated brackets, because they can lead to
-    infinite recursion. */
-    if (*next_code != OP_KET) return FALSE;
-
     /* The bracket content will be checked by the
     OP_BRA/OP_CBRA case above. */
     next_code += 1 + LINK_SIZE;
-    if (!compare_opcodes(next_code, utf, cd, base_list, base_end)) return FALSE;
+    if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
+      return FALSE;


     code += PRIV(OP_lengths)[c];
     continue;


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2013-10-18 07:55:07 UTC (rev 1382)
+++ code/trunk/testdata/testinput2    2013-10-18 17:50:06 UTC (rev 1383)
@@ -3940,6 +3940,22 @@


/[a-z]*\s#[ \t]?\S#[a-c]*\S#[C-G]+?\d#[4-8]*\D#[4-9,]*\D#[!$]{0,5}\w#[M-Xf-l]+\W#[a-c,]?\W/BZ

+/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/BZ
+
+/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/BZ
+
+/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/BZ
+
+/[a-c]*d/DZS
+
+/[a-c]+d/DZS
+
+/[a-c]?d/DZS
+
+/[a-c]{4,6}d/DZS
+
+/[a-c]{0,6}d/DZS
+
/-- End of special auto-possessive tests --/

/^A\o{1239}B/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2013-10-18 07:55:07 UTC (rev 1382)
+++ code/trunk/testdata/testoutput2    2013-10-18 17:50:06 UTC (rev 1383)
@@ -13558,7 +13558,7 @@
         Alt
         cc
         Ket
-        a+
+        a++
         Bra
         bb
         Alt
@@ -13607,7 +13607,7 @@
         Ket
         Ket
         #
-        a+
+        a++
         Brazero
         Bra
         bb
@@ -13829,6 +13829,188 @@
         End
 ------------------------------------------------------------------


+/a+(aa|bb)*c#a*(bb|cc)*a#a?(bb|cc)*d#[a-f]*(g|hh)*f/BZ
+------------------------------------------------------------------
+        Bra
+        a+
+        Brazero
+        CBra 1
+        aa
+        Alt
+        bb
+        KetRmax
+        c#
+        a*
+        Brazero
+        CBra 2
+        bb
+        Alt
+        cc
+        KetRmax
+        a#
+        a?+
+        Brazero
+        CBra 3
+        bb
+        Alt
+        cc
+        KetRmax
+        d#
+        [a-f]*
+        Brazero
+        CBra 4
+        g
+        Alt
+        hh
+        KetRmax
+        f
+        Ket
+        End
+------------------------------------------------------------------
+
+/[a-f]*(g|hh|i)*i#[a-x]{4,}(y{0,6})*y#[a-k]+(ll|mm)+n/BZ
+------------------------------------------------------------------
+        Bra
+        [a-f]*+
+        Brazero
+        CBra 1
+        g
+        Alt
+        hh
+        Alt
+        i
+        KetRmax
+        i#
+        [a-x]{4,}
+        Brazero
+        SCBra 2
+        y{0,6}
+        KetRmax
+        y#
+        [a-k]++
+        CBra 3
+        ll
+        Alt
+        mm
+        KetRmax
+        n
+        Ket
+        End
+------------------------------------------------------------------
+
+/[a-f]*(?>gg|hh)+#[a-f]*(?>gg|hh)?#[a-f]*(?>gg|hh)*a#[a-f]*(?>gg|hh)*h/BZ
+------------------------------------------------------------------
+        Bra
+        [a-f]*+
+        Once_NC
+        gg
+        Alt
+        hh
+        KetRmax
+        #
+        [a-f]*+
+        Brazero
+        Once_NC
+        gg
+        Alt
+        hh
+        Ket
+        #
+        [a-f]*
+        Brazero
+        Once_NC
+        gg
+        Alt
+        hh
+        KetRmax
+        a#
+        [a-f]*+
+        Brazero
+        Once_NC
+        gg
+        Alt
+        hh
+        KetRmax
+        h
+        Ket
+        End
+------------------------------------------------------------------
+
+/[a-c]*d/DZS
+------------------------------------------------------------------
+        Bra
+        [a-c]*+
+        d
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 1
+Starting byte set: a b c d 
+
+/[a-c]+d/DZS
+------------------------------------------------------------------
+        Bra
+        [a-c]++
+        d
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 2
+Starting byte set: a b c 
+
+/[a-c]?d/DZS
+------------------------------------------------------------------
+        Bra
+        [a-c]?+
+        d
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 1
+Starting byte set: a b c d 
+
+/[a-c]{4,6}d/DZS
+------------------------------------------------------------------
+        Bra
+        [a-c]{4,6}+
+        d
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 5
+Starting byte set: a b c 
+
+/[a-c]{0,6}d/DZS
+------------------------------------------------------------------
+        Bra
+        [a-c]{0,6}+
+        d
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+Need char = 'd'
+Subject length lower bound = 1
+Starting byte set: a b c d 
+
 /-- End of special auto-possessive tests --/


/^A\o{1239}B/