[Pcre-svn] [621] code/trunk: Remove atomic from single repeats; convert possessive atomic to possessive

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [621] code/trunk: Remove atomic from single repeats; convert possessive atomic to possessive
Revision: 621
          http://vcs.pcre.org/viewvc?view=rev&revision=621
Author:   ph10
Date:     2011-07-18 11:14:09 +0100 (Mon, 18 Jul 2011)


Log Message:
-----------
Remove atomic from single repeats; convert possessive atomic to possessive
non-atomic (because they are the same).

Modified Paths:
--------------
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput11
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput10
    code/trunk/testdata/testoutput11
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput5
    code/trunk/testdata/testoutput7


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/pcre_compile.c    2011-07-18 10:14:09 UTC (rev 621)
@@ -4208,6 +4208,35 @@
       ptr++;
       }
     else repeat_type = greedy_default;
+    
+    /* If previous was a recursion call, wrap it in atomic brackets so that 
+    previous becomes the atomic group. All recursions were so wrapped in the
+    past, but it no longer happens for non-repeated recursions. In fact, the
+    repeated ones could be re-implemented independently so as not to need this,
+    but for the moment we rely on the code for repeating groups. */
+    
+    if (*previous == OP_RECURSE)
+      {
+      memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
+      *previous = OP_ONCE;
+      PUT(previous, 1, 2 + 2*LINK_SIZE);
+      previous[2 + 2*LINK_SIZE] = OP_KET;
+      PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
+      code += 2 + 2 * LINK_SIZE;
+      length_prevgroup = 3 + 3*LINK_SIZE;
+      
+      /* When actually compiling, we need to check whether this was a forward
+      reference, and if so, adjust the offset. */
+      
+      if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE)
+        {
+        int offset = GET(cd->hwm, -LINK_SIZE);
+        if (offset == previous + 1 - cd->start_code)
+          PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE); 
+        }    
+      }    
+      
+    /* Now handle repetition for the different types of item. */


     /* If previous was a character match, abolish the item and generate a
     repeat item instead. If a char item has a minumum of more than one, ensure
@@ -4727,14 +4756,10 @@
         }


       /* If the maximum is unlimited, set a repeater in the final copy. For
-      ONCE brackets, that's all we need to do. 
-      
-      (To be done next, after recursion adjusted)
-      However, possessively repeated 
-      ONCE brackets can be converted into non-capturing brackets, as the 
-      behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to 
+      ONCE brackets, that's all we need to do. However, possessively repeated
+      ONCE brackets can be converted into non-capturing brackets, as the
+      behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
       deal with possessive ONCEs specially.
-      (....) 


       Otherwise, if the quantifier was possessive, we convert the BRA code to
       the POS form, and the KET code to KETRPOS. (It turns out to be convenient
@@ -4755,12 +4780,8 @@
         {
         uschar *ketcode = code - 1 - LINK_SIZE;
         uschar *bracode = ketcode - GET(ketcode, 1);
-
-/****
-        if (*bracode == OP_ONCE && possessive_quantifier)
-          *bracode = OP_BRA; 
-****/
-           
+         
+        if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA; 
         if (*bracode == OP_ONCE) 
           *ketcode = OP_KETRMAX + repeat_type;
         else
@@ -5677,10 +5698,10 @@


               /* Fudge the value of "called" so that when it is inserted as an
               offset below, what it actually inserted is the reference number
-              of the group. */
+              of the group. Then remember the forward reference. */


               called = cd->start_code + recno;
-              PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));
+              PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
               }


             /* If not a forward reference, and the subpattern is still open,
@@ -5695,23 +5716,11 @@
               }
             }


-          /* Insert the recursion/subroutine item, automatically wrapped inside
-          "once" brackets. Set up a "previous group" length so that a
-          subsequent quantifier will work. */
+          /* Insert the recursion/subroutine item. */


-          *code = OP_ONCE;
-          PUT(code, 1, 2 + 2*LINK_SIZE);
-          code += 1 + LINK_SIZE;
-
           *code = OP_RECURSE;
           PUT(code, 1, (int)(called - cd->start_code));
           code += 1 + LINK_SIZE;
-
-          *code = OP_KET;
-          PUT(code, 1, 2 + 2*LINK_SIZE);
-          code += 1 + LINK_SIZE;
-
-          length_prevgroup = 3 + 3*LINK_SIZE;
           }


         /* Can't determine a first byte now */


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testinput1    2011-07-18 10:14:09 UTC (rev 621)
@@ -4181,4 +4181,13 @@
 /(?(?=(a))a)(b)/
     ab


+/^(?:a|ab)++c/
+    aaaabc
+
+/^(?>a|ab)++c/
+    aaaabc
+
+/^(?:a|ab)+c/
+    aaaabc
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testinput11    2011-07-18 10:14:09 UTC (rev 621)
@@ -633,4 +633,10 @@
 /(?(?=(a(*ACCEPT)z))a)/
     a


+/^(a)(?1)+ab/
+    aaaab
+    
+/^(a)(?1)++ab/
+    aaaab
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testinput2    2011-07-18 10:14:09 UTC (rev 621)
@@ -3771,4 +3771,12 @@


/(abc)(?1)/SI

+/^(?>a)++/
+    aa\M
+    aaaaaaaaa\M 
+    
+/(a)(?1)++/
+    aa\M
+    aaaaaaaaa\M  
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput1    2011-07-18 10:14:09 UTC (rev 621)
@@ -6843,4 +6843,16 @@
  1: a
  2: b


+/^(?:a|ab)++c/
+    aaaabc
+No match
+
+/^(?>a|ab)++c/
+    aaaabc
+No match
+
+/^(?:a|ab)+c/
+    aaaabc
+ 0: aaaabc
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput10    2011-07-18 10:14:09 UTC (rev 621)
@@ -166,18 +166,16 @@
 ------------------------------------------------------------------


 /(a(?1)b)/BM
-Memory allocation (code space): 28
+Memory allocation (code space): 22
 ------------------------------------------------------------------
-  0  24 Bra
-  3  18 CBra 1
+  0  18 Bra
+  3  12 CBra 1
   8     a
- 10   6 Once
- 13   3 Recurse
- 16   6 Ket
- 19     b
- 21  18 Ket
- 24  24 Ket
- 27     End
+ 10   3 Recurse
+ 13     b
+ 15  12 Ket
+ 18  18 Ket
+ 21     End
 ------------------------------------------------------------------


/(a(?1)+b)/BM
@@ -234,9 +232,9 @@
------------------------------------------------------------------

 /(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 43
+Memory allocation (code space): 37
 ------------------------------------------------------------------
-  0  36 Bra
+  0  30 Bra
   3   7 CBra 1
   8     a
  10   7 Ket
@@ -245,12 +243,10 @@
  15     Any
  16     \1
  19     bbb
- 25   6 Once
- 28   3 Recurse
- 31   6 Ket
- 34     d
- 36  36 Ket
- 39     End
+ 25   3 Recurse
+ 28     d
+ 30  30 Ket
+ 33     End
 ------------------------------------------------------------------


/abc(?C255)de(?C)f/BM

Modified: code/trunk/testdata/testoutput11
===================================================================
--- code/trunk/testdata/testoutput11    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput11    2011-07-18 10:14:09 UTC (rev 621)
@@ -1203,4 +1203,13 @@
  0: a
  1: a


+/^(a)(?1)+ab/
+    aaaab
+ 0: aaaab
+ 1: a
+    
+/^(a)(?1)++ab/
+    aaaab
+No match
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput2    2011-07-18 10:14:09 UTC (rev 621)
@@ -4004,9 +4004,7 @@
         Bra
         CBra 1
         a
-        Once
         Recurse
-        Ket
         b
         Ket
         Ket
@@ -4227,9 +4225,7 @@
         Any
         \1
         bbb
-        Once
         Recurse
-        Ket
         d
         Ket
         End
@@ -4601,9 +4597,7 @@
         a
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -4623,9 +4617,7 @@
         a
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         KetRmax
@@ -4646,9 +4638,7 @@
         a
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -4657,9 +4647,7 @@
         a
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -7765,9 +7753,7 @@
 ------------------------------------------------------------------
         Bra
         ^
-        Once
         Recurse
-        Ket
         [()]
         CBra 1
         Ket
@@ -7801,9 +7787,7 @@
 ------------------------------------------------------------------
         Bra
         ^
-        Once
         Recurse
-        Ket
         ()
         CBra 1
         Ket
@@ -7815,9 +7799,7 @@
 ------------------------------------------------------------------
         Bra
         ^
-        Once
         Recurse
-        Ket
         [(\]a]
         CBra 1
         Ket
@@ -7830,9 +7812,7 @@
 ------------------------------------------------------------------
         Bra
         ^
-        Once
         Recurse
-        Ket
         CBra 1
         Ket
         Ket
@@ -8284,10 +8264,8 @@
         Alt
         c
         Ket
-        Once
         Recurse
         Ket
-        Ket
         End
 ------------------------------------------------------------------
     abc
@@ -8298,9 +8276,7 @@
 ------------------------------------------------------------------
         Bra
         xy
-        Once
         Recurse
-        Ket
         CBra 1
         abc
         Ket
@@ -9928,12 +9904,8 @@
 /(?&word)(?&element)(?(DEFINE)(?<element><[^m][^>]>[^<])(?<word>\w*+))/BZ
 ------------------------------------------------------------------
         Bra
-        Once
         Recurse
-        Ket
-        Once
         Recurse
-        Ket
         Cond
         Cond def
         CBra 1
@@ -9954,12 +9926,8 @@
 /(?&word)(?&element)(?(DEFINE)(?<element><[^\d][^>]>[^<])(?<word>\w*+))/BZ
 ------------------------------------------------------------------
         Bra
-        Once
         Recurse
-        Ket
-        Once
         Recurse
-        Ket
         Cond
         Cond def
         CBra 1
@@ -10700,12 +10668,10 @@
         Cond nrecurse 1
         $
         Alt
-        Once
         Recurse
         Ket
         Ket
         Ket
-        Ket
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 4
@@ -10923,12 +10889,10 @@
 ------------------------------------------------------------------
         Bra
         ^
-        Once
-        Brazero
-        Once
+        Braposzero
+        SBraPos
         Recurse
-        KetRmax
-        Ket
+        KetRpos
         Cond
         Cond def
         CBra 1
@@ -11940,4 +11904,26 @@
 Subject length lower bound = 6
 No set of starting bytes


+/^(?>a)++/
+    aa\M
+Minimum match() limit = 5
+Minimum match() recursion limit = 3
+ 0: aa
+    aaaaaaaaa\M 
+Minimum match() limit = 12
+Minimum match() recursion limit = 3
+ 0: aaaaaaaaa
+    
+/(a)(?1)++/
+    aa\M
+Minimum match() limit = 7
+Minimum match() recursion limit = 5
+ 0: aa
+ 1: a
+    aaaaaaaaa\M  
+Minimum match() limit = 21
+Minimum match() recursion limit = 5
+ 0: aaaaaaaaa
+ 1: a
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput5    2011-07-18 10:14:09 UTC (rev 621)
@@ -599,10 +599,8 @@
         Bra
         \x{100}*+
         \d
-        Once
         Recurse
         Ket
-        Ket
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
@@ -906,11 +904,9 @@
         \x{100}abc
         CBra 1
         xyz
-        Once
         Recurse
         Ket
         Ket
-        Ket
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
@@ -925,11 +921,9 @@
         abc
         CBra 1
         xyz
-        Once
         Recurse
         Ket
         Ket
-        Ket
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
@@ -944,11 +938,9 @@
         abc
         CBra 1
         xyz
-        Once
         Recurse
         Ket
         Ket
-        Ket
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
@@ -964,9 +956,7 @@
         \x{100}
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -987,9 +977,7 @@
         \x{100}
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -998,9 +986,7 @@
         \x{100}
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -1021,9 +1007,7 @@
         \x{100}
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -1044,9 +1028,7 @@
         \x{100}
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket
@@ -1055,9 +1037,7 @@
         \x{100}
         CBra 2
         b
-        Once
         Recurse
-        Ket
         c
         Ket
         Ket


Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7    2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput7    2011-07-18 10:14:09 UTC (rev 621)
@@ -676,6 +676,7 @@
 /^(a*\w|ab)=(?1)/
     ab=ab
  0: ab=ab
+ 1: ab=a


 /^([^()]|\((?1)*\))*$/
     abc