[Pcre-svn] [312] code/trunk: Ignore {1} quantifiers.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [312] code/trunk: Ignore {1} quantifiers.
Revision: 312
          http://www.exim.org/viewvc/pcre2?view=rev&revision=312
Author:   ph10
Date:     2015-07-17 14:41:09 +0100 (Fri, 17 Jul 2015)
Log Message:
-----------
Ignore {1} quantifiers.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/ChangeLog    2015-07-17 13:41:09 UTC (rev 312)
@@ -37,7 +37,10 @@
 9. The handling of callouts during the pre-pass for named group identification 
 has been tightened up.


+10. The quantifier {1} can be ignored, whether greedy, non-greedy, or
+possessive. This is a very minor optimization.

+
 Version 10.20 30-June-2015
 --------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/src/pcre2_compile.c    2015-07-17 13:41:09 UTC (rev 312)
@@ -3253,11 +3253,11 @@
         else top_nest->nest_depth = nest_depth;
         }
       break;
-      
+
       /* Skip over a numerical or string argument for a callout. */
-      
+
       case CHAR_C:
-      ptr += 2; 
+      ptr += 2;
       if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break;
       if (IS_DIGIT(ptr[1]))
         {
@@ -3265,14 +3265,14 @@
         if (ptr[1] != CHAR_RIGHT_PARENTHESIS)
           {
           errorcode = ERR39;
-          ptr++; 
+          ptr++;
           goto FAILED;
-          }    
+          }
         break;
-        } 
+        }


       /* Handle a string argument */
-       
+
       ptr++;
       delimiter = 0;
       for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
@@ -3302,8 +3302,8 @@
         if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
         }
       while (ptr[0] != delimiter);
-      break; 
- 
+      break;
+
       case CHAR_NUMBER_SIGN:
       ptr += 3;
       while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
@@ -4719,6 +4719,10 @@
       }
     else repeat_type = greedy_default;


+    /* If the repeat is {1} we can ignore it. */
+
+    if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
+
     /* If previous was a recursion call, wrap it in atomic brackets so that
     previous becomes the atomic group. All recursions were so wrapped in the
     past, but it no longer happens for non-repeated recursions. In fact, the
@@ -6113,8 +6117,8 @@
             }


           /* During the pre-compile phase, we parse the string and update the
-          length. There is no need to generate any code. (In fact, the string 
-          has already been parsed in the pre-pass that looks for named 
+          length. There is no need to generate any code. (In fact, the string
+          has already been parsed in the pre-pass that looks for named
           parentheses, but it does no harm to leave this code in.) */


           if (lengthptr != NULL)     /* Only check the string */


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/testdata/testinput2    2015-07-17 13:41:09 UTC (rev 312)
@@ -1259,8 +1259,12 @@


 /(a(b(?2)c)){0,2}/IB

-/[ab]{1}+/IB
+/[ab]{1}+/B

+/()(?1){1}/B
+
+/()(?1)/B
+
 /((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
      Baby Bjorn Active Carrier - With free SHIPPING!!



Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/testdata/testoutput2    2015-07-17 13:41:09 UTC (rev 312)
@@ -4533,17 +4533,34 @@
 May match empty string
 Subject length lower bound = 0


-/[ab]{1}+/IB
+/[ab]{1}+/B
 ------------------------------------------------------------------
         Bra
-        [ab]{1,1}+
+        [ab]
         Ket
         End
 ------------------------------------------------------------------
-Capturing subpattern count = 0
-Starting code units: a b 
-Subject length lower bound = 1


+/()(?1){1}/B
+------------------------------------------------------------------
+        Bra
+        CBra 1
+        Ket
+        Recurse
+        Ket
+        End
+------------------------------------------------------------------
+
+/()(?1)/B
+------------------------------------------------------------------
+        Bra
+        CBra 1
+        Ket
+        Recurse
+        Ket
+        End
+------------------------------------------------------------------
+
 /((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
 Capturing subpattern count = 3
 Options: caseless