Revision: 312
http://www.exim.org/viewvc/pcre2?view=rev&revision=312
Author: ph10
Date: 2015-07-17 14:41:09 +0100 (Fri, 17 Jul 2015)
Log Message:
-----------
Ignore {1} quantifiers.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/ChangeLog 2015-07-17 13:41:09 UTC (rev 312)
@@ -37,7 +37,10 @@
9. The handling of callouts during the pre-pass for named group identification
has been tightened up.
+10. The quantifier {1} can be ignored, whether greedy, non-greedy, or
+possessive. This is a very minor optimization.
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/src/pcre2_compile.c 2015-07-17 13:41:09 UTC (rev 312)
@@ -3253,11 +3253,11 @@
else top_nest->nest_depth = nest_depth;
}
break;
-
+
/* Skip over a numerical or string argument for a callout. */
-
+
case CHAR_C:
- ptr += 2;
+ ptr += 2;
if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break;
if (IS_DIGIT(ptr[1]))
{
@@ -3265,14 +3265,14 @@
if (ptr[1] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR39;
- ptr++;
+ ptr++;
goto FAILED;
- }
+ }
break;
- }
+ }
/* Handle a string argument */
-
+
ptr++;
delimiter = 0;
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
@@ -3302,8 +3302,8 @@
if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
}
while (ptr[0] != delimiter);
- break;
-
+ break;
+
case CHAR_NUMBER_SIGN:
ptr += 3;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
@@ -4719,6 +4719,10 @@
}
else repeat_type = greedy_default;
+ /* If the repeat is {1} we can ignore it. */
+
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
+
/* If previous was a recursion call, wrap it in atomic brackets so that
previous becomes the atomic group. All recursions were so wrapped in the
past, but it no longer happens for non-repeated recursions. In fact, the
@@ -6113,8 +6117,8 @@
}
/* During the pre-compile phase, we parse the string and update the
- length. There is no need to generate any code. (In fact, the string
- has already been parsed in the pre-pass that looks for named
+ length. There is no need to generate any code. (In fact, the string
+ has already been parsed in the pre-pass that looks for named
parentheses, but it does no harm to leave this code in.) */
if (lengthptr != NULL) /* Only check the string */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/testdata/testinput2 2015-07-17 13:41:09 UTC (rev 312)
@@ -1259,8 +1259,12 @@
/(a(b(?2)c)){0,2}/IB
-/[ab]{1}+/IB
+/[ab]{1}+/B
+/()(?1){1}/B
+
+/()(?1)/B
+
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
Baby Bjorn Active Carrier - With free SHIPPING!!
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-07-16 17:47:20 UTC (rev 311)
+++ code/trunk/testdata/testoutput2 2015-07-17 13:41:09 UTC (rev 312)
@@ -4533,17 +4533,34 @@
May match empty string
Subject length lower bound = 0
-/[ab]{1}+/IB
+/[ab]{1}+/B
------------------------------------------------------------------
Bra
- [ab]{1,1}+
+ [ab]
Ket
End
------------------------------------------------------------------
-Capturing subpattern count = 0
-Starting code units: a b
-Subject length lower bound = 1
+/()(?1){1}/B
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ Ket
+ Recurse
+ Ket
+ End
+------------------------------------------------------------------
+
+/()(?1)/B
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ Ket
+ Recurse
+ Ket
+ End
+------------------------------------------------------------------
+
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
Capturing subpattern count = 3
Options: caseless