Revision: 1112
http://www.exim.org/viewvc/pcre2?view=rev&revision=1112
Author: ph10
Date: 2019-06-19 17:39:18 +0100 (Wed, 19 Jun 2019)
Log Message:
-----------
Minor code and comment tidies.
Modified Paths:
--------------
code/trunk/src/pcre2_study.c
Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c 2019-06-19 16:27:50 UTC (rev 1111)
+++ code/trunk/src/pcre2_study.c 2019-06-19 16:39:18 UTC (rev 1112)
@@ -88,6 +88,9 @@
countptr pointer to call count (to catch over complexity)
backref_cache vector for caching back references.
+This function is no longer called when the pattern contains (*ACCEPT); however,
+the old code for returning -1 is retained, just in case.
+
Returns: the minimum length
-1 \C in UTF-8 mode
or (*ACCEPT)
@@ -205,7 +208,9 @@
cc += 1 + LINK_SIZE;
break;
- /* ACCEPT makes things far too complicated; we have to give up. */
+ /* ACCEPT makes things far too complicated; we have to give up. In fact,
+ from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
+ used. However, leave the code in place, just in case. */
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
@@ -453,9 +458,9 @@
For backreferenes, if duplicate numbers are present in the pattern we check
for a reference to a duplicate. If it is, we don't know which version will
be referenced, so we have to set the minimum length to zero. */
-
- /* Duplicate named pattern back reference. */
+ /* Duplicate named pattern back reference. */
+
case OP_DNREF:
case OP_DNREFI:
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
@@ -481,7 +486,7 @@
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
-
+
dd = 0;
if (!dupcapused ||
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
@@ -508,7 +513,7 @@
if (dd < 0) return dd;
}
}
- }
+ }
backref_cache[recno] = dd;
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
@@ -524,7 +529,7 @@
cc += 1 + 2*IMM2_SIZE;
goto REPEAT_BACK_REFERENCE;
- /* Single back reference by number. References by name are converted to by
+ /* Single back reference by number. References by name are converted to by
number when there is no duplication. */
case OP_REF:
@@ -1585,7 +1590,6 @@
int
PRIV(study)(pcre2_real_code *re)
{
-int min;
int count = 0;
PCRE2_UCHAR *code;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
@@ -1607,21 +1611,23 @@
}
/* Find the minimum length of subject string. If the pattern can match an empty
-string, the minimum length is already known. If the pattern contains (*ACCEPT)
-all bets are off. If there are more back references than the size of the vector
-we are going to cache them in, do nothing. A pattern that complicated will
-probably take a long time to analyze and may in any case turn out to be too
-complicated. Note that back reference minima are held as 16-bit numbers. */
+string, the minimum length is already known. If the pattern contains (*ACCEPT)
+all bets are off, and we don't even try to find a minimum length. If there are
+more back references than the size of the vector we are going to cache them in,
+do nothing. A pattern that complicated will probably take a long time to
+analyze and may in any case turn out to be too complicated. Note that back
+reference minima are held as 16-bit numbers. */
if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
re->top_backref <= MAX_CACHE_BACKREF)
{
+ int min;
int backref_cache[MAX_CACHE_BACKREF+1];
backref_cache[0] = 0; /* Highest one that is set */
min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
switch(min)
{
- case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
+ case -1: /* \C in UTF mode or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */
case -2:
@@ -1631,8 +1637,7 @@
return 3; /* unrecognized opcode */
default:
- if (min > UINT16_MAX) min = UINT16_MAX;
- re->minlength = min;
+ re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
break;
}
}