Revision: 1584
http://vcs.pcre.org/viewvc?view=rev&revision=1584
Author: ph10
Date: 2015-08-01 10:30:02 +0100 (Sat, 01 Aug 2015)
Log Message:
-----------
Give up on minimum length for overly complex patterns.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_study.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/ChangeLog 2015-08-01 09:30:02 UTC (rev 1584)
@@ -97,6 +97,9 @@
25. If (?R was followed by - or + incorrect behaviour happened instead of a
diagnostic.
+
+26. Arrange to give up on finding the minimum matching length for overly
+ complex patterns.
Version 8.37 28-April-2015
Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c 2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/pcre_study.c 2015-08-01 09:30:02 UTC (rev 1584)
@@ -71,6 +71,7 @@
startcode pointer to start of the whole pattern's code
options the compiling options
recurses chain of recurse_check to catch mutual recursion
+ countptr pointer to call count (to catch over complexity)
Returns: the minimum length
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@@ -80,7 +81,8 @@
static int
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
- const pcre_uchar *startcode, int options, recurse_check *recurses)
+ const pcre_uchar *startcode, int options, recurse_check *recurses,
+ int *countptr)
{
int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
@@ -90,6 +92,8 @@
register int branchlength = 0;
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
+if ((*countptr)++ > 1000) return -1; /* too complex */
+
if (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
@@ -131,7 +135,7 @@
case OP_SBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
- d = find_minlength(re, cc, startcode, options, recurses);
+ d = find_minlength(re, cc, startcode, options, recurses, countptr);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -415,7 +419,8 @@
int dd;
this_recurse.prev = recurses;
this_recurse.group = cs;
- dd = find_minlength(re, cs, startcode, options, &this_recurse);
+ dd = find_minlength(re, cs, startcode, options, &this_recurse,
+ countptr);
if (dd < d) d = dd;
}
}
@@ -451,7 +456,8 @@
{
this_recurse.prev = recurses;
this_recurse.group = cs;
- d = find_minlength(re, cs, startcode, options, &this_recurse);
+ d = find_minlength(re, cs, startcode, options, &this_recurse,
+ countptr);
}
}
}
@@ -514,7 +520,7 @@
this_recurse.prev = recurses;
this_recurse.group = cs;
branchlength += find_minlength(re, cs, startcode, options,
- &this_recurse);
+ &this_recurse, countptr);
}
}
cc += 1 + LINK_SIZE;
@@ -1453,6 +1459,7 @@
#endif
{
int min;
+int count = 0;
BOOL bits_set = FALSE;
pcre_uint8 start_bits[32];
PUBL(extra) *extra = NULL;
@@ -1539,7 +1546,7 @@
/* Find the minimum length of subject string. */
-switch(min = find_minlength(re, code, code, re->options, NULL))
+switch(min = find_minlength(re, code, code, re->options, NULL, &count))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/testdata/testinput2 2015-08-01 09:30:02 UTC (rev 1584)
@@ -4192,4 +4192,6 @@
/(?R-:(?</
+/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/testdata/testoutput2 2015-08-01 09:30:02 UTC (rev 1584)
@@ -14526,4 +14526,15 @@
/(?R-:(?</
Failed: (?R or (?[+-]digits must be followed by ) at offset 3
+/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I
+Capturing subpattern count = 8
+Max back reference = 8
+Named capturing subpatterns:
+ R 7
+ R 8
+No options
+Duplicate name status changes
+No first char
+Need char = '0'
+
/-- End of testinput2 --/