Revision: 620
http://www.exim.org/viewvc/pcre2?view=rev&revision=620
Author: ph10
Date: 2016-12-20 16:47:41 +0000 (Tue, 20 Dec 2016)
Log Message:
-----------
Fix incorrect internal error for very complicated back reference handling.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_study.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-12-11 16:47:39 UTC (rev 619)
+++ code/trunk/ChangeLog 2016-12-20 16:47:41 UTC (rev 620)
@@ -223,7 +223,13 @@
repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX
but didn't).
+35. When a pattern is too complicated, PCRE2 gives up trying to find a minimum
+matching length and just records zero. Typically this happens when there are
+too many nested or recursive back references. If the limit was reached in
+certain recursive cases it failed to be triggered and an internal error could
+be the result.
+
Version 10.22 29-July-2016
--------------------------
Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c 2016-12-11 16:47:39 UTC (rev 619)
+++ code/trunk/src/pcre2_study.c 2016-12-20 16:47:41 UTC (rev 620)
@@ -485,6 +485,7 @@
this_recurse.prev = recurses;
this_recurse.group = cs;
dd = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
+ if (dd < 0) return dd;
if (dd < d) d = dd;
}
}
@@ -525,6 +526,7 @@
this_recurse.prev = recurses;
this_recurse.group = cs;
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
+ if (d < 0) return d;
}
}
}
@@ -1551,7 +1553,8 @@
if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
{
- switch(min = find_minlength(re, code, code, utf, NULL, &count))
+ min = find_minlength(re, code, code, utf, NULL, &count);
+ switch(min)
{
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2016-12-11 16:47:39 UTC (rev 619)
+++ code/trunk/testdata/testinput2 2016-12-20 16:47:41 UTC (rev 620)
@@ -4935,4 +4935,10 @@
".+\QX\E+"B,auto_callout,no_auto_possess
+# This one is here because Perl gives an 'unmatched )' error which goes away
+# if one of the \) sequences is removed - which is weird. PCRE finds it too
+# complicated to find a minimum matching length.
+
+"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2016-12-11 16:47:39 UTC (rev 619)
+++ code/trunk/testdata/testoutput2 2016-12-20 16:47:41 UTC (rev 620)
@@ -15407,6 +15407,16 @@
End
------------------------------------------------------------------
+# This one is here because Perl gives an 'unmatched )' error which goes away
+# if one of the \) sequences is removed - which is weird. PCRE finds it too
+# complicated to find a minimum matching length.
+
+"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I
+Capturing subpattern count = 108
+Max back reference = 22
+Contains explicit CR or LF match
+Subject length lower bound = 0
+
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data