[Pcre-svn] [1584] code/trunk: Give up on minimum length for …

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [1584] code/trunk: Give up on minimum length for overly complex patterns.
Revision: 1584
          http://vcs.pcre.org/viewvc?view=rev&revision=1584
Author:   ph10
Date:     2015-08-01 10:30:02 +0100 (Sat, 01 Aug 2015)
Log Message:
-----------
Give up on minimum length for overly complex patterns.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_study.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/ChangeLog    2015-08-01 09:30:02 UTC (rev 1584)
@@ -97,6 +97,9 @@


 25. If (?R was followed by - or +, incorrect behaviour happened instead of a
     diagnostic.
+    
+26. Arrange to give up on finding the minimum matching length for overly 
+    complex patterns. 



Version 8.37 28-April-2015

Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c    2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/pcre_study.c    2015-08-01 09:30:02 UTC (rev 1584)
@@ -71,6 +71,7 @@
   startcode       pointer to start of the whole pattern's code
   options         the compiling options
   recurses        chain of recurse_check to catch mutual recursion
+  countptr        pointer to call count (to catch excessive complexity)


 Returns:   the minimum length
            -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@@ -80,7 +81,8 @@


static int
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
- const pcre_uchar *startcode, int options, recurse_check *recurses)
+ const pcre_uchar *startcode, int options, recurse_check *recurses,
+ int *countptr)
{
int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
@@ -90,6 +92,8 @@
register int branchlength = 0;
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;

+if ((*countptr)++ > 1000) return -1;   /* too complex */
+
 if (*code == OP_CBRA || *code == OP_SCBRA ||
     *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;


@@ -131,7 +135,7 @@
     case OP_SBRAPOS:
     case OP_ONCE:
     case OP_ONCE_NC:
-    d = find_minlength(re, cc, startcode, options, recurses);
+    d = find_minlength(re, cc, startcode, options, recurses, countptr);
     if (d < 0) return d;
     branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -415,7 +419,8 @@
             int dd;
             this_recurse.prev = recurses;
             this_recurse.group = cs;
-            dd = find_minlength(re, cs, startcode, options, &this_recurse);
+            dd = find_minlength(re, cs, startcode, options, &this_recurse,
+              countptr);
             if (dd < d) d = dd;
             }
           }
@@ -451,7 +456,8 @@
           {
           this_recurse.prev = recurses;
           this_recurse.group = cs;
-          d = find_minlength(re, cs, startcode, options, &this_recurse);
+          d = find_minlength(re, cs, startcode, options, &this_recurse,
+            countptr);
           }
         }
       }
@@ -514,7 +520,7 @@
         this_recurse.prev = recurses;
         this_recurse.group = cs;
         branchlength += find_minlength(re, cs, startcode, options,
-          &this_recurse);
+          &this_recurse, countptr);
         }
       }
     cc += 1 + LINK_SIZE;
@@ -1453,6 +1459,7 @@
 #endif
 {
 int min;
+int count = 0;
 BOOL bits_set = FALSE;
 pcre_uint8 start_bits[32];
 PUBL(extra) *extra = NULL;
@@ -1539,7 +1546,7 @@


/* Find the minimum length of subject string. */

-switch(min = find_minlength(re, code, code, re->options, NULL))
+switch(min = find_minlength(re, code, code, re->options, NULL, &count))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/testdata/testinput2    2015-08-01 09:30:02 UTC (rev 1584)
@@ -4192,4 +4192,6 @@


/(?R-:(?</

+/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I
+
/-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-07-31 09:54:44 UTC (rev 1583)
+++ code/trunk/testdata/testoutput2    2015-08-01 09:30:02 UTC (rev 1584)
@@ -14526,4 +14526,15 @@
 /(?R-:(?</
 Failed: (?R or (?[+-]digits must be followed by ) at offset 3


+/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I
+Capturing subpattern count = 8
+Max back reference = 8
+Named capturing subpatterns:
+ R 7
+ R 8
+No options
+Duplicate name status changes
+No first char
+Need char = '0'
+
/-- End of testinput2 --/