[Pcre-svn] [1547] code/trunk: Fix slow study when much mutua…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1547] code/trunk: Fix slow study when much mutual recursion.
Revision: 1547
          http://vcs.pcre.org/viewvc?view=rev&revision=1547
Author:   ph10
Date:     2015-04-13 10:31:55 +0100 (Mon, 13 Apr 2015)


Log Message:
-----------
Fix slow study when much mutual recursion.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_internal.h
    code/trunk/pcre_study.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-04-08 16:56:28 UTC (rev 1546)
+++ code/trunk/ChangeLog    2015-04-13 09:31:55 UTC (rev 1547)
@@ -158,7 +158,13 @@
     other cases where backtracking after \C could crash. This set of bugs was
     discovered by the LLVM fuzzer.


+20. The function for finding the minimum length of a matching string could take
+    a very long time if mutual recursion was present many times in a pattern,
+    for example, /((?2){73}(?2))((?1))/. A better mutual recursion detection
+    method has been implemented. This infelicity was discovered by the LLVM
+    fuzzer.


+
Version 8.36 26-September-2014
------------------------------


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2015-04-08 16:56:28 UTC (rev 1546)
+++ code/trunk/pcre_compile.c    2015-04-13 09:31:55 UTC (rev 1547)
@@ -866,15 +866,7 @@
 };



-/* Structure for mutual recursion detection. */

-typedef struct recurse_check {
-  struct recurse_check *prev;
-  const pcre_uchar *group;
-} recurse_check;
-
-
-
 /*************************************************
 *            Find an error text                  *
 *************************************************/


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2015-04-08 16:56:28 UTC (rev 1546)
+++ code/trunk/pcre_internal.h    2015-04-13 09:31:55 UTC (rev 1547)
@@ -2460,6 +2460,13 @@
   pcre_uchar *current_branch;
 } branch_chain;


+/* Structure for mutual recursion detection. */
+
+typedef struct recurse_check {
+ struct recurse_check *prev;
+ const pcre_uchar *group;
+} recurse_check;
+
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_exec(). */


Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c    2015-04-08 16:56:28 UTC (rev 1546)
+++ code/trunk/pcre_study.c    2015-04-13 09:31:55 UTC (rev 1547)
@@ -70,7 +70,7 @@
   code            pointer to start of group (the bracket)
   startcode       pointer to start of the whole pattern's code
   options         the compiling options
-  int             RECURSE depth
+  recurses        chain of recurse_check to catch mutual recursion


 Returns:   the minimum length
            -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@@ -80,12 +80,13 @@


static int
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
- const pcre_uchar *startcode, int options, int recurse_depth)
+ const pcre_uchar *startcode, int options, recurse_check *recurses)
{
int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
BOOL utf = (options & PCRE_UTF8) != 0;
BOOL had_recurse = FALSE;
+recurse_check this_recurse;
register int branchlength = 0;
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;

@@ -130,7 +131,7 @@
     case OP_SBRAPOS:
     case OP_ONCE:
     case OP_ONCE_NC:
-    d = find_minlength(re, cc, startcode, options, recurse_depth);
+    d = find_minlength(re, cc, startcode, options, recurses);
     if (d < 0) return d;
     branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -393,17 +394,31 @@
         ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
         if (cs == NULL) return -2;
         do ce += GET(ce, 1); while (*ce == OP_ALT);
-        if ((cc > cs && cc < ce) || recurse_depth > 10)
+        if (cc > cs && cc < ce)     /* Simple recursion */
           {
           d = 0;
           had_recurse = TRUE;
           break;
           }
         else
-          {
-          int dd = find_minlength(re, cs, startcode, options, recurse_depth+1);
-          if (dd < d) d = dd;
-          }
+          {    
+          recurse_check *r = recurses;                                       
+          for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+          if (r != NULL)           /* Mutual recursion */
+            {                                                              
+            d = 0;                                                       
+            had_recurse = TRUE;                                           
+            break;                                                      
+            }                
+          else
+            {
+            int dd;
+            this_recurse.prev = recurses;
+            this_recurse.group = cs;  
+            dd = find_minlength(re, cs, startcode, options, &this_recurse);
+            if (dd < d) d = dd;
+            }
+          }   
         slot += re->name_entry_size;
         }
       }
@@ -418,14 +433,26 @@
       ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
       if (cs == NULL) return -2;
       do ce += GET(ce, 1); while (*ce == OP_ALT);
-      if ((cc > cs && cc < ce) || recurse_depth > 10)
+      if (cc > cs && cc < ce)    /* Simple recursion */
         {
         d = 0;
         had_recurse = TRUE;
         }
       else
-        {
-        d = find_minlength(re, cs, startcode, options, recurse_depth + 1);
+        {    
+        recurse_check *r = recurses;                                       
+        for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+        if (r != NULL)           /* Mutual recursion */
+          {                                                              
+          d = 0;                                                       
+          had_recurse = TRUE;                                           
+          }                
+        else
+          {
+          this_recurse.prev = recurses;
+          this_recurse.group = cs;  
+          d = find_minlength(re, cs, startcode, options, &this_recurse);
+          } 
         }
       }
     else d = 0;
@@ -474,12 +501,21 @@
     case OP_RECURSE:
     cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
     do ce += GET(ce, 1); while (*ce == OP_ALT);
-    if ((cc > cs && cc < ce) || recurse_depth > 10)
+    if (cc > cs && cc < ce)    /* Simple recursion */
       had_recurse = TRUE;
     else
-      {
-      branchlength += find_minlength(re, cs, startcode, options,
-        recurse_depth + 1);
+      {    
+      recurse_check *r = recurses;                                       
+      for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+      if (r != NULL)           /* Mutual recursion */
+        had_recurse = TRUE;                                           
+      else
+        {
+        this_recurse.prev = recurses;
+        this_recurse.group = cs;  
+        branchlength += find_minlength(re, cs, startcode, options,
+          &this_recurse);
+        }   
       }
     cc += 1 + LINK_SIZE;
     break;
@@ -1503,7 +1539,7 @@


/* Find the minimum length of subject string. */

-switch(min = find_minlength(re, code, code, re->options, 0))
+switch(min = find_minlength(re, code, code, re->options, NULL))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-04-08 16:56:28 UTC (rev 1546)
+++ code/trunk/testdata/testinput2    2015-04-13 09:31:55 UTC (rev 1547)
@@ -4150,4 +4150,6 @@
 /(?<=\Ka)/G+
     aaaaa


+/((?2){73}(?2))((?1))/
+
/-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-04-08 16:56:28 UTC (rev 1546)
+++ code/trunk/testdata/testoutput2    2015-04-13 09:31:55 UTC (rev 1547)
@@ -14421,4 +14421,6 @@
  0: a
  0+ 


+/((?2){73}(?2))((?1))/
+
/-- End of testinput2 --/