[Pcre-svn] [503] code/trunk: Fix incorrect compile time erro…

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [503] code/trunk: Fix incorrect compile time error for certain types of recursive patterns.
Revision: 503
          http://vcs.pcre.org/viewvc?view=rev&revision=503
Author:   ph10
Date:     2010-03-07 17:35:52 +0000 (Sun, 07 Mar 2010)


Log Message:
-----------
Fix incorrect compile time error for certain types of recursive patterns.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput11
    code/trunk/testdata/testoutput11


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/ChangeLog    2010-03-07 17:35:52 UTC (rev 503)
@@ -51,6 +51,12 @@
     implementation of support for (*MARK) will need an extra pointer on the 
     stack; I have reserved it now, so that the stack frame size does not
     decrease.
+    
+13. A pattern such as (?P<L1>(?P<L2>0)|(?P>L2)(?P>L1)) in which the only other 
+    item in branch that calls a recursion is a subroutine call - as in the 
+    second branch in the above example - was incorrectly given the compile-
+    time error "recursive call could loop indefinitely" because pcre_compile()
+    was not correctly checking the subroutine for matching a non-empty string. 



Version 8.01 19-Jan-2010

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/pcre_compile.c    2010-03-07 17:35:52 UTC (rev 503)
@@ -1785,12 +1785,14 @@
   code        points to start of search
   endcode     points to where to stop
   utf8        TRUE if in UTF8 mode
+  cd          contains pointers to tables etc.


 Returns:      TRUE if what is matched could be empty
 */


static BOOL
-could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)
+could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
+ compile_data *cd)
{
register int c;
for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
@@ -1800,7 +1802,7 @@
const uschar *ccode;

c = *code;
-
+
/* Skip over forward assertions; the other assertions are skipped by
first_significant_code() with a TRUE final argument. */

@@ -1820,6 +1822,22 @@
     c = *code;
     continue;
     }
+    
+  /* For a recursion/subroutine call, if its end has been reached, which
+  implies a subroutine call, we can scan it. */
+  
+  if (c == OP_RECURSE)
+    {
+    const uschar *scode = cd->start_code + GET(code, 1);
+    if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
+    do
+      {
+      if (!could_be_empty_branch(scode, endcode, utf8, cd)) return FALSE;
+      scode += GET(scode, 1);
+      }
+    while (*scode == OP_ALT);
+    continue;
+    }   


/* For other groups, scan the branches. */

@@ -1839,7 +1857,7 @@
       empty_branch = FALSE;
       do
         {
-        if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
+        if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
           empty_branch = TRUE;
         code += GET(code, 1);
         }
@@ -1973,6 +1991,11 @@
     if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
     break;
 #endif
+
+    /* None of the remaining opcodes are required to match a character. */
+     
+    default:
+    break;  
     }
   }


@@ -1995,17 +2018,18 @@
   endcode     points to where to stop (current RECURSE item)
   bcptr       points to the chain of current (unclosed) branch starts
   utf8        TRUE if in UTF-8 mode
+  cd          pointers to tables etc 


 Returns:      TRUE if what is matched could be empty
 */


 static BOOL
 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
-  BOOL utf8)
+  BOOL utf8, compile_data *cd)
 {
 while (bcptr != NULL && bcptr->current_branch >= code)
   {
-  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
+  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
     return FALSE;
   bcptr = bcptr->outer;
   }
@@ -4363,7 +4387,7 @@
           uschar *scode = bracode;
           do
             {
-            if (could_be_empty_branch(scode, ketcode, utf8))
+            if (could_be_empty_branch(scode, ketcode, utf8, cd))
               {
               *bracode += OP_SBRA - OP_BRA;
               break;
@@ -5176,7 +5200,7 @@
             recursion that could loop for ever, and diagnose that case. */


             else if (GET(called, 1) == 0 &&
-                     could_be_empty(called, code, bcptr, utf8))
+                     could_be_empty(called, code, bcptr, utf8, cd))
               {
               *errorcodeptr = ERR40;
               goto FAILED;


Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11    2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/testdata/testinput11    2010-03-07 17:35:52 UTC (rev 503)
@@ -379,4 +379,14 @@
     a(b)c
     a(b(c)d)e 


+/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
+    0
+    00
+    0000  
+
+/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
+    0
+    00
+    0000  
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testoutput11
===================================================================
--- code/trunk/testdata/testoutput11    2010-03-07 12:05:20 UTC (rev 502)
+++ code/trunk/testdata/testoutput11    2010-03-07 17:35:52 UTC (rev 503)
@@ -776,4 +776,31 @@
  0: a(b(c)d)e
  1: e


+/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
+    0
+ 0: 0
+ 1: 0
+    00
+ 0: 00
+ 1: 00
+ 2: 0
+    0000  
+ 0: 0000
+ 1: 0000
+ 2: 0
+
+/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
+    0
+ 0: 0
+ 1: 0
+ 2: 0
+    00
+ 0: 0
+ 1: 0
+ 2: 0
+    0000  
+ 0: 0
+ 1: 0
+ 2: 0
+
 /-- End of testinput11 --/