[Pcre-svn] [1495] code/trunk: Fix compiler crash/misbehaviour for zero-repeated groups that include a recursive back reference.

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [1495] code/trunk: Fix compiler crash/misbehaviour for zero-repeated groups that include a recursive back reference.
Revision: 1495
          http://vcs.pcre.org/viewvc?view=rev&revision=1495
Author:   ph10
Date:     2014-07-12 19:22:54 +0100 (Sat, 12 Jul 2014)


Log Message:
-----------
Fix compiler crash/misbehaviour for zero-repeated groups that include a
recursive back reference.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput11
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput11-16
    code/trunk/testdata/testoutput11-32
    code/trunk/testdata/testoutput11-8
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/ChangeLog    2014-07-12 18:22:54 UTC (rev 1495)
@@ -84,6 +84,11 @@
 18. Avoid a compiler warning (from some compilers) for a function call with
     a cast that removes "const" from an lvalue by using an intermediate 
     variable (to which the compiler does not object).
+    
+19. Incorrect code was compiled if a group that contained an internal recursive 
+    back reference was optional (had quantifier with a minimum of zero). This
+    example compiled incorrect code: /(((a\2)|(a*)\g<-1>))*/ and other examples 
+    caused segmentation faults because of stack overflows at compile time. 



Version 8.35 04-April-2014

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/pcre_compile.c    2014-07-12 18:22:54 UTC (rev 1495)
@@ -549,7 +549,7 @@
   "group name must start with a non-digit\0"
   /* 85 */
   "parentheses are too deeply nested (stack check)\0"
-  "digits missing in \\x{} or \\o{}\0" 
+  "digits missing in \\x{} or \\o{}\0"
   ;


/* Table to identify digits and hex digits. This is used when compiling
@@ -1260,7 +1260,7 @@

     case CHAR_o:
     if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else
-    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else 
+    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else
       {
       ptr += 2;
       c = 0;
@@ -1334,7 +1334,7 @@
           {
           *errorcodeptr = ERR86;
           break;
-          }    
+          }
         c = 0;
         overflow = FALSE;
         while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0)
@@ -1590,7 +1590,7 @@
 int min = 0;
 int max = -1;


-while (IS_DIGIT(*p)) 
+while (IS_DIGIT(*p))
   {
   min = min * 10 + (int)(*p++ - CHAR_0);
   if (min > 65535)
@@ -1598,14 +1598,14 @@
     *errorcodeptr = ERR5;
     return p;
     }
-  }   
+  }


 if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
   {
   if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
     {
     max = 0;
-    while(IS_DIGIT(*p)) 
+    while(IS_DIGIT(*p))
       {
       max = max * 10 + (int)(*p++ - CHAR_0);
       if (max > 65535)
@@ -1613,7 +1613,7 @@
         *errorcodeptr = ERR5;
         return p;
         }
-      }   
+      }
     if (max < min)
       {
       *errorcodeptr = ERR4;
@@ -3096,7 +3096,7 @@
   Therefore infinite recursions are not possible. */


c = *code;
-
+
/* Skip over callouts */

   if (c == OP_CALLOUT)
@@ -3125,7 +3125,7 @@
     /* If the bracket is capturing, and referenced by an OP_RECURSE, or
     it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
     cannot be converted to a possessive form. */
-    
+
     if (base_list[1] == 0) return FALSE;


     switch(*(code - GET(code, 1)))
@@ -3137,7 +3137,7 @@
       case OP_ONCE:
       case OP_ONCE_NC:
       /* Atomic sub-patterns and assertions can always auto-possessify their
-      last iterator. However, if the group was entered as a result of checking 
+      last iterator. However, if the group was entered as a result of checking
       a previous iterator, this is not possible. */


       return !entered_a_group;
@@ -3182,14 +3182,14 @@
     continue;


     default:
-    break; 
+    break;
     }


/* Check for a supported opcode, and load its properties. */

   code = get_chr_property_list(code, utf, cd->fcc, list);
   if (code == NULL) return FALSE;    /* Unsupported */
-  
+
   /* If either opcode is a small character list, set pointers for comparing
   characters from that list with another list, or with a property. */


@@ -3422,7 +3422,7 @@
            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];


     if (!accepted) return FALSE;
-      
+
     if (list[1] == 0) return TRUE;
     /* Might be an empty repeat. */
     continue;
@@ -4694,7 +4694,7 @@
     previous = NULL;
     if ((options & PCRE_MULTILINE) != 0)
       {
-      if (firstcharflags == REQ_UNSET) 
+      if (firstcharflags == REQ_UNSET)
         zerofirstcharflags = firstcharflags = REQ_NONE;
       *code++ = OP_CIRCM;
       }
@@ -5983,7 +5983,7 @@
           just adjust the length as if we had. Do some paranoid checks for
           potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
           integer type when available, otherwise double. */
-          
+
           if (lengthptr != NULL)
             {
             int delta = (repeat_min - 1)*length_prevgroup;
@@ -6701,7 +6701,7 @@
             ptr++;
             }
           namelen = (int)(ptr - name);
-          if (lengthptr != NULL && (options & PCRE_DUPNAMES) != 0) 
+          if (lengthptr != NULL && (options & PCRE_DUPNAMES) != 0)
             *lengthptr += IMM2_SIZE;
           }


@@ -6767,7 +6767,7 @@
               (slot+IMM2_SIZE)[namelen] != 0) break;
             count++;
             }
- 
+
           if (count > 1)
             {
             PUT2(code, 2+LINK_SIZE, offset);
@@ -7116,7 +7116,7 @@
           /* Count named back references. */


           if (!is_recurse) cd->namedrefcount++;
-          
+
           /* If duplicate names are permitted, we have to allow for a named
           reference to a duplicated name (this cannot be determined until the
           second pass). This needs an extra 16-bit data item. */
@@ -7168,12 +7168,12 @@
           for (i++; i < cd->names_found; i++)
             {
             if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
-            
- 
+
+
             count++;
             cslot += cd->name_entry_size;
             }
-            
+
           if (count > 1)
             {
             if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
@@ -8267,12 +8267,16 @@


     /* If it was a capturing subpattern, check to see if it contained any
     recursive back references. If so, we must wrap it in atomic brackets.
-    In any event, remove the block from the chain. */
+    Because we are moving code along, we must ensure that any pending recursive
+    references are updated. In any event, remove the block from the chain. */


     if (capnumber > 0)
       {
       if (cd->open_caps->flag)
         {
+        *code = OP_END;
+        adjust_recurse(start_bracket, 1 + LINK_SIZE,
+          (options & PCRE_UTF8) != 0, cd, cd->hwm);
         memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
           IN_UCHARS(code - start_bracket));
         *start_bracket = OP_ONCE;
@@ -9277,7 +9281,7 @@


if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;

-/* Unless disabled, check whether any single character iterators can be        
+/* Unless disabled, check whether any single character iterators can be
 auto-possessified. The function overwrites the appropriate opcode values, so
 the type of the pointer must be cast. NOTE: the intermediate variable "temp" is
 used in this code because at least one compiler gives a warning about loss of
@@ -9288,7 +9292,7 @@
   {
   pcre_uchar *temp = (pcre_uchar *)codestart;
   auto_possessify(temp, utf, cd);
-  } 
+  }


/* If there were any lookbehind assertions that contained OP_RECURSE
(recursions or subroutine calls), a flag is set for them to be checked here,

Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/testdata/testinput11    2014-07-12 18:22:54 UTC (rev 1495)
@@ -132,4 +132,6 @@


/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/B

+/(((a\2)|(a*)\g<-1>))*a?/B
+
/-- End of testinput11 --/

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/testdata/testinput2    2014-07-12 18:22:54 UTC (rev 1495)
@@ -4035,6 +4035,8 @@


/(?(R&6yh)abc)/

+/(((a\2)|(a*)\g<-1>))*a?/BZ
+
/-- Test the ugly "start or end of word" compatibility syntax --/

/[[:<:]]red[[:>:]]/BZ

Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/testdata/testoutput11-16    2014-07-12 18:22:54 UTC (rev 1495)
@@ -709,4 +709,28 @@
  62     End
 ------------------------------------------------------------------


+/(((a\2)|(a*)\g<-1>))*a?/B
+------------------------------------------------------------------
+  0  39 Bra
+  2     Brazero
+  3  32 SCBra 1
+  6  27 Once
+  8  12 CBra 2
+ 11   7 CBra 3
+ 14     a
+ 16     \2
+ 18   7 Ket
+ 20  11 Alt
+ 22   5 CBra 4
+ 25     a*
+ 27   5 Ket
+ 29  22 Recurse
+ 31  23 Ket
+ 33  27 Ket
+ 35  32 KetRmax
+ 37     a?+
+ 39  39 Ket
+ 41     End
+------------------------------------------------------------------
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testoutput11-32
===================================================================
--- code/trunk/testdata/testoutput11-32    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/testdata/testoutput11-32    2014-07-12 18:22:54 UTC (rev 1495)
@@ -709,4 +709,28 @@
  62     End
 ------------------------------------------------------------------


+/(((a\2)|(a*)\g<-1>))*a?/B
+------------------------------------------------------------------
+  0  39 Bra
+  2     Brazero
+  3  32 SCBra 1
+  6  27 Once
+  8  12 CBra 2
+ 11   7 CBra 3
+ 14     a
+ 16     \2
+ 18   7 Ket
+ 20  11 Alt
+ 22   5 CBra 4
+ 25     a*
+ 27   5 Ket
+ 29  22 Recurse
+ 31  23 Ket
+ 33  27 Ket
+ 35  32 KetRmax
+ 37     a?+
+ 39  39 Ket
+ 41     End
+------------------------------------------------------------------
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testoutput11-8
===================================================================
--- code/trunk/testdata/testoutput11-8    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/testdata/testoutput11-8    2014-07-12 18:22:54 UTC (rev 1495)
@@ -709,4 +709,28 @@
  76     End
 ------------------------------------------------------------------


+/(((a\2)|(a*)\g<-1>))*a?/B
+------------------------------------------------------------------
+  0  57 Bra
+  3     Brazero
+  4  48 SCBra 1
+  9  40 Once
+ 12  18 CBra 2
+ 17  10 CBra 3
+ 22     a
+ 24     \2
+ 27  10 Ket
+ 30  16 Alt
+ 33   7 CBra 4
+ 38     a*
+ 40   7 Ket
+ 43  33 Recurse
+ 46  34 Ket
+ 49  40 Ket
+ 52  48 KetRmax
+ 55     a?+
+ 57  57 Ket
+ 60     End
+------------------------------------------------------------------
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2014-07-10 16:38:05 UTC (rev 1494)
+++ code/trunk/testdata/testoutput2    2014-07-12 18:22:54 UTC (rev 1495)
@@ -14093,6 +14093,30 @@
 /(?(R&6yh)abc)/
 Failed: group name must start with a non-digit at offset 5


+/(((a\2)|(a*)\g<-1>))*a?/BZ
+------------------------------------------------------------------
+        Bra
+        Brazero
+        SCBra 1
+        Once
+        CBra 2
+        CBra 3
+        a
+        \2
+        Ket
+        Alt
+        CBra 4
+        a*
+        Ket
+        Recurse
+        Ket
+        Ket
+        KetRmax
+        a?+
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- Test the ugly "start or end of word" compatibility syntax --/


/[[:<:]]red[[:>:]]/BZ