[Pcre-svn] [1528] code/trunk: Fix bad compile for groups lik…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1528] code/trunk: Fix bad compile for groups like "((?2){0, 1999}())?".
Revision: 1528
          http://vcs.pcre.org/viewvc?view=rev&revision=1528
Author:   ph10
Date:     2015-02-28 11:48:09 +0000 (Sat, 28 Feb 2015)


Log Message:
-----------
Fix bad compile for groups like "((?2){0,1999}())?".

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-02-23 07:50:11 UTC (rev 1527)
+++ code/trunk/ChangeLog    2015-02-28 11:48:09 UTC (rev 1528)
@@ -74,6 +74,14 @@
     (b) Another crash if the option to print captured substrings in a callout
     was combined with setting a null ovector, for example \O\C+ as a subject
     string.
+    
+14. A pattern such as "((?2){0,1999}())?", which has a group containing a
+    forward reference repeated a large (but limited) number of times within a
+    repeated outer group that has a zero minimum quantifier, caused incorrect
+    code to be compiled, leading to the error "internal error:
+    previously-checked referenced subpattern not found" when an incorrect
+    memory address was read. This bug was reported as "heap overflow",
+    discovered by Kai Lu of Fortinet's FortiGuard Labs.



Version 8.36 26-September-2014

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2015-02-23 07:50:11 UTC (rev 1527)
+++ code/trunk/pcre_compile.c    2015-02-28 11:48:09 UTC (rev 1528)
@@ -3983,14 +3983,14 @@
   adjust     the amount by which the group is to be moved
   utf        TRUE in UTF-8 / UTF-16 / UTF-32 mode
   cd         contains pointers to tables etc.
-  save_hwm   the hwm forward reference pointer at the start of the group
+  save_hwm_offset   the hwm forward reference offset at the start of the group


 Returns:     nothing
 */


static void
adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
- pcre_uchar *save_hwm)
+ size_t save_hwm_offset)
{
pcre_uchar *ptr = group;

@@ -4002,7 +4002,8 @@
/* See if this recursion is on the forward reference list. If so, adjust the
reference. */

-  for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
+  for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; 
+       hc += LINK_SIZE)
     {
     offset = (int)GET(hc, 0);
     if (cd->start_code + offset == ptr + 1)
@@ -4447,7 +4448,7 @@
 const pcre_uchar *nestptr = NULL;
 pcre_uchar *previous = NULL;
 pcre_uchar *previous_callout = NULL;
-pcre_uchar *save_hwm = NULL;
+size_t save_hwm_offset = 0;
 pcre_uint8 classbits[32];


 /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
@@ -5960,7 +5961,7 @@
         if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
           {
           *code = OP_END;
-          adjust_recurse(previous, 1, utf, cd, save_hwm);
+          adjust_recurse(previous, 1, utf, cd, save_hwm_offset);
           memmove(previous + 1, previous, IN_UCHARS(len));
           code++;
           if (repeat_max == 0)
@@ -5984,7 +5985,7 @@
           {
           int offset;
           *code = OP_END;
-          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);
+          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);
           memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
           code += 2 + LINK_SIZE;
           *previous++ = OP_BRAZERO + repeat_type;
@@ -6047,26 +6048,25 @@
             for (i = 1; i < repeat_min; i++)
               {
               pcre_uchar *hc;
-              pcre_uchar *this_hwm = cd->hwm;
+              size_t this_hwm_offset = cd->hwm - cd->start_workspace;
               memcpy(code, previous, IN_UCHARS(len));


               while (cd->hwm > cd->start_workspace + cd->workspace_size -
-                     WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+                     WORK_SIZE_SAFETY_MARGIN - 
+                     (this_hwm_offset - save_hwm_offset))
                 {
-                size_t save_offset = save_hwm - cd->start_workspace;
-                size_t this_offset = this_hwm - cd->start_workspace;
                 *errorcodeptr = expand_workspace(cd);
                 if (*errorcodeptr != 0) goto FAILED;
-                save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
-                this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
                 }


-              for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+              for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; 
+                   hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; 
+                   hc += LINK_SIZE)
                 {
                 PUT(cd->hwm, 0, GET(hc, 0) + len);
                 cd->hwm += LINK_SIZE;
                 }
-              save_hwm = this_hwm;
+              save_hwm_offset = this_hwm_offset;
               code += len;
               }
             }
@@ -6111,7 +6111,7 @@
         else for (i = repeat_max - 1; i >= 0; i--)
           {
           pcre_uchar *hc;
-          pcre_uchar *this_hwm = cd->hwm;
+          size_t this_hwm_offset = cd->hwm - cd->start_workspace;


           *code++ = OP_BRAZERO + repeat_type;


@@ -6133,22 +6133,21 @@
           copying them. */


           while (cd->hwm > cd->start_workspace + cd->workspace_size -
-                 WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+                 WORK_SIZE_SAFETY_MARGIN - 
+                 (this_hwm_offset - save_hwm_offset))
             {
-            size_t save_offset = save_hwm - cd->start_workspace;
-            size_t this_offset = this_hwm - cd->start_workspace;
             *errorcodeptr = expand_workspace(cd);
             if (*errorcodeptr != 0) goto FAILED;
-            save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
-            this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
             }


-          for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+          for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; 
+               hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; 
+               hc += LINK_SIZE)
             {
             PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
             cd->hwm += LINK_SIZE;
             }
-          save_hwm = this_hwm;
+          save_hwm_offset = this_hwm_offset;
           code += len;
           }


@@ -6244,7 +6243,7 @@
               {
               int nlen = (int)(code - bracode);
               *code = OP_END;
-              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);
+              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
               memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
               code += 1 + LINK_SIZE;
               nlen += 1 + LINK_SIZE;
@@ -6378,7 +6377,7 @@
         else
           {
           *code = OP_END;
-          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
           memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
           code += 1 + LINK_SIZE;
           len += 1 + LINK_SIZE;
@@ -6427,7 +6426,7 @@


         default:
         *code = OP_END;
-        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
         memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
         code += 1 + LINK_SIZE;
         len += 1 + LINK_SIZE;
@@ -6459,7 +6458,7 @@
     newoptions = options;
     skipbytes = 0;
     bravalue = OP_CBRA;
-    save_hwm = cd->hwm;
+    save_hwm_offset = cd->hwm - cd->start_workspace;
     reset_bracount = FALSE;


     /* First deal with various "verbs" that can be introduced by '*'. */
@@ -7520,7 +7519,7 @@
     conditions (Perl also forbids these to be repeated). We copy code into a
     non-register variable (tempcode) in order to be able to pass its address
     because some compilers complain otherwise. At the start of a conditional
-    group whose condition is an assertion, cb->iscondassert is set. We unset it
+    group whose condition is an assertion, cd->iscondassert is set. We unset it
     here so as to allow assertions later in the group to be quantified. */


     if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
@@ -7777,7 +7776,7 @@
         const pcre_uchar *p;
         pcre_uint32 cf;


-        save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
+        save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
         terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
           CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;


@@ -8323,7 +8322,7 @@
         {
         *code = OP_END;
         adjust_recurse(start_bracket, 1 + LINK_SIZE,
-          (options & PCRE_UTF8) != 0, cd, cd->hwm);
+          (options & PCRE_UTF8) != 0, cd, cd->hwm - cd->start_workspace);
         memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
           IN_UCHARS(code - start_bracket));
         *start_bracket = OP_ONCE;


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-02-23 07:50:11 UTC (rev 1527)
+++ code/trunk/testdata/testinput2    2015-02-28 11:48:09 UTC (rev 1528)
@@ -4126,4 +4126,6 @@
 //C
     \O\C+


+"((?2){0,1999}())?"
+
/-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-02-23 07:50:11 UTC (rev 1527)
+++ code/trunk/testdata/testoutput2    2015-02-28 11:48:09 UTC (rev 1528)
@@ -14320,4 +14320,6 @@
  +0 ^    
 Matched, but too many substrings


+"((?2){0,1999}())?"
+
/-- End of testinput2 --/