[Pcre-svn] [1213] code/trunk: Limit function recursion in pcre2_study to avoid stack overflow issues.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1213] code/trunk: Limit function recursion in pcre2_study to avoid stack overflow issues.
Revision: 1213
          http://www.exim.org/viewvc/pcre2?view=rev&revision=1213
Author:   ph10
Date:     2020-01-27 10:28:19 +0000 (Mon, 27 Jan 2020)
Log Message:
-----------
Limit function recursion in pcre2_study to avoid stack overflow issues.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_study.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2020-01-26 15:31:27 UTC (rev 1212)
+++ code/trunk/ChangeLog    2020-01-27 10:28:19 UTC (rev 1213)
@@ -56,7 +56,11 @@
 assertion is not fixed length" and also "internal error: bad code value in 
 parsed_skip()" could result.


+15. Put a limit of 1000 on recursive calls in pcre2_study() when searching
+nested groups for starting code units, in order to avoid stack overflow issues.
+If the limit is reached, it just gives up trying for this optimization.

+
Version 10.34 21-November-2019
------------------------------


Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c    2020-01-26 15:31:27 UTC (rev 1212)
+++ code/trunk/src/pcre2_study.c    2020-01-27 10:28:19 UTC (rev 1213)
@@ -7,7 +7,7 @@


                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2020 University of Cambridge


-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -58,7 +58,7 @@

/* Returns from set_start_bits() */

-enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
+enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };


/*************************************************
@@ -924,19 +924,24 @@
must continue at the outer level to find at least one mandatory code unit. At
the outermost level, this function fails unless the result is SSB_DONE.

+We restrict recursion (for nested groups) to 1000 to avoid stack overflow
+issues.
+
 Arguments:
   re           points to the compiled regex block
   code         points to an expression
   utf          TRUE if in UTF mode
+  depthptr     pointer to recurse depth


 Returns:       SSB_FAIL     => Failed to find any starting code units
                SSB_DONE     => Found mandatory starting code units
                SSB_CONTINUE => Found optional starting code units
                SSB_UNKNOWN  => Hit an unrecognized opcode
+               SSB_TOODEEP  => Recursion is too deep
 */


static int
-set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
+set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, int *depthptr)
{
uint32_t c;
int yield = SSB_DONE;
@@ -947,6 +952,9 @@
int table_limit = 32;
#endif

+*depthptr += 1;
+if (*depthptr > 1000) return SSB_TOODEEP;
+
 do
   {
   BOOL try_next = TRUE;
@@ -1103,13 +1111,17 @@
       case OP_SCRIPT_RUN:
       case OP_ASSERT:
       case OP_ASSERT_NA:
-      rc = set_start_bits(re, tcode, utf);
-      if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
-      if (rc == SSB_DONE) try_next = FALSE; else
+      rc = set_start_bits(re, tcode, utf, depthptr);
+      if (rc == SSB_DONE)
         {
+        try_next = FALSE;
+        }
+      else if (rc == SSB_CONTINUE)
+        {
         do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
         tcode += 1 + LINK_SIZE;
         }
+      else return rc;   /* FAIL, UNKNOWN, or TOODEEP */
       break;


       /* If we hit ALT or KET, it means we haven't found anything mandatory in
@@ -1155,8 +1167,8 @@
       case OP_BRAZERO:
       case OP_BRAMINZERO:
       case OP_BRAPOSZERO:
-      rc = set_start_bits(re, ++tcode, utf);
-      if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
+      rc = set_start_bits(re, ++tcode, utf, depthptr);
+      if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
       do tcode += GET(tcode,1); while (*tcode == OP_ALT);
       tcode += 1 + LINK_SIZE;
       break;
@@ -1664,7 +1676,8 @@


if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{
- int rc = set_start_bits(re, code, utf);
+ int depth = 0;
+ int rc = set_start_bits(re, code, utf, &depth);
if (rc == SSB_UNKNOWN) return 1;

/* If a list of starting code units was set up, scan the list to see if only