[pcre-dev] Calculated match recursion stack size

Top Page
Delete this message
Author: Graycode
Date:  
To: pcre-dev
New-Topics: Re: [pcre-dev] Calculated match recursion stack size, Re: [pcre-dev] Calculated match recursion stack size
Subject: [pcre-dev] Calculated match recursion stack size
This is a feature request to enable PCRE to calculate and report the net
stack usage of each recursive call to the internal match() function
used by pcre_exec().

Below is a modified version of a patch I've been using for some time.
I tried to follow the existing PCRE code style; hopefully it is close
enough to seem familiar. There is still one "//" commented-out line
that should have been removed.

Adding this may help people like me who need to prevent the PCRE
library from overflowing the stack (seemingly more likely on Windows).
The calculated per-recursion stack size can be used to choose a safe
value for PCRE's match_limit_recursion, based on the application's
knowledge of its own thread stack size (roughly, the usable stack bytes
divided by the per-recursion size).

pcrestack.3 currently states (quote):
"As a very rough rule of thumb, you should reckon on about 500 bytes per
recursion."

Using this patch with "pcretest -C" in my WIN32 build yields:
Match recursion uses stack
Match recursion Each stack size = 348

I suspect that the work being done to add support for UTF-16 may have
an impact on the stack for new PCRE deployments.


Regards,
Graycode


--- pcre.h (version 8.21-RC1)
+++ pcre.h (working copy)
@@ -231,6 +231,7 @@
 #define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
 #define PCRE_CONFIG_BSR                     8
 #define PCRE_CONFIG_JIT                     9
+#define PCRE_CONFIG_MATCH_RECURSION_STACK   10  /* stack bytes per match() recursion */


 /* Request types for pcre_study(). Do not re-arrange, in order to remain
 compatible. */
--- pcre_internal.h (version 8.21-RC1)
+++ pcre_internal.h (working copy)
@@ -1947,6 +1947,7 @@
 extern int           _pcre_valid_utf8(USPTR, int, int *);
 extern BOOL          _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
 extern BOOL          _pcre_xclass(int, const uschar *);
+extern int           _pcre_match_recursion_stacksize (void);  /* in pcre_exec.c */


 #ifdef SUPPORT_JIT
 extern void          _pcre_jit_compile(const real_pcre *, pcre_extra *);
--- pcre_config.c (version 8.21-RC1)
+++ pcre_config.c (working copy)
@@ -127,6 +127,12 @@
 #endif
   break;


+#ifdef PCRE_CONFIG_MATCH_RECURSION_STACK
+ case PCRE_CONFIG_MATCH_RECURSION_STACK:   /* result is stored through where as an int */
+ *((int *)where) = _pcre_match_recursion_stacksize ();  /* 0 when NO_RECURSE */
+ break;
+#endif
+
default: return PCRE_ERROR_BADOPTION;
}

--- pcre_exec.c (version 8.21-RC1)
+++ pcre_exec.c (working copy)
@@ -403,6 +403,13 @@
#endif


+/* Sentinel byte: passing its address as eptr makes match() a stack-size probe */
+
+#ifdef PCRE_CONFIG_MATCH_RECURSION_STACK
+static unsigned char X_Calc_StakSize = '\0';
+#endif
+
+
/***************************************************************************
***************************************************************************/

@@ -610,6 +617,41 @@
 eptrblock newptrb;
 #endif     /* NO_RECURSE */


+/* Optional probe that measures the stack used by one match() recursion. It is
+selected by eptr being the address of X_Calc_StakSize: only the pointer value is
+compared, never what it points to. The outer call (rdepth == 0) remembers the
+address of its rdepth argument and recurses once; the nested call returns the
+distance between the two addresses, plus 2 * sizeof(int) as an allowance for
+variables RMATCH() may add. With NO_RECURSE the result is zero. */
+
+#ifdef PCRE_CONFIG_MATCH_RECURSION_STACK
+if (eptr == (USPTR)&X_Calc_StakSize)   /* probe request: address match only */
+  {
+#ifdef NO_RECURSE
+  RRETURN(0);                          /* defensive: the probe caller returns 0 without calling match() */
+#else
+  static char * ptr_StakPrev;          /* outer call's address; NOTE(review): static, so concurrent probes would share it */
+  int iRet;
+  if (rdepth == 0)
+    {
+    ptr_StakPrev = (char *)&rdepth;    /* outer call: remember where rdepth lives */
+//  // iRet = match (eptr, ecode, mstart, offset_top, md, eptrb, rdepth + 1);
+    iRet = RMATCH (eptr, ecode, offset_top, md, eptrb, 0);   /* presumably re-enters match() one level deeper, like the line above -- confirm */
+    }
+  else
+    {
+    char * ptr_Stak2nd = (char *)&rdepth;   /* address of rdepth in the nested call */
+    if (ptr_Stak2nd > ptr_StakPrev)   /* stack may grow up or down: take the absolute difference */
+       iRet = (int)( ptr_Stak2nd - ptr_StakPrev );
+    else
+       iRet = (int)( ptr_StakPrev - ptr_Stak2nd );
+    iRet += 2 * sizeof(int);          /* allowance for 2 more nested (int) vars */
+    }
+  RRETURN(iRet);
+#endif
+  }
+#endif
+
 /* To save space on the stack and in the heap frame, I have doubled up on some
 of the local variables that are used only in localised parts of the code, but
 still need to be preserved over recursive calls of match(). These macros define
@@ -5842,6 +5884,33 @@
 #undef LBL
 #endif  /* NO_RECURSE */
 }
+
+
+/*************************************************
+*         Calculate recursive match() stack size *
+*************************************************/
+
+/* Measure the stack consumed by one recursive call of match(). It is called
+by pcre_config() for the PCRE_CONFIG_MATCH_RECURSION_STACK request.
+
+Arguments:
+  none. match() is entered with eptr = &X_Calc_StakSize and dummy other args.
+
+Returns:
+  A positive byte count: the stack used by match() for each recursive call.
+  Zero when the stack is not used for recursion (NO_RECURSE).
+*/
+
+#ifdef PCRE_CONFIG_MATCH_RECURSION_STACK
+int _pcre_match_recursion_stacksize (void)
+{
+#ifdef NO_RECURSE
+  return(0);
+#else
+  return match( (USPTR)&X_Calc_StakSize, "", "", 0, NULL, NULL, 0 );
+#endif
+}
+#endif



 /***************************************************************************
--- pcretest.c (version 8.21-RC1)
+++ pcretest.c (working copy)
@@ -1386,6 +1386,11 @@
     printf("  Default recursion depth limit = %ld\n", lrc);
     (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
     printf("  Match recursion uses %s\n", rc? "stack" : "heap");
+#ifdef PCRE_CONFIG_MATCH_RECURSION_STACK
+    (void)pcre_config(PCRE_CONFIG_MATCH_RECURSION_STACK, &rc);
+    if (rc != 0)   /* zero means match() does not recurse on the stack: print nothing */
+      printf("  Match recursion Each stack size = %d\n", rc);
+#endif
     goto EXIT;
     }
   else if (strcmp(argv[op], "-help") == 0 ||