Re: [pcre-dev] 3x-4x slowdown in pcre_match

Top Page
Delete this message
Author: enh
Date:  
To: pcre-dev
CC: Martijn Coenen, Stephen Hines
Subject: Re: [pcre-dev] 3x-4x slowdown in pcre_match
On Thu, Apr 16, 2020 at 8:28 AM <ph10@???> wrote:
>
> On Wed, 15 Apr 2020, enh via Pcre-dev wrote:
>
> > -PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)];
> > +PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
> > __attribute__((uninitialized));
> > mb->stack_frames = (heapframe *)stack_frames_vector;
> >
> > i'm happy to try to send you a patch that also includes the relevant
> > configure "does this compiler have this attribute?" stuff, but thought
> > i'd check first for your opinions about what naming you'd like to see,
> > and what you'd like this use-site to look like. just #ifdef
> > HAVE_ATTRIBUTE_UNINITIALIZED or a PCRE2_KEEP_UNINITIALIZED that
> > expands to nothing on systems that don't support this.
>
> Yes, please. I am a novice at "configure" stuff. Can you do CMake too;
> if not, perhaps I can figure out what to do. Perhaps set up
> HAVE_ATTRIBUTE_UNINITIALIZED in config.h (because that is the same as
> other HAVE_s for the C system), and then define PCRE2_KEEP_UNINITIALIZED
> in pcre2_internal.h so it keeps the vector definition clean. Does that
> sound sensible? Thanks.


done. i've attached a patch that includes both the configure and cmake
bits. i tested both with CC=gcc and CC=clang (for representative
failure and success cases respectively). sadly i have to mess about
with -Werror because GCC only warns rather than errors for
unrecognized attributes by default, but otherwise it's relatively
straightforward.

> I've just put out a Release Candidate for 10.35; I usually wait a month
> for comments before the real release, so it should be possible to get
> this patch in before than.
>
> Regards,
> Philip
>
> --
> Philip Hazel

Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt    (revision 1244)
+++ CMakeLists.txt    (working copy)
@@ -87,6 +87,7 @@
 # 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
 # 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
 # 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
+# 2020-04-16 enh added check for __attribute__((uninitialized))


PROJECT(PCRE2 C)

@@ -113,8 +114,9 @@

# Configuration checks

+INCLUDE(CheckCSourceCompiles)
+INCLUDE(CheckFunctionExists)
INCLUDE(CheckIncludeFile)
-INCLUDE(CheckFunctionExists)
INCLUDE(CheckTypeSize)

 CHECK_INCLUDE_FILE(dirent.h     HAVE_DIRENT_H)
@@ -130,6 +132,14 @@
 CHECK_FUNCTION_EXISTS(strerror      HAVE_STRERROR)
 CHECK_FUNCTION_EXISTS(secure_getenv HAVE_SECURE_GETENV)


+set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
+CHECK_C_SOURCE_COMPILES(
+  "int main() { char buf[128] __attribute__((uninitialized)); return 0; }"
+  HAVE_ATTRIBUTE_UNINITIALIZED
+)
+set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
+
 # User-configurable options
 #
 # Note: CMakeSetup displays these in alphabetical order, regardless of
Index: configure.ac
===================================================================
--- configure.ac    (revision 1244)
+++ configure.ac    (working copy)
@@ -72,6 +72,24 @@


PCRE2_VISIBILITY

+# Check for Clang __attribute__((uninitialized)) feature
+
+AC_MSG_CHECKING([for __attribute__((uninitialized))])
+AC_LANG_PUSH([C])
+tmp_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+                   [[char buf[128] __attribute__((uninitialized))]])],
+                   [pcre2_cc_cv_attribute_uninitialized=yes],
+                   [pcre2_cc_cv_attribute_uninitialized=no])
+AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
+if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
+  AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
+             supports __attribute__((uninitialized))])
+fi
+CFLAGS=$tmp_CFLAGS
+AC_LANG_POP([C])
+
 # Versioning


 PCRE2_MAJOR="pcre2_major"
Index: src/config.h.in
===================================================================
--- src/config.h.in    (revision 1244)
+++ src/config.h.in    (working copy)
@@ -52,6 +52,9 @@
    LF does in an ASCII/Unicode environment. */
 #undef EBCDIC_NL25


+/* Define this if your compiler supports __attribute__((uninitialized)) */
+#undef HAVE_ATTRIBUTE_UNINITIALIZED
+
/* Define to 1 if you have the `bcopy' function. */
#undef HAVE_BCOPY

Index: src/pcre2_internal.h
===================================================================
--- src/pcre2_internal.h    (revision 1244)
+++ src/pcre2_internal.h    (working copy)
@@ -76,6 +76,17 @@
 #include <valgrind/memcheck.h>
 #endif


+/* -ftrivial-auto-var-init support supports initializing all local variables
+to avoid some classes of bug, but this can cause an unacceptable slowdown
+for large on-stack arrays in hot functions. This macro lets us annotate
+such arrays. */
+
+#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
+#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
+#else
+#define PCRE2_KEEP_UNINITIALIZED
+#endif
+
 /* Older versions of MSVC lack snprintf(). This define allows for
 warning/error-free compilation and testing with MSVC compilers back to at least
 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
Index: src/pcre2_match.c
===================================================================
--- src/pcre2_match.c    (revision 1244)
+++ src/pcre2_match.c    (working copy)
@@ -6164,7 +6164,8 @@
 vector of the size required that is aligned for pointers, allocate it as a
 vector of pointers. */


-PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)];
+PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
+    PCRE2_KEEP_UNINITIALIZED;
 mb->stack_frames = (heapframe *)stack_frames_vector;


/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated