Revision: 685
http://vcs.pcre.org/viewvc?view=rev&revision=685
Author: ph10
Date: 2011-09-06 16:02:07 +0100 (Tue, 06 Sep 2011)
Log Message:
-----------
Update pcregrep to use JIT by default with options to disable.
Modified Paths:
--------------
code/trunk/CMakeLists.txt
code/trunk/README
code/trunk/RunGrepTest
code/trunk/config-cmake.h.in
code/trunk/configure.ac
code/trunk/doc/pcrebuild.3
code/trunk/doc/pcregrep.1
code/trunk/doc/pcrejit.3
code/trunk/pcregrep.c
code/trunk/testdata/grepoutput
Modified: code/trunk/CMakeLists.txt
===================================================================
--- code/trunk/CMakeLists.txt 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/CMakeLists.txt 2011-09-06 15:02:07 UTC (rev 685)
@@ -40,6 +40,7 @@
# 2011-08-01 PH added PCREGREP_BUFSIZE
# 2011-08-22 PH added PCRE_SUPPORT_JIT
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
+# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
PROJECT(PCRE C CXX)
@@ -117,6 +118,9 @@
SET(PCRE_SUPPORT_JIT OFF CACHE BOOL
"Enable support for Just-in-time compiling.")
+SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL
+ "Enable use of Just-in-time compiling in pcregrep.")
+
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
"Enable support for Unicode properties. (If set, UTF-8 support will be enabled as well)")
@@ -213,8 +217,14 @@
IF(PCRE_SUPPORT_JIT)
SET(SUPPORT_JIT 1)
+ELSE(PCRE_SUPPORT_JIT)  # no JIT in the library, so force pcregrep's JIT use off as well
+ SET(PCRE_SUPPORT_PCREGREP_JIT 0)
ENDIF(PCRE_SUPPORT_JIT)
+IF(PCRE_SUPPORT_PCREGREP_JIT)
+ SET(SUPPORT_PCREGREP_JIT 1)
+ENDIF(PCRE_SUPPORT_PCREGREP_JIT)
+
# This next one used to contain
# SET(PCRETEST_LIBS ${READLINE_LIBRARY})
# but I was advised to add the NCURSES test as well, along with
@@ -586,6 +596,7 @@
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
+ MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
MESSAGE(STATUS " Build tests (implies pcretest) .. : ${PCRE_BUILD_TESTS}")
IF(ZLIB_FOUND)
Modified: code/trunk/README
===================================================================
--- code/trunk/README 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/README 2011-09-06 15:02:07 UTC (rev 685)
@@ -179,6 +179,9 @@
"configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error.
+
+. When JIT support is enabled, pcregrep automatically makes use of it, unless
+ you add --disable-pcregrep-jit to the "configure" command.
. If you want to make use of the support for UTF-8 Unicode character strings in
PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
@@ -839,4 +842,4 @@
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 27 August 2011
+Last updated: 06 September 2011
Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/RunGrepTest 2011-09-06 15:02:07 UTC (rev 685)
@@ -305,11 +305,11 @@
echo "RC=$?" >>testtry
echo "---------------------------- Test 62 -----------------------------" >>testtry
-(cd $srcdir; $valgrind $pcregrep --match-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
+(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 63 -----------------------------" >>testtry
-(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
+(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 64 ------------------------------" >>testtry
Modified: code/trunk/config-cmake.h.in
===================================================================
--- code/trunk/config-cmake.h.in 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/config-cmake.h.in 2011-09-06 15:02:07 UTC (rev 685)
@@ -19,6 +19,7 @@
#cmakedefine PCRE_STATIC 1
#cmakedefine SUPPORT_JIT 1
+#cmakedefine SUPPORT_PCREGREP_JIT 1
#cmakedefine SUPPORT_UTF8 1
#cmakedefine SUPPORT_UCP 1
#cmakedefine EBCDIC 1
Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/configure.ac 2011-09-06 15:02:07 UTC (rev 685)
@@ -118,6 +118,12 @@
[enable Just-In-Time compiling support]),
, enable_jit=no)
+# Handle --disable-pcregrep-jit (enabled by default)
+AC_ARG_ENABLE(pcregrep-jit,
+ AS_HELP_STRING([--disable-pcregrep-jit],
+ [disable JIT support in pcregrep]),
+ , enable_pcregrep_jit=yes)
+
# Handle --enable-rebuild-chartables
AC_ARG_ENABLE(rebuild-chartables,
AS_HELP_STRING([--enable-rebuild-chartables],
@@ -478,8 +484,15 @@
if test "$enable_jit" = "yes"; then
AC_DEFINE([SUPPORT_JIT], [], [
Define to enable support for Just-In-Time compiling.])
+else
+ enable_pcregrep_jit="no"
fi
+if test "$enable_pcregrep_jit" = "yes"; then
+ AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [
+ Define to enable JIT support in pcregrep.])
+fi
+
if test "$enable_utf8" = "yes"; then
AC_DEFINE([SUPPORT_UTF8], [], [
Define to enable support for the UTF-8 Unicode encoding. This will
@@ -757,6 +770,7 @@
Match limit recursion ........... : ${with_match_limit_recursion}
Build shared libs ............... : ${enable_shared}
Build static libs ............... : ${enable_static}
+ Use JIT in pcregrep ............. : ${enable_pcregrep_jit}
Buffer size for pcregrep ........ : ${with_pcregrep_bufsize}
Link pcregrep with libz ......... : ${enable_pcregrep_libz}
Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
Modified: code/trunk/doc/pcrebuild.3
===================================================================
--- code/trunk/doc/pcrebuild.3 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/doc/pcrebuild.3 2011-09-06 15:02:07 UTC (rev 685)
@@ -111,7 +111,12 @@
.\" HREF
\fBpcrejit\fP
.\"
-documentation for a discussion of JIT usage.
+documentation for a discussion of JIT usage. When JIT support is enabled,
+pcregrep automatically makes use of it, unless you add
+.sp
+ --disable-pcregrep-jit
+.sp
+to the "configure" command.
.
.
.SH "CODE VALUE OF NEWLINE"
@@ -383,6 +388,6 @@
.rs
.sp
.nf
-Last updated: 27 August 2011
+Last updated: 06 September 2011
Copyright (c) 1997-2011 University of Cambridge.
.fi
Modified: code/trunk/doc/pcregrep.1
===================================================================
--- code/trunk/doc/pcregrep.1 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/doc/pcregrep.1 2011-09-06 15:02:07 UTC (rev 685)
@@ -83,6 +83,7 @@
\fBpcregrep\fP uses the value to set a locale when calling the PCRE library.
The \fB--locale\fP option can be used to override this.
.
+.
.SH "SUPPORT FOR COMPRESSED FILES"
.rs
.sp
@@ -387,6 +388,13 @@
output, it precedes the line number. This option is forced if
\fB--line-offsets\fP is used.
.TP
+\fB--no-jit\fP
+If the PCRE library is built with support for just-in-time compiling (which
+speeds up matching), \fBpcregrep\fP automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+.TP
\fB-o\fP, \fB--only-matching\fP
Show only the part of the line that matched a pattern instead of the whole
line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
@@ -569,6 +577,6 @@
.rs
.sp
.nf
-Last updated: 30 July 2011
+Last updated: 06 September 2011
Copyright (c) 1997-2011 University of Cambridge.
.fi
Modified: code/trunk/doc/pcrejit.3
===================================================================
--- code/trunk/doc/pcrejit.3 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/doc/pcrejit.3 2011-09-06 15:02:07 UTC (rev 685)
@@ -232,6 +232,7 @@
/* Check results */
pcre_free(re);
pcre_free_study(extra);
+ pcre_jit_stack_free(jit_stack);
.sp
.
.
Modified: code/trunk/pcregrep.c
===================================================================
--- code/trunk/pcregrep.c 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/pcregrep.c 2011-09-06 15:02:07 UTC (rev 685)
@@ -168,7 +168,12 @@
static int filenames = FN_DEFAULT;
static int only_matching = -1;
static int process_options = 0;
+
+#ifdef SUPPORT_PCREGREP_JIT
+static int study_options = PCRE_STUDY_JIT_COMPILE;
+#else
static int study_options = 0;
+#endif
static unsigned long int match_limit = 0;
static unsigned long int match_limit_recursion = 0;
@@ -219,6 +224,7 @@
#define N_M_LIMIT (-13)
#define N_M_LIMIT_REC (-14)
#define N_BUFSIZE (-15)
+#define N_NOJIT (-16)
static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", " terminate options" },
@@ -239,7 +245,11 @@
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
- { OP_NODATA, 'j', NULL, "jit", "use JIT compiler if available" },
+#ifdef SUPPORT_PCREGREP_JIT
+ { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
+#else
+ { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
+#endif
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
@@ -317,8 +327,9 @@
{
if (resource_error)
{
- fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
- "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
+ fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
+ "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
+ PCRE_ERROR_JIT_STACKLIMIT);
fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
}
@@ -977,7 +988,8 @@
fprintf(stderr, "%s", msg);
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
fprintf(stderr, "\n\n");
- if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
+ if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
+ *mrc == PCRE_ERROR_JIT_STACKLIMIT)
resource_error = TRUE;
if (error_count++ > 20)
{
@@ -1857,14 +1869,14 @@
{
case N_FOFFSETS: file_offsets = TRUE; break;
case N_HELP: help(); pcregrep_exit(0);
+ case N_LBUFFER: line_buffered = TRUE; break;
case N_LOFFSETS: line_offsets = number = TRUE; break;
- case N_LBUFFER: line_buffered = TRUE; break;
+ case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
case 'c': count_only = TRUE; break;
case 'F': process_options |= PO_FIXED_STRINGS; break;
case 'H': filenames = FN_FORCE; break;
case 'h': filenames = FN_NONE; break;
case 'i': options |= PCRE_CASELESS; break;
- case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break;
case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
case 'L': filenames = FN_NOMATCH_ONLY; break;
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
@@ -2048,6 +2060,10 @@
const char *locale_from = "--locale";
const char *error;
+#ifdef SUPPORT_PCREGREP_JIT
+pcre_jit_stack *jit_stack = NULL;
+#endif
+
/* Set the default line ending value from the default in the PCRE library;
"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
Note that the return values from pcre_config(), though derived from the ASCII
@@ -2570,8 +2586,14 @@
if (f != stdin) fclose(f);
}
-/* Study the regular expressions, as we will be running them many times */
+/* Study the regular expressions, as we will be running them many times. Unless
+JIT has been explicitly disabled, arrange a stack for it to use. */
+#ifdef SUPPORT_PCREGREP_JIT
+if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
+ jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
+#endif
+
for (j = 0; j < pattern_count; j++)
{
hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
@@ -2583,6 +2605,10 @@
goto EXIT2;
}
hint_count++;
+#ifdef SUPPORT_PCREGREP_JIT
+ if (jit_stack != NULL && hints_list[j] != NULL)
+ pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
+#endif
}
/* If --match-limit or --recursion-limit was set, put the value(s) into the
@@ -2689,6 +2715,9 @@
}
EXIT:
+#ifdef SUPPORT_PCREGREP_JIT
+if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
+#endif
if (main_buffer != NULL) free(main_buffer);
if (pattern_list != NULL)
{
Modified: code/trunk/testdata/grepoutput
===================================================================
--- code/trunk/testdata/grepoutput 2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/testdata/grepoutput 2011-09-06 15:02:07 UTC (rev 685)
@@ -391,7 +391,7 @@
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
---------------------------- Test 38 ------------------------------
This line contains a binary zero here >?< for testing.
@@ -515,7 +515,7 @@
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 63 -----------------------------
@@ -525,7 +525,7 @@
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read
-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 64 ------------------------------