[Pcre-svn] [685] code/trunk: Update pcregrep to use JIT by default with options to disable.

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [685] code/trunk: Update pcregrep to use JIT by default with options to disable.
Revision: 685
          http://vcs.pcre.org/viewvc?view=rev&revision=685
Author:   ph10
Date:     2011-09-06 16:02:07 +0100 (Tue, 06 Sep 2011)


Log Message:
-----------
Update pcregrep to use JIT by default with options to disable.

Modified Paths:
--------------
    code/trunk/CMakeLists.txt
    code/trunk/README
    code/trunk/RunGrepTest
    code/trunk/config-cmake.h.in
    code/trunk/configure.ac
    code/trunk/doc/pcrebuild.3
    code/trunk/doc/pcregrep.1
    code/trunk/doc/pcrejit.3
    code/trunk/pcregrep.c
    code/trunk/testdata/grepoutput


Modified: code/trunk/CMakeLists.txt
===================================================================
--- code/trunk/CMakeLists.txt    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/CMakeLists.txt    2011-09-06 15:02:07 UTC (rev 685)
@@ -40,6 +40,7 @@
 # 2011-08-01 PH added PCREGREP_BUFSIZE
 # 2011-08-22 PH added PCRE_SUPPORT_JIT
 # 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
+# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT


PROJECT(PCRE C CXX)

@@ -117,6 +118,9 @@
 SET(PCRE_SUPPORT_JIT OFF CACHE BOOL
     "Enable support for Just-in-time compiling.")


+SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL
+    "Enable use of Just-in-time compiling in pcregrep.")
+     
 SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
     "Enable support for Unicode properties. (If set, UTF-8 support will be enabled as well)")


@@ -213,8 +217,14 @@

 IF(PCRE_SUPPORT_JIT)
         SET(SUPPORT_JIT 1)
+ELSE(PCRE_SUPPORT_JIT)  # library JIT is off, so force pcregrep's JIT use off as well
+        SET(PCRE_SUPPORT_PCREGREP_JIT 0)
 ENDIF(PCRE_SUPPORT_JIT)


+IF(PCRE_SUPPORT_PCREGREP_JIT)
+        SET(SUPPORT_PCREGREP_JIT 1)
+ENDIF(PCRE_SUPPORT_PCREGREP_JIT)         
+
 # This next one used to contain
 #       SET(PCRETEST_LIBS ${READLINE_LIBRARY})
 # but I was advised to add the NCURSES test as well, along with
@@ -586,6 +596,7 @@
   MESSAGE(STATUS "  Build shared libs ............... : ${BUILD_SHARED_LIBS}")
   MESSAGE(STATUS "  Build static libs ............... : ${BUILD_STATIC_LIBS}")
   MESSAGE(STATUS "  Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
+  MESSAGE(STATUS "  Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
   MESSAGE(STATUS "  Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
   MESSAGE(STATUS "  Build tests (implies pcretest) .. : ${PCRE_BUILD_TESTS}")
   IF(ZLIB_FOUND)


Modified: code/trunk/README
===================================================================
--- code/trunk/README    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/README    2011-09-06 15:02:07 UTC (rev 685)
@@ -179,6 +179,9 @@
   "configure" command. This support is available only for certain hardware 
   architectures. If you try to enable it on an unsupported architecture, there 
   will be a compile time error.
+  
+. When JIT support is enabled, pcregrep automatically makes use of it, unless
+  you add --disable-pcregrep-jit to the "configure" command. 


. If you want to make use of the support for UTF-8 Unicode character strings in
PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
@@ -839,4 +842,4 @@
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 27 August 2011
+Last updated: 06 September 2011

Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/RunGrepTest    2011-09-06 15:02:07 UTC (rev 685)
@@ -305,11 +305,11 @@
 echo "RC=$?" >>testtry


echo "---------------------------- Test 62 -----------------------------" >>testtry
-(cd $srcdir; $valgrind $pcregrep --match-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
+(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry

echo "---------------------------- Test 63 -----------------------------" >>testtry
-(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
+(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry

echo "---------------------------- Test 64 ------------------------------" >>testtry

Modified: code/trunk/config-cmake.h.in
===================================================================
--- code/trunk/config-cmake.h.in    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/config-cmake.h.in    2011-09-06 15:02:07 UTC (rev 685)
@@ -19,6 +19,7 @@
 #cmakedefine PCRE_STATIC 1


#cmakedefine SUPPORT_JIT 1
+#cmakedefine SUPPORT_PCREGREP_JIT 1
#cmakedefine SUPPORT_UTF8 1
#cmakedefine SUPPORT_UCP 1
#cmakedefine EBCDIC 1

Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/configure.ac    2011-09-06 15:02:07 UTC (rev 685)
@@ -118,6 +118,12 @@
                              [enable Just-In-Time compiling support]),
               , enable_jit=no)


+# Handle --disable-pcregrep-jit (enabled by default)
+AC_ARG_ENABLE(pcregrep-jit,
+              AS_HELP_STRING([--disable-pcregrep-jit],
+                             [disable JIT support in pcregrep]),
+              , enable_pcregrep_jit=yes)
+
 # Handle --enable-rebuild-chartables
 AC_ARG_ENABLE(rebuild-chartables,
               AS_HELP_STRING([--enable-rebuild-chartables],
@@ -478,8 +484,15 @@
 if test "$enable_jit" = "yes"; then
   AC_DEFINE([SUPPORT_JIT], [], [
     Define to enable support for Just-In-Time compiling.])
+else
+  enable_pcregrep_jit="no"
 fi


+if test "$enable_pcregrep_jit" = "yes"; then
+  AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [
+    Define to enable JIT support in pcregrep.])    
+fi
+
 if test "$enable_utf8" = "yes"; then
   AC_DEFINE([SUPPORT_UTF8], [], [
     Define to enable support for the UTF-8 Unicode encoding. This will
@@ -757,6 +770,7 @@
     Match limit recursion ........... : ${with_match_limit_recursion}
     Build shared libs ............... : ${enable_shared}
     Build static libs ............... : ${enable_static}
+    Use JIT in pcregrep ............. : ${enable_pcregrep_jit} 
     Buffer size for pcregrep ........ : ${with_pcregrep_bufsize}
     Link pcregrep with libz ......... : ${enable_pcregrep_libz}
     Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}


Modified: code/trunk/doc/pcrebuild.3
===================================================================
--- code/trunk/doc/pcrebuild.3    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/doc/pcrebuild.3    2011-09-06 15:02:07 UTC (rev 685)
@@ -111,7 +111,12 @@
 .\" HREF
 \fBpcrejit\fP
 .\"
-documentation for a discussion of JIT usage.
+documentation for a discussion of JIT usage. When JIT support is enabled,
+pcregrep automatically makes use of it, unless you add
+.sp
+  --disable-pcregrep-jit 
+.sp  
+to the "configure" command. 
 .
 .
 .SH "CODE VALUE OF NEWLINE"
@@ -383,6 +388,6 @@
 .rs
 .sp
 .nf
-Last updated: 27 August 2011
+Last updated: 06 September 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi


Modified: code/trunk/doc/pcregrep.1
===================================================================
--- code/trunk/doc/pcregrep.1    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/doc/pcregrep.1    2011-09-06 15:02:07 UTC (rev 685)
@@ -83,6 +83,7 @@
 \fBpcregrep\fP uses the value to set a locale when calling the PCRE library.
 The \fB--locale\fP option can be used to override this.
 .
+.
 .SH "SUPPORT FOR COMPRESSED FILES"
 .rs
 .sp
@@ -387,6 +388,13 @@
 output, it precedes the line number. This option is forced if
 \fB--line-offsets\fP is used.
 .TP
+\fB--no-jit\fP
+If the PCRE library is built with support for just-in-time compiling (which 
+speeds up matching), \fBpcregrep\fP automatically makes use of this, unless it
+was explicitly disabled at build time. This option can be used to disable the
+use of JIT at run time. It is provided for testing and working round problems.
+It should never be needed in normal use.
+.TP
 \fB-o\fP, \fB--only-matching\fP
 Show only the part of the line that matched a pattern instead of the whole
 line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and
@@ -569,6 +577,6 @@
 .rs
 .sp
 .nf
-Last updated: 30 July 2011
+Last updated: 06 September 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi


Modified: code/trunk/doc/pcrejit.3
===================================================================
--- code/trunk/doc/pcrejit.3    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/doc/pcrejit.3    2011-09-06 15:02:07 UTC (rev 685)
@@ -232,6 +232,7 @@
   /* Check results */
   pcre_free(re);
   pcre_free_study(extra);
+  pcre_jit_stack_free(jit_stack); 
 .sp
 .
 .


Modified: code/trunk/pcregrep.c
===================================================================
--- code/trunk/pcregrep.c    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/pcregrep.c    2011-09-06 15:02:07 UTC (rev 685)
@@ -168,7 +168,12 @@
 static int filenames = FN_DEFAULT;
 static int only_matching = -1;
 static int process_options = 0;
+
+#ifdef SUPPORT_PCREGREP_JIT
+static int study_options = PCRE_STUDY_JIT_COMPILE;
+#else
 static int study_options = 0;
+#endif


 static unsigned long int match_limit = 0;
 static unsigned long int match_limit_recursion = 0;
@@ -219,6 +224,7 @@
 #define N_M_LIMIT      (-13)
 #define N_M_LIMIT_REC  (-14)
 #define N_BUFSIZE      (-15)
+#define N_NOJIT        (-16)


 static option_item optionlist[] = {
   { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
@@ -239,7 +245,11 @@
   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
-  { OP_NODATA,     'j',      NULL,              "jit",           "use JIT compiler if available" },
+#ifdef SUPPORT_PCREGREP_JIT
+  { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
+#else
+  { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
+#endif
   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
@@ -317,8 +327,9 @@
 {
 if (resource_error)
   {
-  fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
-    "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
+  fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
+    "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
+    PCRE_ERROR_JIT_STACKLIMIT);
   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
   }


@@ -977,7 +988,8 @@
   fprintf(stderr, "%s", msg);
   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
   fprintf(stderr, "\n\n");
-  if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
+  if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
+      *mrc == PCRE_ERROR_JIT_STACKLIMIT)
     resource_error = TRUE;
   if (error_count++ > 20)
     {
@@ -1857,14 +1869,14 @@
   {
   case N_FOFFSETS: file_offsets = TRUE; break;
   case N_HELP: help(); pcregrep_exit(0);
+  case N_LBUFFER: line_buffered = TRUE; break;
   case N_LOFFSETS: line_offsets = number = TRUE; break;
-  case N_LBUFFER: line_buffered = TRUE; break;
+  case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break; 
   case 'c': count_only = TRUE; break;
   case 'F': process_options |= PO_FIXED_STRINGS; break;
   case 'H': filenames = FN_FORCE; break;
   case 'h': filenames = FN_NONE; break;
   case 'i': options |= PCRE_CASELESS; break;
-  case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break; 
   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
   case 'L': filenames = FN_NOMATCH_ONLY; break;
   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
@@ -2048,6 +2060,10 @@
 const char *locale_from = "--locale";
 const char *error;


+#ifdef SUPPORT_PCREGREP_JIT
+pcre_jit_stack *jit_stack = NULL;
+#endif
+
/* Set the default line ending value from the default in the PCRE library;
"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
Note that the return values from pcre_config(), though derived from the ASCII
@@ -2570,8 +2586,14 @@
if (f != stdin) fclose(f);
}

-/* Study the regular expressions, as we will be running them many times */
+/* Study the regular expressions, as we will be running them many times. Unless
+JIT has been explicitly disabled, arrange a stack for it to use. */

+#ifdef SUPPORT_PCREGREP_JIT
+if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
+  jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
+#endif   
+  
 for (j = 0; j < pattern_count; j++)
   {
   hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
@@ -2583,6 +2605,10 @@
     goto EXIT2;
     }
   hint_count++;
+#ifdef SUPPORT_PCREGREP_JIT
+  if (jit_stack != NULL && hints_list[j] != NULL) 
+    pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
+#endif
   }


/* If --match-limit or --recursion-limit was set, put the value(s) into the
@@ -2689,6 +2715,9 @@
}

EXIT:
+#ifdef SUPPORT_PCREGREP_JIT
+if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
+#endif
if (main_buffer != NULL) free(main_buffer);
if (pattern_list != NULL)
{

Modified: code/trunk/testdata/grepoutput
===================================================================
--- code/trunk/testdata/grepoutput    2011-09-06 10:44:57 UTC (rev 684)
+++ code/trunk/testdata/grepoutput    2011-09-06 15:02:07 UTC (rev 685)
@@ -391,7 +391,7 @@


aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
---------------------------- Test 38 ------------------------------
This line contains a binary zero here >?< for testing.
@@ -515,7 +515,7 @@
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read

-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 63 -----------------------------
@@ -525,7 +525,7 @@
that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read

-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
pcregrep: Check your regex for nested unlimited loops.
RC=1
---------------------------- Test 64 ------------------------------