[Pcre-svn] [563] code/trunk: Implement buffer expansion in p…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [563] code/trunk: Implement buffer expansion in pcre2grep.
Revision: 563
          http://www.exim.org/viewvc/pcre2?view=rev&revision=563
Author:   ph10
Date:     2016-10-11 17:40:09 +0100 (Tue, 11 Oct 2016)
Log Message:
-----------
Implement buffer expansion in pcre2grep.


Modified Paths:
--------------
    code/trunk/CMakeLists.txt
    code/trunk/ChangeLog
    code/trunk/README
    code/trunk/RunGrepTest
    code/trunk/config-cmake.h.in
    code/trunk/configure.ac
    code/trunk/doc/pcre2build.3
    code/trunk/doc/pcre2grep.1
    code/trunk/src/config.h.generic
    code/trunk/src/config.h.in
    code/trunk/src/pcre2grep.c
    code/trunk/testdata/grepoutput


Modified: code/trunk/CMakeLists.txt
===================================================================
--- code/trunk/CMakeLists.txt    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/CMakeLists.txt    2016-10-11 16:40:09 UTC (rev 563)
@@ -76,6 +76,7 @@
 #            a new option instead of being unconditional.
 # 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch
 #            fix by David Gaussmann
+# 2016-10-07 PH added PCRE2GREP_MAX_BUFSIZE


PROJECT(PCRE2 C)

@@ -148,8 +149,11 @@
     "Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")


 SET(PCRE2GREP_BUFSIZE "20480" CACHE STRING
-    "Buffer size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
+    "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")


+SET(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING
+    "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.")
+
 SET(PCRE2_NEWLINE "LF" CACHE STRING
     "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")



Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/ChangeLog    2016-10-11 16:40:09 UTC (rev 563)
@@ -61,7 +61,11 @@
 9. Change 19 for 10.22 had a typo (PCRE_STATIC_RUNTIME should be
 PCRE2_STATIC_RUNTIME). Fix from David Gaussmann.


+10. Added --max-buffer-size to pcre2grep, to allow for automatic buffer
+expansion when long lines are encountered. Original patch by Dmitry
+Cherniachenko.

+
Version 10.22 29-July-2016
--------------------------


Modified: code/trunk/README
===================================================================
--- code/trunk/README    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/README    2016-10-11 16:40:09 UTC (rev 563)
@@ -339,12 +339,22 @@


Of course, the relevant libraries must be installed on your system.

-. The default size (in bytes) of the internal buffer used by pcre2grep can be
- set by, for example:
+. The default starting size (in bytes) of the internal buffer used by pcre2grep
+ can be set by, for example:

--with-pcre2grep-bufsize=51200

- The value must be a plain integer. The default is 20480.
+ The value must be a plain integer. The default is 20480. The amount of memory
+ used by pcre2grep is actually three times this number, to allow for "before"
+ and "after" lines.
+
+. The default maximum size of pcre2grep's internal buffer can be set by, for
+ example:
+
+ --with-pcre2grep-max-bufsize=2097152
+
+ The default is either 1048576 or the value of --with-pcre2grep-bufsize,
+ whichever is the larger.

. It is possible to compile pcre2test so that it links with the libreadline
or libedit libraries, by specifying, respectively,
@@ -845,4 +855,4 @@
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 01 April 2016
+Last updated: 07 October 2016

Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/RunGrepTest    2016-10-11 16:40:09 UTC (rev 563)
@@ -440,7 +440,7 @@
 echo "RC=$?" >>testtrygrep


echo "---------------------------- Test 83 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep

echo "---------------------------- Test 84 -----------------------------" >>testtrygrep

Modified: code/trunk/config-cmake.h.in
===================================================================
--- code/trunk/config-cmake.h.in    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/config-cmake.h.in    2016-10-11 16:40:09 UTC (rev 563)
@@ -41,6 +41,7 @@
 #define NEWLINE_DEFAULT         @NEWLINE_DEFAULT@
 #define PARENS_NEST_LIMIT       @PCRE2_PARENS_NEST_LIMIT@
 #define PCRE2GREP_BUFSIZE       @PCRE2GREP_BUFSIZE@
+#define PCRE2GREP_MAX_BUFSIZE   @PCRE2GREP_MAX_BUFSIZE@


 #define MAX_NAME_SIZE    32
 #define MAX_NAME_COUNT    10000


Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/configure.ac    2016-10-11 16:40:09 UTC (rev 563)
@@ -240,9 +240,15 @@
 # Handle --with-pcre2grep-bufsize=N
 AC_ARG_WITH(pcre2grep-bufsize,
               AS_HELP_STRING([--with-pcre2grep-bufsize=N],
-                             [pcre2grep buffer size (default=20480, minimum=8192)]),
+                             [pcre2grep initial buffer size (default=20480, minimum=8192)]),
               , with_pcre2grep_bufsize=20480)


+# Handle --with-pcre2grep-max-bufsize=N
+AC_ARG_WITH(pcre2grep-max-bufsize,
+              AS_HELP_STRING([--with-pcre2grep-max-bufsize=N],
+                             [pcre2grep maximum buffer size (default=1048576, minimum=8192)]),
+              , with_pcre2grep_max_bufsize=1048576)
+
 # Handle --enable-pcre2test-libedit
 AC_ARG_ENABLE(pcre2test-libedit,
               AS_HELP_STRING([--enable-pcre2test-libedit],
@@ -608,16 +614,31 @@
   with_pcre2grep_bufsize="8192"
 else
   if test $? -gt 1 ; then
-  AC_MSG_ERROR([Bad value for  --with-pcre2grep-bufsize])
+  AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
   fi
 fi


+if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then
+ with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize"
+else
+ if test $? -gt 1 ; then
+ AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize])
+ fi
+fi
+
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
- The value of PCRE2GREP_BUFSIZE determines the size of buffer used by pcre2grep
- to hold parts of the file it is searching. This is also the minimum value.
- The actual amount of memory used by pcre2grep is three times this number,
- because it allows for the buffering of "before" and "after" lines.])
+ The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
+ pcre2grep to hold parts of the file it is searching. The buffer will be
+ expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very
+ long lines. The actual amount of memory used by pcre2grep is three times this
+ number, because it allows for the buffering of "before" and "after" lines.])

+AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [
+  The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
+  used by pcre2grep to hold parts of the file it is searching. The actual
+  amount of memory used by pcre2grep is three times this number, because it
+  allows for the buffering of "before" and "after" lines.])
+
 if test "$enable_pcre2test_libedit" = "yes"; then
   AC_DEFINE([SUPPORT_LIBEDIT], [], [
     Define to any value to allow pcre2test to be linked with libedit.])
@@ -906,44 +927,45 @@


$PACKAGE-$VERSION configuration summary:

-    Install prefix .................. : ${prefix}
-    C preprocessor .................. : ${CPP}
-    C compiler ...................... : ${CC}
-    Linker .......................... : ${LD}
-    C preprocessor flags ............ : ${CPPFLAGS}
-    C compiler flags ................ : ${CFLAGS} ${VISIBILITY_CFLAGS}
-    Linker flags .................... : ${LDFLAGS}
-    Extra libraries ................. : ${LIBS}
+    Install prefix ..................... : ${prefix}
+    C preprocessor ..................... : ${CPP}
+    C compiler ......................... : ${CC}
+    Linker ............................. : ${LD}
+    C preprocessor flags ............... : ${CPPFLAGS}
+    C compiler flags ................... : ${CFLAGS} ${VISIBILITY_CFLAGS}
+    Linker flags ....................... : ${LDFLAGS}
+    Extra libraries .................... : ${LIBS}
+                                        
+    Build 8-bit pcre2 library .......... : ${enable_pcre2_8}
+    Build 16-bit pcre2 library ......... : ${enable_pcre2_16}
+    Build 32-bit pcre2 library ......... : ${enable_pcre2_32}
+    Include debugging code ............. : ${enable_debug}
+    Enable JIT compiling support ....... : ${enable_jit}
+    Enable Unicode support ............. : ${enable_unicode}
+    Newline char/sequence .............. : ${enable_newline}
+    \R matches only ANYCRLF ............ : ${enable_bsr_anycrlf}
+    \C is disabled ..................... : ${enable_never_backslash_C}
+    EBCDIC coding ...................... : ${enable_ebcdic}
+    EBCDIC code for NL ................. : ${ebcdic_nl_code}
+    Rebuild char tables ................ : ${enable_rebuild_chartables}
+    Use stack recursion ................ : ${enable_stack_for_recursion}
+    Internal link size ................. : ${with_link_size}
+    Nested parentheses limit ........... : ${with_parens_nest_limit}
+    Match limit ........................ : ${with_match_limit}
+    Match limit recursion .............. : ${with_match_limit_recursion}
+    Build shared libs .................. : ${enable_shared}
+    Build static libs .................. : ${enable_static}
+    Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
+    Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
+    Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
+    Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
+    Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
+    Link pcre2grep with libbz2 ......... : ${enable_pcre2grep_libbz2}
+    Link pcre2test with libedit ........ : ${enable_pcre2test_libedit}
+    Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline}
+    Valgrind support ................... : ${enable_valgrind}
+    Code coverage ...................... : ${enable_coverage}


-    Build 8-bit pcre2 library ....... : ${enable_pcre2_8}
-    Build 16-bit pcre2 library ...... : ${enable_pcre2_16}
-    Build 32-bit pcre2 library ...... : ${enable_pcre2_32}
-    Include debugging code .......... : ${enable_debug}
-    Enable JIT compiling support .... : ${enable_jit}
-    Enable Unicode support .......... : ${enable_unicode}
-    Newline char/sequence ........... : ${enable_newline}
-    \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
-    \C is disabled .................. : ${enable_never_backslash_C}
-    EBCDIC coding ................... : ${enable_ebcdic}
-    EBCDIC code for NL .............. : ${ebcdic_nl_code}
-    Rebuild char tables ............. : ${enable_rebuild_chartables}
-    Use stack recursion ............. : ${enable_stack_for_recursion}
-    Internal link size .............. : ${with_link_size}
-    Nested parentheses limit ........ : ${with_parens_nest_limit}
-    Match limit ..................... : ${with_match_limit}
-    Match limit recursion ........... : ${with_match_limit_recursion}
-    Build shared libs ............... : ${enable_shared}
-    Build static libs ............... : ${enable_static}
-    Use JIT in pcre2grep ............ : ${enable_pcre2grep_jit}
-    Enable callouts in pcre2grep .... : ${enable_pcre2grep_callout}
-    Buffer size for pcre2grep ....... : ${with_pcre2grep_bufsize}
-    Link pcre2grep with libz ........ : ${enable_pcre2grep_libz}
-    Link pcre2grep with libbz2 ...... : ${enable_pcre2grep_libbz2}
-    Link pcre2test with libedit ..... : ${enable_pcre2test_libedit}
-    Link pcre2test with libreadline . : ${enable_pcre2test_libreadline}
-    Valgrind support ................ : ${enable_valgrind}
-    Code coverage ................... : ${enable_coverage}
-
 EOF


dnl end configure.ac

Modified: code/trunk/doc/pcre2build.3
===================================================================
--- code/trunk/doc/pcre2build.3    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/doc/pcre2build.3    2016-10-11 16:40:09 UTC (rev 563)
@@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "01 April 2016" "PCRE2 10.22"
+.TH PCRE2BUILD 3 "07 October 2016" "PCRE2 10.23"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .
@@ -385,16 +385,19 @@
 .sp
 \fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is
 scanning, in order to be able to output "before" and "after" lines when it
-finds a match. The size of the buffer is controlled by a parameter whose
-default value is 20K. The buffer itself is three times this size, but because
-of the way it is used for holding "before" lines, the longest line that is
-guaranteed to be processable is the parameter size. You can change the default
-parameter value by adding, for example,
+finds a match. The starting size of the buffer is controlled by a parameter
+whose default value is 20K. The buffer itself is three times this size, but
+because of the way it is used for holding "before" lines, the longest line that
+is guaranteed to be processable is the parameter size. If a longer line is 
+encountered, \fBpcre2grep\fP automatically expands the buffer, up to a
+specified maximum size, whose default is 1M or the starting size, whichever is
+the larger. You can change the default parameter values by adding, for example,
 .sp
-  --with-pcre2grep-bufsize=50K
+  --with-pcre2grep-bufsize=51200
+  --with-pcre2grep-max-bufsize=2097152 
 .sp
-to the \fBconfigure\fP command. The caller of \fPpcre2grep\fP can override this
-value by using --buffer-size on the command line.
+to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override
+these values by using --buffer-size and --max-buffer-size on the command line.
 .
 .
 .SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT"
@@ -532,6 +535,6 @@
 .rs
 .sp
 .nf
-Last updated: 01 April 2016
+Last updated: 07 October 2016
 Copyright (c) 1997-2016 University of Cambridge.
 .fi


Modified: code/trunk/doc/pcre2grep.1
===================================================================
--- code/trunk/doc/pcre2grep.1    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/doc/pcre2grep.1    2016-10-11 16:40:09 UTC (rev 563)
@@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "19 June 2016" "PCRE2 10.22"
+.TH PCRE2GREP 1 "11 October 2016" "PCRE2 10.23"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -52,12 +52,19 @@
 \fB-N\fP (\fB--newline\fP) option.
 .P
 The amount of memory used for buffering files that are being scanned is
-controlled by a parameter that can be set by the \fB--buffer-size\fP option.
-The default value for this parameter is specified when \fBpcre2grep\fP is
-built, with the default default being 20K. A block of memory three times this
-size is used (to allow for buffering "before" and "after" lines). An error
-occurs if a line overflows the buffer.
+controlled by parameters that can be set by the \fB--buffer-size\fP and
+\fB--max-buffer-size\fP options. The first of these sets the size of buffer
+that is obtained at the start of processing. If an input file contains very
+long lines, a larger buffer may be needed; this is handled by automatically
+extending the buffer, up to the limit specified by \fB--max-buffer-size\fP. The
+default values for these parameters are specified when \fBpcre2grep\fP is
+built, with the default defaults being 20K and 1M respectively. An error occurs
+if a line is too long and the buffer can no longer be expanded.
 .P
+The block of memory that is actually used is three times the "buffer size", to
+allow for buffering "before" and "after" lines. If the buffer size is too 
+small, fewer than requested "before" and "after" lines may be output.
+.P
 Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
 BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
 (specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
@@ -126,12 +133,14 @@
 processing of patterns and file names that start with hyphens.
 .TP
 \fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP
-Output \fInumber\fP lines of context after each matching line. If file names
-and/or line numbers are being output, a hyphen separator is used instead of a
-colon for the context lines. A line containing "--" is output between each
-group of lines, unless they are in fact contiguous in the input file. The value
-of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
-guarantees to have up to 8K of following text available for context output.
+Output up to \fInumber\fP lines of context after each matching line. Fewer
+lines are output if the next match or the end of the file is reached, or if the
+processing buffer size has been set too small. If file names and/or line
+numbers are being output, a hyphen separator is used instead of a colon for the
+context lines. A line containing "--" is output between each group of lines,
+unless they are in fact contiguous in the input file. The value of \fInumber\fP
+is expected to be relatively small. However, \fBpcre2grep\fP guarantees to have
+up to 8K of following text available for context output.
 .TP
 \fB-a\fP, \fB--text\fP
 Treat binary files as text. This is equivalent to
@@ -138,12 +147,15 @@
 \fB--binary-files\fP=\fItext\fP.
 .TP
 \fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
-Output \fInumber\fP lines of context before each matching line. If file names
-and/or line numbers are being output, a hyphen separator is used instead of a
-colon for the context lines. A line containing "--" is output between each
-group of lines, unless they are in fact contiguous in the input file. The value
-of \fInumber\fP is expected to be relatively small. However, \fBpcre2grep\fP
-guarantees to have up to 8K of preceding text available for context output.
+Output up to \fInumber\fP lines of context before each matching line. Fewer 
+lines are output if the previous match or the start of the file is within 
+\fInumber\fP lines, or if the processing buffer size has been set too small. If
+file names and/or line numbers are being output, a hyphen separator is used
+instead of a colon for the context lines. A line containing "--" is output
+between each group of lines, unless they are in fact contiguous in the input
+file. The value of \fInumber\fP is expected to be relatively small. However,
+\fBpcre2grep\fP guarantees to have up to 8K of preceding text available for
+context output.
 .TP
 \fB--binary-files=\fP\fIword\fP
 Specify how binary files are to be processed. If the word is "binary" (the
@@ -158,8 +170,9 @@
 return code.
 .TP
 \fB--buffer-size=\fP\fInumber\fP
-Set the parameter that controls how much memory is used for buffering files
-that are being scanned.
+Set the parameter that controls how much memory is obtained at the start of 
+processing for buffering files that are being scanned. See also 
+\fB--max-buffer-size\fP below.
 .TP
 \fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
 Output \fInumber\fP lines of context both before and after each matching line.
@@ -432,6 +445,11 @@
 There are no short forms for these options. The default settings are specified
 when the PCRE2 library is compiled, with the default default being 10 million.
 .TP
+\fB--max-buffer-size=\fP\fInumber\fP
+This limits the expansion of the processing buffer, whose initial size can be 
+set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no 
+smaller than the starting buffer size.
+.TP
 \fB-M\fP, \fB--multiline\fP
 Allow patterns to match more than one line. When this option is given, patterns
 may usefully contain literal newline characters and internal occurrences of ^
@@ -757,6 +775,6 @@
 .rs
 .sp
 .nf
-Last updated: 19 June 2016
+Last updated: 11 October 2016
 Copyright (c) 1997-2016 University of Cambridge.
 .fi


Modified: code/trunk/src/config.h.generic
===================================================================
--- code/trunk/src/config.h.generic    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/src/config.h.generic    2016-10-11 16:40:09 UTC (rev 563)
@@ -206,7 +206,7 @@
 #define PACKAGE_NAME "PCRE2"


/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.22"
+#define PACKAGE_STRING "PCRE2 10.23-RC1"

/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@@ -215,7 +215,7 @@
#define PACKAGE_URL ""

/* Define to the version of this package. */
-#define PACKAGE_VERSION "10.22"
+#define PACKAGE_VERSION "10.23-RC1"

 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
    parentheses (of any kind) in a pattern. This limits the amount of system
@@ -224,15 +224,24 @@
 #define PARENS_NEST_LIMIT 250
 #endif


-/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
- pcre2grep to hold parts of the file it is searching. This is also the
- minimum value. The actual amount of memory used by pcre2grep is three times
- this number, because it allows for the buffering of "before" and "after"
- lines. */
+/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
+ pcre2grep to hold parts of the file it is searching. The buffer will be
+ expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
+ very long lines. The actual amount of memory used by pcre2grep is three
+ times this number, because it allows for the buffering of "before" and
+ "after" lines. */
#ifndef PCRE2GREP_BUFSIZE
#define PCRE2GREP_BUFSIZE 20480
#endif

+/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
+ used by pcre2grep to hold parts of the file it is searching. The actual
+ amount of memory used by pcre2grep is three times this number, because it
+ allows for the buffering of "before" and "after" lines. */
+#ifndef PCRE2GREP_MAX_BUFSIZE
+#define PCRE2GREP_MAX_BUFSIZE 1048576
+#endif
+
/* Define to any value to include debugging code. */
/* #undef PCRE2_DEBUG */

@@ -299,7 +308,7 @@
/* #undef SUPPORT_VALGRIND */

/* Version number of package */
-#define VERSION "10.22"
+#define VERSION "10.23-RC1"

/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */

Modified: code/trunk/src/config.h.in
===================================================================
--- code/trunk/src/config.h.in    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/src/config.h.in    2016-10-11 16:40:09 UTC (rev 563)
@@ -207,13 +207,20 @@
    stack that is used while compiling a pattern. */
 #undef PARENS_NEST_LIMIT


-/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
- pcre2grep to hold parts of the file it is searching. This is also the
- minimum value. The actual amount of memory used by pcre2grep is three times
- this number, because it allows for the buffering of "before" and "after"
- lines. */
+/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
+ pcre2grep to hold parts of the file it is searching. The buffer will be
+ expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
+ very long lines. The actual amount of memory used by pcre2grep is three
+ times this number, because it allows for the buffering of "before" and
+ "after" lines. */
#undef PCRE2GREP_BUFSIZE

+/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
+ used by pcre2grep to hold parts of the file it is searching. The actual
+ amount of memory used by pcre2grep is three times this number, because it
+ allows for the buffering of "before" and "after" lines. */
+#undef PCRE2GREP_MAX_BUFSIZE
+
/* to make a symbol visible */
#undef PCRE2POSIX_EXP_DECL


Modified: code/trunk/src/pcre2grep.c
===================================================================
--- code/trunk/src/pcre2grep.c    2016-10-07 15:50:39 UTC (rev 562)
+++ code/trunk/src/pcre2grep.c    2016-10-11 16:40:09 UTC (rev 563)
@@ -173,6 +173,7 @@
 static int binary_files = BIN_BINARY;
 static int both_context = 0;
 static int bufthird = PCRE2GREP_BUFSIZE;
+static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
 static int bufsize = 3*PCRE2GREP_BUFSIZE;
 static int endlinetype;


@@ -344,6 +345,7 @@
#define N_EXCLUDE_FROM (-19)
#define N_INCLUDE_FROM (-20)
#define N_OM_SEPARATOR (-21)
+#define N_MAX_BUFSIZE (-22)

 static option_item optionlist[] = {
   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
@@ -352,7 +354,8 @@
   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
-  { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
+  { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
+  { OP_NUMBER,     N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
@@ -952,8 +955,9 @@
   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
   }


-printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --buffer-size=100K." STDOUT_NL);
+printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
+printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
 printf("space is removed and blank lines are ignored." STDOUT_NL);
 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
@@ -1100,12 +1104,12 @@
 *            Read one line of input              *
 *************************************************/


-/* Normally, input is read using fread() into a large buffer, so many lines may
-be read at once. However, doing this for tty input means that no output appears
-until a lot of input has been typed. Instead, tty input is handled line by
-line. We cannot use fgets() for this, because it does not stop at a binary
-zero, and therefore there is no way of telling how many characters it has read,
-because there may be binary zeros embedded in the data.
+/* Normally, input is read using fread() (or gzread, or BZ2_read) into a large
+buffer, so many lines may be read at once. However, doing this for tty input
+means that no output appears until a lot of input has been typed. Instead, tty
+input is handled line by line. We cannot use fgets() for this, because it does
+not stop at a binary zero, and therefore there is no way of telling how many
+characters it has read, because there may be binary zeros embedded in the data.

 Arguments:
   buffer     the buffer to read into
@@ -1424,17 +1428,18 @@
 if (after_context > 0 && lastmatchnumber > 0)
   {
   int count = 0;
-  while (lastmatchrestart < endptr && count++ < after_context)
+  while (lastmatchrestart < endptr && count < after_context)
     {
     int ellength;
-    char *pp = lastmatchrestart;
+    char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
+    if (ellength == 0 && pp == main_buffer + bufsize) break;
     if (printname != NULL) fprintf(stdout, "%s-", printname);
     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
-    pp = end_of_line(pp, endptr, &ellength);
     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
     lastmatchrestart = pp;
+    count++;
     }
-  hyphenpending = TRUE;
+  if (count > 0) hyphenpending = TRUE;
   }
 }


@@ -1770,6 +1775,33 @@


 /*************************************************
+*     Read a portion of the file into buffer     *
+*************************************************/
+
+static int
+fill_buffer(void *handle, int frtype, char *buffer, int length,
+  BOOL input_line_buffered)
+{
+#ifdef SUPPORT_LIBZ
+if (frtype == FR_LIBZ)
+  return gzread((gzFile)handle, buffer, length);
+else
+#endif
+
+#ifdef SUPPORT_LIBBZ2
+if (frtype == FR_LIBBZ2)
+  return BZ2_bzread((BZFILE *)handle, buffer, length);
+else
+#endif
+
+return (input_line_buffered ?
+  read_one_line(buffer, length, (FILE *)handle) :
+  fread(buffer, 1, length, (FILE *)handle));
+}
+
+
+
+/*************************************************
 *            Grep an individual file             *
 *************************************************/


@@ -1813,49 +1845,24 @@
 BOOL input_line_buffered = line_buffered;
 FILE *in = NULL;                    /* Ensure initialized */


-#ifdef SUPPORT_LIBZ
-gzFile ingz = NULL;
-#endif
-
-#ifdef SUPPORT_LIBBZ2
-BZFILE *inbz2 = NULL;
-#endif
-
-
/* Do the first read into the start of the buffer and set up the pointer to end
of what we have. In the case of libz, a non-zipped .gz file will be read as a
plain file. However, if a .bz2 file isn't actually bzipped, the first read will
fail. */

-(void)frtype;
-
-#ifdef SUPPORT_LIBZ
-if (frtype == FR_LIBZ)
+if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
{
- ingz = (gzFile)handle;
- bufflength = gzread (ingz, main_buffer, bufsize);
+ in = (FILE *)handle;
+ if (is_file_tty(in)) input_line_buffered = TRUE;
}
-else
-#endif

+bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
+  input_line_buffered);
+
 #ifdef SUPPORT_LIBBZ2
-if (frtype == FR_LIBBZ2)
-  {
-  inbz2 = (BZFILE *)handle;
-  bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
-  if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
-  }                                    /* without the cast it is unsigned. */
-else
+if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
 #endif


-  {
-  in = (FILE *)handle;
-  if (is_file_tty(in)) input_line_buffered = TRUE;
-  bufflength = input_line_buffered?
-    read_one_line(main_buffer, bufsize, in) :
-    fread(main_buffer, 1, bufsize, in);
-  }
-
 endptr = main_buffer + bufflength;


/* Unless binary-files=text, see if we have a binary file. This uses the same
@@ -1899,18 +1906,61 @@

/* Check to see if the line we are looking at extends right to the very end
of the buffer without a line terminator. This means the line is too long to
- handle. */
+ handle at the current buffer size. Until the buffer reaches its maximum size,
+ try doubling it and reading more data. */

   if (endlinelength == 0 && t == main_buffer + bufsize)
     {
-    fprintf(stderr, "pcre2grep: line %d%s%s is too long for the internal buffer\n"
-                    "pcre2grep: the buffer size is %d\n"
-                    "pcre2grep: use the --buffer-size option to change it\n",
-                    linenumber,
-                    (filename == NULL)? "" : " of file ",
-                    (filename == NULL)? "" : filename,
-                    bufthird);
-    return 2;
+    if (bufthird < max_bufthird)
+      {
+      char *new_buffer;
+      int new_bufthird = 2*bufthird;
+
+      if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
+      new_buffer = (char *)malloc(3*new_bufthird);
+
+      if (new_buffer == NULL)
+        {
+        fprintf(stderr,
+          "pcre2grep: line %d%s%s is too long for the internal buffer\n"
+          "pcre2grep: not enough memory to increase the buffer size to %d\n",
+          linenumber,
+          (filename == NULL)? "" : " of file ",
+          (filename == NULL)? "" : filename,
+          new_bufthird);
+        return 2;
+        }
+
+      /* Copy the data and adjust pointers to the new buffer location. */
+
+      memcpy(new_buffer, main_buffer, bufsize);
+      bufthird = new_bufthird;
+      bufsize = 3*bufthird;
+      ptr = new_buffer + (ptr - main_buffer);
+      lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
+      free(main_buffer);
+      main_buffer = new_buffer;
+
+      /* Read more data into the buffer and then try to find the line ending
+      again. */
+
+      bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
+        bufsize - bufflength, input_line_buffered);
+      endptr = main_buffer + bufflength;
+      continue;
+      }
+    else
+      {
+      fprintf(stderr,
+        "pcre2grep: line %d%s%s is too long for the internal buffer\n"
+        "pcre2grep: the maximum buffer size is %d\n"
+        "pcre2grep: use the --max-buffer-size option to change it\n",
+        linenumber,
+        (filename == NULL)? "" : " of file ",
+        (filename == NULL)? "" : filename,
+        bufthird);
+      return 2;
+      }
     }


   /* Extra processing for Jeffrey Friedl's debugging. */
@@ -2320,8 +2370,9 @@
         lastmatchnumber > 0 &&
         lastmatchrestart < main_buffer + bufthird)
       {
+
       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
-      lastmatchnumber = 0;
+      lastmatchnumber = 0;  /* Indicates no after lines pending */
       }


     /* Now do the shuffle */
@@ -2329,24 +2380,8 @@
     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
     ptr -= bufthird;


-#ifdef SUPPORT_LIBZ
-    if (frtype == FR_LIBZ)
-      bufflength = 2*bufthird +
-        gzread (ingz, main_buffer + 2*bufthird, bufthird);
-    else
-#endif
-
-#ifdef SUPPORT_LIBBZ2
-    if (frtype == FR_LIBBZ2)
-      bufflength = 2*bufthird +
-        BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
-    else
-#endif
-
-    bufflength = 2*bufthird +
-      (input_line_buffered?
-       read_one_line(main_buffer + 2*bufthird, bufthird, in) :
-       fread(main_buffer + 2*bufthird, 1, bufthird, in));
+    bufflength = 2*bufthird + fill_buffer(handle, frtype,
+      main_buffer + 2*bufthird, bufthird, input_line_buffered);
     endptr = main_buffer + bufflength;


     /* Adjust any last match point */
@@ -3427,6 +3462,12 @@


/* Get memory for the main buffer. */

+if (bufthird <= 0)
+ {
+ fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
+ goto EXIT2;
+ }
+
bufsize = 3*bufthird;
main_buffer = (char *)malloc(bufsize);


Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)