[Pcre-svn] [644] code/trunk: Changes to pcregrep concerned with very long lines.

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [644] code/trunk: Changes to pcregrep concerned with very long lines.
Revision: 644
          http://vcs.pcre.org/viewvc?view=rev&revision=644
Author:   ph10
Date:     2011-07-30 18:13:00 +0100 (Sat, 30 Jul 2011)


Log Message:
-----------
Changes to pcregrep concerned with very long lines.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/RunGrepTest
    code/trunk/configure.ac
    code/trunk/doc/pcregrep.1
    code/trunk/pcregrep.c
    code/trunk/testdata/grepoutput


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-29 15:56:39 UTC (rev 643)
+++ code/trunk/ChangeLog    2011-07-30 17:13:00 UTC (rev 644)
@@ -212,6 +212,28 @@
 41. It seems that Perl allows any characters other than a closing parenthesis
     to be part of the NAME in (*MARK:NAME) and other backtracking verbs. PCRE 
     has been changed to be the same. 
+    
+42. Updated configure.ac to put in more quoting round AC_LANG_PROGRAM etc. so
+    as not to get warnings when autogen.sh is called. Also changed 
+    AC_PROG_LIBTOOL (deprecated) to LT_INIT (the current macro).
+    
+43. To help people who use pcregrep to scan files containing exceedingly long 
+    lines, the following changes have been made:
+    
+    (a) The default value of the buffer size parameter has been increased from
+        8K to 20K. (A buffer three times this size is actually used.)
+        
+    (b) The default can be changed by ./configure --with-pcregrep-bufsize when
+        PCRE is built.
+        
+    (c) A --buffer-size=n option has been added to pcregrep, to allow the size
+        to be set at run time.
+        
+    (d) Numerical values in pcregrep options can be followed by K or M, for
+        example --buffer-size=50K.
+        
+    (e) If a line being scanned overflows pcregrep's buffer, an error is now 
+        given and the return code is set to 2.



Version 8.12 15-Jan-2011

Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest    2011-07-29 15:56:39 UTC (rev 643)
+++ code/trunk/RunGrepTest    2011-07-30 17:13:00 UTC (rev 644)
@@ -388,6 +388,10 @@
 (cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
 echo "RC=$?" >>testtry


+echo "---------------------------- Test 83 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtry 2>&1
+echo "RC=$?" >>testtry
+
# Now compare the results.

$cf $srcdir/testdata/grepoutput testtry

Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac    2011-07-29 15:56:39 UTC (rev 643)
+++ code/trunk/configure.ac    2011-07-30 17:13:00 UTC (rev 644)
@@ -68,7 +68,7 @@
 # AC_PROG_CXX will return "g++" even if no c++ compiler is installed.
 # Check for that case, and just disable c++ code if g++ doesn't run.
 AC_LANG_PUSH(C++)
-AC_COMPILE_IFELSE(AC_LANG_PROGRAM([],[]),, CXX=""; CXXCP=""; CXXFLAGS="")
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],, CXX=""; CXXCP=""; CXXFLAGS="")
 AC_LANG_POP


# Check for a 64-bit integer type
@@ -76,7 +76,7 @@

AC_PROG_INSTALL
AC_LIBTOOL_WIN32_DLL
-AC_PROG_LIBTOOL
+LT_INIT
AC_PROG_LN_S

 PCRE_MAJOR="pcre_major"
@@ -190,6 +190,12 @@
                              [link pcregrep with libbz2 to handle .bz2 files]),
               , enable_pcregrep_libbz2=no)


+# Handle --with-pcregrep-bufsize=N
+AC_ARG_WITH(pcregrep-bufsize,
+              AS_HELP_STRING([--with-pcregrep-bufsize=N],
+                             [pcregrep buffer size (default=20480)]),
+              , with_pcregrep_bufsize=20480)
+
 # Handle --enable-pcretest-libreadline
 AC_ARG_ENABLE(pcretest-libreadline,
               AS_HELP_STRING([--enable-pcretest-libreadline],
@@ -320,11 +326,11 @@
   LDFLAGS="$OLD_LDFLAGS -Wl,$flag"
   # We try to run the linker with this new ld flag.  If the link fails,
   # we give up and remove the new flag from LDFLAGS.
-  AC_LINK_IFELSE(AC_LANG_PROGRAM([namespace pcrecpp {
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([namespace pcrecpp {
                                     class RE { static int no_arg; };
                                     int RE::no_arg;
                                   }],
-                                 []),
+                                 [])],
                  [AC_MSG_RESULT([yes]);
                   EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS -Wl,$flag";
                   break;],
@@ -361,8 +367,8 @@
   else
     include=stdlib.h
   fi
-  AC_COMPILE_IFELSE(AC_LANG_PROGRAM([#include <$include>],
-                                    [char* e; return $fn("100", &e, 10)]),
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <$include>],
+                                    [char* e; return $fn("100", &e, 10)])],
                     [AC_MSG_RESULT(yes)
                      AC_DEFINE_UNQUOTED(HAVE_`echo $fn | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ`, 1,
                                         [Define to 1 if you have `$fn'.])
@@ -434,11 +440,11 @@
 AC_MSG_CHECKING([for libbz2])
 OLD_LIBS="$LIBS"
 LIBS="$LIBS -lbz2"
-AC_LINK_IFELSE( AC_LANG_PROGRAM([[
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
 #ifdef HAVE_BZLIB_H
 #include <bzlib.h>
 #endif]],
-[[return (int)BZ2_bzopen("conftest", "rb");]]),
+[[return (int)BZ2_bzopen("conftest", "rb");]])],
 [AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1; break;],
 AC_MSG_RESULT([no]))
 LIBS="$OLD_LIBS"
@@ -500,6 +506,18 @@
     able to handle .bz2 files.])
 fi


+if test $with_pcregrep_bufsize -lt 8192 ; then
+  with_pcregrep_bufsize="8192"
+fi
+   
+AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
+  The value of PCREGREP_BUFSIZE determines the size of buffer used by
+  pcregrep to hold parts of the file it is searching. On systems that
+  support it, "configure" can be used to override the default, which is
+  20480. The minimum value is 8192. The actual amount of memory used by
+  pcregrep is three times this number, because it allows for the buffering of
+  "before" and "after" lines.])
+
 if test "$enable_pcretest_libreadline" = "yes"; then
   AC_DEFINE([SUPPORT_LIBREADLINE], [], [
     Define to allow pcretest to be linked with libreadline.])
@@ -726,6 +744,7 @@
     Match limit recursion ........... : ${with_match_limit_recursion}
     Build shared libs ............... : ${enable_shared}
     Build static libs ............... : ${enable_static}
+    Buffer size for pcregrep ........ : ${with_pcregrep_bufsize} 
     Link pcregrep with libz ......... : ${enable_pcregrep_libz}
     Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
     Link pcretest with libreadline .. : ${enable_pcretest_libreadline}


Modified: code/trunk/doc/pcregrep.1
===================================================================
--- code/trunk/doc/pcregrep.1    2011-07-29 15:56:39 UTC (rev 643)
+++ code/trunk/doc/pcregrep.1    2011-07-30 17:13:00 UTC (rev 644)
@@ -46,12 +46,19 @@
 possible to search for patterns that span line boundaries. What defines a line
 boundary is controlled by the \fB-N\fP (\fB--newline\fP) option.
 .P
-Patterns are limited to 8K or BUFSIZ characters, whichever is the greater.
-BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
-(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
-each line in the order in which they are defined, except that all the \fB-e\fP
-patterns are tried before the \fB-f\fP patterns.
+The amount of memory used for buffering files that are being scanned is 
+controlled by a parameter that can be set by the \fB--buffer-size\fP option.
+The default value for this parameter is specified when \fBpcregrep\fP is built, 
+with 20K being used if no other value is configured. A block of memory three times this size is 
+used (to allow for buffering "before" and "after" lines). An error occurs if a 
+line overflows the buffer.
 .P
+Patterns are limited to 8K or BUFSIZ bytes, whichever is the greater. BUFSIZ is
+defined in \fB<stdio.h>\fP. When there is more than one pattern (specified by
+the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to each line in
+the order in which they are defined, except that all the \fB-e\fP patterns are
+tried before the \fB-f\fP patterns.
+.P
 By default, as soon as one pattern matches (or fails to match when \fB-v\fP is
 used), no further patterns are considered. However, if \fB--colour\fP (or
 \fB--color\fP) is used to colour the matching substrings, or if
@@ -86,16 +93,18 @@
 appropriate support is not present, files are treated as plain text. The
 standard input is always so treated.
 .
+.
 .SH OPTIONS
 .rs
 .sp
 The order in which some of the options appear can affect the output. For
 example, both the \fB-h\fP and \fB-l\fP options affect the printing of file
 names. Whichever comes later in the command line will be the one that takes
-effect.
+effect. Numerical values for options may be followed by K or M, to signify 
+multiplication by 1024 or 1024*1024 respectively.
 .TP 10
 \fB--\fP
-This terminate the list of options. It is useful if the next item on the
+This terminates the list of options. It is useful if the next item on the
 command line starts with a hyphen but is not an option. This allows for the
 processing of patterns and filenames that start with hyphens.
 .TP
@@ -115,6 +124,10 @@
 of \fInumber\fP is expected to be relatively small. However, \fBpcregrep\fP
 guarantees to have up to 8K of preceding text available for context output.
 .TP
+\fB--buffer-size=\fP\fInumber\fP
+Set the parameter that controls how much memory is used for buffering files
+that are being scanned.
+.TP
 \fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
 Output \fInumber\fP lines of context both before and after each matching line.
 This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value.
@@ -530,10 +543,10 @@
 .rs
 .sp
 Exit status is 0 if any matches were found, 1 if no matches were found, and 2
-for syntax errors and non-existent or inaccessible files (even if matches were
-found in other files) or too many matching errors. Using the \fB-s\fP option to
-suppress error messages about inaccessible files does not affect the return
-code.
+for syntax errors, overlong lines, non-existent or inaccessible files (even if
+matches were found in other files) or too many matching errors. Using the
+\fB-s\fP option to suppress error messages about inaccessible files does not
+affect the return code.
 .
 .
 .SH "SEE ALSO"
@@ -556,6 +569,6 @@
 .rs
 .sp
 .nf
-Last updated: 07 May 2011
+Last updated: 30 July 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi


Modified: code/trunk/pcregrep.c
===================================================================
--- code/trunk/pcregrep.c    2011-07-29 15:56:39 UTC (rev 643)
+++ code/trunk/pcregrep.c    2011-07-30 17:13:00 UTC (rev 644)
@@ -74,9 +74,9 @@
 #define OFFSET_SIZE 99


#if BUFSIZ > 8192
-#define MBUFTHIRD BUFSIZ
+#define PATBUFSIZE BUFSIZ
#else
-#define MBUFTHIRD 8192
+#define PATBUFSIZE 8192
#endif

 /* Values for the "filenames" variable, which specifies options for file name
@@ -135,6 +135,7 @@
 static char *colour_option = NULL;
 static char *dee_option = NULL;
 static char *DEE_option = NULL;
+static char *main_buffer = NULL;
 static char *newline = NULL;
 static char *pattern_filename = NULL;
 static char *stdin_name = (char *)"(standard input)";
@@ -159,6 +160,8 @@
 static int after_context = 0;
 static int before_context = 0;
 static int both_context = 0;
+static int bufthird = PCREGREP_BUFSIZE;
+static int bufsize = 3*PCREGREP_BUFSIZE;
 static int dee_action = dee_READ;
 static int DEE_action = DEE_READ;
 static int error_count = 0;
@@ -214,12 +217,14 @@
 #define N_LBUFFER      (-12)
 #define N_M_LIMIT      (-13)
 #define N_M_LIMIT_REC  (-14)
+#define N_BUFSIZE      (-15)


 static option_item optionlist[] = {
   { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
+  { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
@@ -634,7 +639,8 @@
   endptr    end of available data
   lenptr    where to put the length of the eol sequence


-Returns:    pointer to the last byte of the line, including the newline byte(s)
+Returns:    pointer after the last byte of the line, 
+            including the newline byte(s)
 */


static char *
@@ -989,7 +995,7 @@
*************************************************/

 /* This is called from grep_or_recurse() below. It uses a buffer that is three
-times the value of MBUFTHIRD. The matching point is never allowed to stray into
+times the value of bufthird. The matching point is never allowed to stray into
 the top third of the buffer, thus keeping more of the file available for
 context printing or for multiline scanning. For large files, the pointer will
 be in the middle third most of the time, so the bottom third is available for
@@ -1000,17 +1006,19 @@
                the gzFile pointer when reading is via libz
                the BZFILE pointer when reading is via libbz2
   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
+  filename     the file name or NULL (for errors)
   printname    the file name if it is to be printed for each match
                or NULL if the file name is not to be printed
                it cannot be NULL if filenames[_nomatch]_only is set


 Returns:       0 if there was at least one match
                1 otherwise (no matches)
-               2 if there is a read error on a .bz2 file
+               2 if an overlong line is encountered 
+               3 if there is a read error on a .bz2 file
 */


 static int
-pcregrep(void *handle, int frtype, char *printname)
+pcregrep(void *handle, int frtype, char *filename, char *printname)
 {
 int rc = 1;
 int linenumber = 1;
@@ -1019,8 +1027,7 @@
 int filepos = 0;
 int offsets[OFFSET_SIZE];
 char *lastmatchrestart = NULL;
-char buffer[3*MBUFTHIRD];
-char *ptr = buffer;
+char *ptr = main_buffer;
 char *endptr;
 size_t bufflength;
 BOOL endhyphenpending = FALSE;
@@ -1045,7 +1052,7 @@
 if (frtype == FR_LIBZ)
   {
   ingz = (gzFile)handle;
-  bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
+  bufflength = gzread (ingz, main_buffer, bufsize);
   }
 else
 #endif
@@ -1054,7 +1061,7 @@
 if (frtype == FR_LIBBZ2)
   {
   inbz2 = (BZFILE *)handle;
-  bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
+  bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
   }                                    /* without the cast it is unsigned. */
 else
@@ -1064,11 +1071,11 @@
   in = (FILE *)handle;
   if (is_file_tty(in)) input_line_buffered = TRUE;
   bufflength = input_line_buffered?
-    read_one_line(buffer, 3*MBUFTHIRD, in) :
-    fread(buffer, 1, 3*MBUFTHIRD, in);
+    read_one_line(main_buffer, bufsize, in) :
+    fread(main_buffer, 1, bufsize, in);
   }


-endptr = buffer + bufflength;
+endptr = main_buffer + bufflength;

 /* Loop while the current pointer is not at the end of the file. For large
 files, endptr will be at the end of the buffer when we are in the middle of the
@@ -1096,6 +1103,20 @@
   t = end_of_line(t, endptr, &endlinelength);
   linelength = t - ptr - endlinelength;
   length = multiline? (size_t)(endptr - ptr) : linelength;
+  
+  /* Check to see if the line we are looking at extends right to the very end 
+  of the buffer without a line terminator. This means the line is too long to 
+  handle. */
+  
+  if (endlinelength == 0 && t == main_buffer + bufsize)
+    {
+    fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
+                    "pcregrep: check the --buffer_size option\n",
+                    linenumber, 
+                    (filename == NULL)? "" : " of file ",
+                    (filename == NULL)? "" : filename);
+    return 2;
+    }                      


/* Extra processing for Jeffrey Friedl's debugging. */

@@ -1281,11 +1302,11 @@
         int linecount = 0;
         char *p = ptr;


-        while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
+        while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
                linecount < before_context)
           {
           linecount++;
-          p = previous_line(p, buffer);
+          p = previous_line(p, main_buffer);
           }


         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
@@ -1425,9 +1446,9 @@
   /* If input is line buffered, and the buffer is not yet full, read another
   line and add it into the buffer. */


-  if (input_line_buffered && bufflength < sizeof(buffer))
+  if (input_line_buffered && bufflength < bufsize)
     {
-    int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
+    int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
     bufflength += add;
     endptr += add;
     }
@@ -1437,11 +1458,11 @@
   1/3 and refill it. Before we do this, if some unprinted "after" lines are
   about to be lost, print them. */


-  if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
+  if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
     {
     if (after_context > 0 &&
         lastmatchnumber > 0 &&
-        lastmatchrestart < buffer + MBUFTHIRD)
+        lastmatchrestart < main_buffer + bufthird)
       {
       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
       lastmatchnumber = 0;
@@ -1449,32 +1470,32 @@


     /* Now do the shuffle */


-    memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
-    ptr -= MBUFTHIRD;
+    memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
+    ptr -= bufthird;


 #ifdef SUPPORT_LIBZ
     if (frtype == FR_LIBZ)
-      bufflength = 2*MBUFTHIRD +
-        gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
+      bufflength = 2*bufthird +
+        gzread (ingz, main_buffer + 2*bufthird, bufthird);
     else
 #endif


 #ifdef SUPPORT_LIBBZ2
     if (frtype == FR_LIBBZ2)
-      bufflength = 2*MBUFTHIRD +
-        BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
+      bufflength = 2*bufthird +
+        BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
     else
 #endif


-    bufflength = 2*MBUFTHIRD +
+    bufflength = 2*bufthird +
       (input_line_buffered?
-       read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
-       fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
-    endptr = buffer + bufflength;
+       read_one_line(main_buffer + 2*bufthird, bufthird, in) :
+       fread(main_buffer + 2*bufthird, 1, bufthird, in));
+    endptr = main_buffer + bufflength;


     /* Adjust any last match point */


-    if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
+    if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
     }
   }     /* Loop through the whole file */


@@ -1554,7 +1575,7 @@

 if (strcmp(pathname, "-") == 0)
   {
-  return pcregrep(stdin, FR_PLAIN,
+  return pcregrep(stdin, FR_PLAIN, stdin_name,
     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
       stdin_name : NULL);
   }
@@ -1686,7 +1707,7 @@


/* Now grep the file */

-rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
+rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
(filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);

/* Close in an appropriate manner. */
@@ -1697,14 +1718,14 @@
else
#endif

-/* If it is a .bz2 file and the result is 2, it means that the first attempt to
+/* If it is a .bz2 file and the result is 3, it means that the first attempt to
read failed. If the error indicates that the file isn't in fact bzipped, try
again as a normal file. */

 #ifdef SUPPORT_LIBBZ2
 if (frtype == FR_LIBBZ2)
   {
-  if (rc == 2)
+  if (rc == 3)
     {
     int errnum;
     const char *err = BZ2_bzerror(inbz2, &errnum);
@@ -1716,6 +1737,7 @@
     else if (!silent)
       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
         pathname, err);
+    rc = 2;    /* The normal "something went wrong" code */     
     }
   BZ2_bzclose(inbz2);
   }
@@ -1808,9 +1830,12 @@
   printf("%.*s%s\n", n, "                     ", op->help_text);
   }


-printf("\nWhen reading patterns from a file instead of using a command line option,\n");
+printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
+printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
+printf("When reading patterns from a file instead of using a command line option,\n");
printf("trailing white space is removed and blank lines are ignored.\n");
-printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
+printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
+ MAX_PATTERN_COUNT, PATBUFSIZE);

printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
@@ -1912,7 +1937,7 @@
static BOOL
compile_single_pattern(char *pattern, int options, char *filename, int count)
{
-char buffer[MBUFTHIRD + 16];
+char buffer[PATBUFSIZE];
const char *error;
int errptr;

@@ -1923,7 +1948,7 @@
return FALSE;
}

-sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
+sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
   suffix[process_options]);
 pattern_list[pattern_count] =
   pcre_compile(buffer, options, &error, &errptr, pcretables);
@@ -1982,7 +2007,7 @@
 if ((process_options & PO_FIXED_STRINGS) != 0)
   {
   char *eop = pattern + strlen(pattern);
-  char buffer[MBUFTHIRD];
+  char buffer[PATBUFSIZE];
   for(;;)
     {
     int ellength;
@@ -2294,6 +2319,16 @@
     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
     while (isdigit((unsigned char)(*endptr)))
       n = n * 10 + (int)(*endptr++ - '0');
+    if (toupper(*endptr) == 'K')
+      {
+      n *= 1024; 
+      endptr++; 
+      }  
+    else if (toupper(*endptr) == 'M')
+      {
+      n *= 1024*1024; 
+      endptr++; 
+      }  
     if (*endptr != 0)
       {
       if (longop)
@@ -2460,12 +2495,14 @@
   }
 #endif


-/* Get memory to store the pattern and hints lists. */
+/* Get memory for the main buffer, and to store the pattern and hints lists. */

+bufsize = 3*bufthird;
+main_buffer = (char *)malloc(bufsize);
pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));

-if (pattern_list == NULL || hints_list == NULL)
+if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
{
fprintf(stderr, "pcregrep: malloc failed\n");
goto EXIT2;
@@ -2497,7 +2534,7 @@
int linenumber = 0;
FILE *f;
char *filename;
- char buffer[MBUFTHIRD];
+ char buffer[PATBUFSIZE];

   if (strcmp(pattern_filename, "-") == 0)
     {
@@ -2516,7 +2553,7 @@
     filename = pattern_filename;
     }


-  while (fgets(buffer, MBUFTHIRD, f) != NULL)
+  while (fgets(buffer, PATBUFSIZE, f) != NULL)
     {
     char *s = buffer + (int)strlen(buffer);
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
@@ -2628,7 +2665,8 @@


 if (i >= argc)
   {
-  rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
+  rc = pcregrep(stdin, FR_PLAIN, stdin_name, 
+    (filenames > FN_DEFAULT)? stdin_name : NULL);
   goto EXIT;
   }


@@ -2648,6 +2686,7 @@
}

EXIT:
+if (main_buffer != NULL) free(main_buffer);
if (pattern_list != NULL)
{
for (i = 0; i < pattern_count; i++) free(pattern_list[i]);

Modified: code/trunk/testdata/grepoutput
===================================================================
--- code/trunk/testdata/grepoutput    2011-07-29 15:56:39 UTC (rev 643)
+++ code/trunk/testdata/grepoutput    2011-07-30 17:13:00 UTC (rev 644)
@@ -634,3 +634,7 @@
 ---------------------------- Test 82 -----------------------------
 ?[1;31m01?[00m
 RC=0
+---------------------------- Test 83 -----------------------------
+pcregrep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer
+pcregrep: check the --buffer_size option
+RC=2