[Pcre-svn] [519] code/trunk: Added --line-buffered to pcregr…

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [519] code/trunk: Added --line-buffered to pcregrep.
Revision: 519
          http://vcs.pcre.org/viewvc?view=rev&revision=519
Author:   ph10
Date:     2010-05-21 17:43:17 +0100 (Fri, 21 May 2010)


Log Message:
-----------
Added --line-buffered to pcregrep.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcregrep.1
    code/trunk/pcregrep.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2010-05-18 15:47:01 UTC (rev 518)
+++ code/trunk/ChangeLog    2010-05-21 16:43:17 UTC (rev 519)
@@ -36,6 +36,8 @@
     use Unicode properties. (*UCP) at the start of a pattern can be used to set 
     this option. Modified pcretest to add /W to test this facility. Added
     REG_UCP to make it available via the POSIX interface.
+    
+10. Added --line-buffered to pcregrep. 



Version 8.02 19-Mar-2010

Modified: code/trunk/doc/pcregrep.1
===================================================================
--- code/trunk/doc/pcregrep.1    2010-05-18 15:47:01 UTC (rev 518)
+++ code/trunk/doc/pcregrep.1    2010-05-21 16:43:17 UTC (rev 519)
@@ -141,7 +141,7 @@
 connected to a terminal. More resources are used when colouring is enabled,
 because \fBpcregrep\fP has to search for all possible matches in a line, not
 just one, in order to colour them all.
-
+.sp
 The colour that is used can be specified by setting the environment variable
 PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
 string of two numbers, separated by a semicolon. They are copied directly into
@@ -282,6 +282,16 @@
 are being output. If not supplied, "(standard input)" is used. There is no
 short form for this option.
 .TP
+\fB--line-buffered\fP
+When this option is given, input is read and processed line by line, and the 
+output is flushed after each write. By default, input is read in large chunks, 
+unless \fBpcregrep\fP can determine that it is reading from a terminal (which 
+is currently possible only in Unix environments). Output to a terminal is 
+normally automatically flushed by the operating system. This option can be 
+useful when the input or output is attached to a pipe and you do not want 
+\fBpcregrep\fP to buffer up large amounts of data. However, its use will affect 
+performance, and the \fB-M\fP (multiline) option ceases to work.
+.TP
 \fB--line-offsets\fP
 Instead of showing lines or parts of lines that match, show each match as a
 line number, the offset from the start of the line, and a length. The line
@@ -307,7 +317,8 @@
 \fBpcregrep\fP ensures that at least 8K characters or the rest of the document
 (whichever is the shorter) are available for forward matching, and similarly
 the previous 8K characters (or all the previous characters, if fewer than 8K)
-are guaranteed to be available for lookbehind assertions.
+are guaranteed to be available for lookbehind assertions. This option does not 
+work when input is read line by line (see \fB--line-buffered\fP).
 .TP
 \fB-N\fP \fInewline-type\fP, \fB--newline=\fP\fInewline-type\fP
 The PCRE library supports five different conventions for indicating
@@ -485,6 +496,6 @@
 .rs
 .sp
 .nf
-Last updated: 13 September 2009
-Copyright (c) 1997-2009 University of Cambridge.
+Last updated: 21 May 2010
+Copyright (c) 1997-2010 University of Cambridge.
 .fi


Modified: code/trunk/pcregrep.c
===================================================================
--- code/trunk/pcregrep.c    2010-05-18 15:47:01 UTC (rev 518)
+++ code/trunk/pcregrep.c    2010-05-21 16:43:17 UTC (rev 519)
@@ -170,6 +170,7 @@
 static BOOL file_offsets = FALSE;
 static BOOL hyphenpending = FALSE;
 static BOOL invert = FALSE;
+static BOOL line_buffered = FALSE;
 static BOOL line_offsets = FALSE;
 static BOOL multiline = FALSE;
 static BOOL number = FALSE;
@@ -206,6 +207,7 @@
 #define N_NULL         (-9)
 #define N_LOFFSETS     (-10)
 #define N_FOFFSETS     (-11)
+#define N_LBUFFER      (-12)


 static option_item optionlist[] = {
   { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
@@ -228,6 +230,7 @@
   { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
   { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
   { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
+  { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
   { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
   { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
   { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
@@ -339,7 +342,7 @@
 }



-/************* Test stdout for being a terminal in Unix **********/
+/************* Test for a terminal in Unix **********/

static BOOL
is_stdout_tty(void)
@@ -347,7 +350,13 @@
return isatty(fileno(stdout));
}

+static BOOL
+is_file_tty(FILE *f)
+{
+return isatty(fileno(f));
+}

+
/************* Directory scanning in Win32 ***********/

/* I (Philip Hazel) have no means of testing this code. It was contributed by
@@ -459,7 +468,7 @@
}


-/************* Test stdout for being a terminal in Win32 **********/
+/************* Test for a terminal in Win32 **********/

/* I don't know how to do this; assume never */

@@ -469,7 +478,13 @@
return FALSE;
}

+static BOOL
+is_file_tty(FILE *f)
+{
+return FALSE;
+}

+
/************* Directory scanning when we can't do it ***********/

/* The type is void, and apart from isdirectory(), the functions do nothing. */
@@ -491,7 +506,7 @@
int isregfile(char *filename) { return 1; }


-/************* Test stdout for being a terminal when we can't do it **********/
+/************* Test for a terminal when we can't do it **********/

static BOOL
is_stdout_tty(void)
@@ -499,6 +514,11 @@
return FALSE;
}

+static BOOL
+is_file_tty(FILE *f)
+{
+return FALSE;
+}

#endif

@@ -527,6 +547,40 @@


 /*************************************************
+*            Read one line of input              *
+*************************************************/
+
+/* Normally, input is read using fread() into a large buffer, so many lines may 
+be read at once. However, doing this for tty input means that no output appears 
+until a lot of input has been typed. Instead, tty input is handled line by
+line. We cannot use fgets() for this, because it does not report how many
+characters it has read, and the count cannot be recovered afterwards when 
+there may be binary zeros embedded in the data.
+
+Arguments:
+  buffer     the buffer to read into
+  length     the maximum number of characters to read
+  f          the file
+  
+Returns:     the number of characters read, zero at end of file
+*/   
+
+static int
+read_one_line(char *buffer, int length, FILE *f)
+{
+int c;
+int yield = 0;
+while ((c = fgetc(f)) != EOF)
+  {
+  buffer[yield++] = c;
+  if (c == '\n' || yield >= length) break; 
+  } 
+return yield;   
+}
+
+
+
+/*************************************************
 *             Find end of line                   *
 *************************************************/


@@ -924,6 +978,7 @@
 char *endptr;
 size_t bufflength;
 BOOL endhyphenpending = FALSE;
+BOOL input_line_buffered = line_buffered;
 FILE *in = NULL;                    /* Ensure initialized */


#ifdef SUPPORT_LIBZ
@@ -961,9 +1016,12 @@

   {
   in = (FILE *)handle;
-  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
+  if (is_file_tty(in)) input_line_buffered = TRUE;
+  bufflength = input_line_buffered? 
+    read_one_line(buffer, 3*MBUFTHIRD, in) :
+    fread(buffer, 1, 3*MBUFTHIRD, in);
   }
-
+  
 endptr = buffer + bufflength;


 /* Loop while the current pointer is not at the end of the file. For large
@@ -1272,8 +1330,10 @@
       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
       }


-    /* End of doing what has to be done for a match */
+    /* End of doing what has to be done for a match. If --line-buffered was
+    given, flush the output. */


+    if (line_buffered) fflush(stdout);
     rc = 0;    /* Had some success */


     /* Remember where the last match happened for after_context. We remember
@@ -1307,6 +1367,16 @@
   ptr += linelength + endlinelength;
   filepos += linelength + endlinelength;
   linenumber++;
+  
+  /* If input is line buffered, and the buffer is not yet full, read another 
+  line and add it into the buffer. */
+  
+  if (input_line_buffered && bufflength < sizeof(buffer))
+    {
+    int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
+    bufflength += add;
+    endptr += add; 
+    }   


   /* If we haven't yet reached the end of the file (the buffer is full), and
   the current point is in the top 1/3 of the buffer, slide the buffer down by
@@ -1342,8 +1412,10 @@
     else
 #endif


-    bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
-
+    bufflength = 2*MBUFTHIRD + 
+      (input_line_buffered? 
+       read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) : 
+       fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
     endptr = buffer + bufflength;


     /* Adjust any last match point */
@@ -1694,6 +1766,7 @@
   case N_FOFFSETS: file_offsets = TRUE; break;
   case N_HELP: help(); exit(0);
   case N_LOFFSETS: line_offsets = number = TRUE; break;
+  case N_LBUFFER: line_buffered = TRUE; break; 
   case 'c': count_only = TRUE; break;
   case 'F': process_options |= PO_FIXED_STRINGS; break;
   case 'H': filenames = FN_FORCE; break;
@@ -2215,7 +2288,7 @@
     if (cs != NULL) colour_string = cs;
     }
   }
-
+  
 /* Interpret the newline type; the default settings are Unix-like. */


if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)