Revision: 378
http://vcs.pcre.org/viewvc?view=rev&revision=378
Author: ph10
Date: 2009-03-01 14:13:34 +0000 (Sun, 01 Mar 2009)
Log Message:
-----------
Make pcregrep with --colour show all matches in a line in colour.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/RunGrepTest
code/trunk/doc/pcregrep.1
code/trunk/pcregrep.c
code/trunk/testdata/grepinputv
code/trunk/testdata/grepinputx
code/trunk/testdata/grepoutput
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2009-03-01 12:07:19 UTC (rev 377)
+++ code/trunk/ChangeLog 2009-03-01 14:13:34 UTC (rev 378)
@@ -24,9 +24,13 @@
lines. This is not true; no spaces are inserted. I have also clarified the
wording for the --colour (or --color) option.
-5. When --colour was used with -o, the list of matching strings was not
- coloured; this is different to GNU grep, so I have changed it to be the
- same.
+5. In pcregrep, when --colour was used with -o, the list of matching strings
+ was not coloured; this is different to GNU grep, so I have changed it to be
+ the same.
+
+6. When --colo(u)r was used in pcregrep, only the first matching substring in
+ each matching line was coloured. Now it goes on to look for further matches
+ of any of the test patterns, which is the same behaviour as GNU grep.
Version 7.8 05-Sep-08
Modified: code/trunk/RunGrepTest
===================================================================
--- code/trunk/RunGrepTest 2009-03-01 12:07:19 UTC (rev 377)
+++ code/trunk/RunGrepTest 2009-03-01 14:13:34 UTC (rev 378)
@@ -214,6 +214,9 @@
echo "---------------------------- Test 53 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
+echo "---------------------------- Test 54 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
+
# Now compare the results.
$cf $srcdir/testdata/grepoutput testtry
Modified: code/trunk/doc/pcregrep.1
===================================================================
--- code/trunk/doc/pcregrep.1 2009-03-01 12:07:19 UTC (rev 377)
+++ code/trunk/doc/pcregrep.1 2009-03-01 14:13:34 UTC (rev 378)
@@ -119,12 +119,16 @@
a pattern should be coloured in the output. By default, the output is not
coloured. The value (which is optional, see above) may be "never", "always", or
"auto". In the latter case, colouring happens only if the standard output is
-connected to a terminal. The colour that is used can be specified by setting
-the environment variable PCREGREP_COLOUR or PCREGREP_COLOR. The value of this
-variable should be a string of two numbers, separated by a semicolon. They are
-copied directly into the control string for setting colour on a terminal, so it
-is your responsibility to ensure that they make sense. If neither of the
-environment variables is set, the default is "1;31", which gives red.
+connected to a terminal. More resources are used when colouring is enabled,
+because \fBpcregrep\fP has to search for all possible matches in a line, not
+just one, in order to colour them all.
+
+The colour that is used can be specified by setting the environment variable
+PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
+string of two numbers, separated by a semicolon. They are copied directly into
+the control string for setting colour on a terminal, so it is your
+responsibility to ensure that they make sense. If neither of the environment
+variables is set, the default is "1;31", which gives red.
.TP
\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP
If an input path is not a regular file or a directory, "action" specifies how
Modified: code/trunk/pcregrep.c
===================================================================
--- code/trunk/pcregrep.c 2009-03-01 12:07:19 UTC (rev 377)
+++ code/trunk/pcregrep.c 2009-03-01 14:13:34 UTC (rev 378)
@@ -71,6 +71,7 @@
typedef int BOOL;
#define MAX_PATTERN_COUNT 100
+#define OFFSET_SIZE 99
#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
@@ -821,6 +822,60 @@
/*************************************************
+* Apply patterns to subject till one matches *
+*************************************************/
+
+/* This function runs through all the patterns in order, stopping at the first
+one that matches. It is called repeatedly on the remainder of the same subject
+when colouring is enabled, in order to find all the matches in a line. Each
+pattern is applied from offset 0 of matchptr, with no pcre_exec() options set.
+Arguments:
+ matchptr the start of the subject
+ length the length of the subject to match
+ offsets the offsets vector to fill in (room for OFFSET_SIZE ints)
+ mrc address of where to put the result of the last pcre_exec() call
+
+Returns: TRUE if one of the patterns matched
+ FALSE if no pattern matched and there were no errors
+ invert after reporting a pcre_exec() error other than NOMATCH
+*/
+
+static BOOL
+match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
+{
+int i;
+for (i = 0; i < pattern_count; i++)
+ {
+ *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
+ offsets, OFFSET_SIZE);
+ if (*mrc >= 0) return TRUE; /* First pattern to match wins */
+ if (*mrc == PCRE_ERROR_NOMATCH) continue; /* Try the next pattern */
+ fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
+ if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
+ fprintf(stderr, "this text:\n");
+ fwrite(matchptr, 1, length, stderr); /* The text may include binary zeros */
+ fprintf(stderr, "\n");
+ if (error_count == 0 &&
+ (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
+ {
+ fprintf(stderr, "pcregrep: error %d means that a resource limit "
+ "was exceeded\n", *mrc);
+ fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
+ }
+ if (error_count++ > 20)
+ {
+ fprintf(stderr, "pcregrep: too many errors - abandoned\n");
+ exit(2);
+ }
+ return invert; /* Stop matching; the caller's (match != invert) test fails */
+ }
+
+return FALSE; /* No match, no errors */
+}
+
+
+
+/*************************************************
* Grep an individual file *
*************************************************/
@@ -853,7 +908,7 @@
int lastmatchnumber = 0;
int count = 0;
int filepos = 0;
-int offsets[99];
+int offsets[OFFSET_SIZE];
char *lastmatchrestart = NULL;
char buffer[3*MBUFTHIRD];
char *ptr = buffer;
@@ -909,9 +964,9 @@
while (ptr < endptr)
{
- int i, endlinelength;
+ int endlinelength;
int mrc = 0;
- BOOL match = FALSE;
+ BOOL match;
char *matchptr = ptr;
char *t = ptr;
size_t length, linelength;
@@ -919,9 +974,10 @@
/* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre_exec(). In multiline mode, it is the
length remainder of the data in the buffer. Otherwise, it is the length of
- the next line. After matching, we always advance by the length of the next
- line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
- that any match is constrained to be in the first line. */
+ the next line, excluding the terminating newline. After matching, we always
+ advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
+ option is used for compiling, so that any match is constrained to be in the
+ first line. */
t = end_of_line(t, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
@@ -936,6 +992,7 @@
#include <time.h>
struct timeval start_time, end_time;
struct timezone dummy;
+ int i;
if (jfriedl_XT)
{
@@ -961,7 +1018,7 @@
for (i = 0; i < jfriedl_XR; i++)
- match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
+ match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, OFFSET_SIZE) >= 0);
if (gettimeofday(&end_time, &dummy) != 0)
perror("bad gettimeofday");
@@ -980,38 +1037,12 @@
ONLY_MATCHING_RESTART:
- /* Run through all the patterns until one matches. Note that we don't include
- the final newline in the subject string. */
+ /* Run through all the patterns until one matches or there is an error other
+ than NOMATCH. This code is in a subroutine so that it can be re-used for
+ finding subsequent matches when colouring matched lines. */
+
+ match = match_patterns(matchptr, length, offsets, &mrc);
- for (i = 0; i < pattern_count; i++)
- {
- mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
- offsets, 99);
- if (mrc >= 0) { match = TRUE; break; }
- if (mrc != PCRE_ERROR_NOMATCH)
- {
- fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
- if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
- fprintf(stderr, "this line:\n");
- fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
- fprintf(stderr, "\n");
- if (error_count == 0 &&
- (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
- {
- fprintf(stderr, "pcregrep: error %d means that a resource limit "
- "was exceeded\n", mrc);
- fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
- }
- if (error_count++ > 20)
- {
- fprintf(stderr, "pcregrep: too many errors - abandoned\n");
- exit(2);
- }
- match = invert; /* No more matching; don't show the line again */
- break;
- }
- }
-
/* If it's a match or a not-match (as required), do what's wanted. */
if (match != invert)
@@ -1201,17 +1232,33 @@
else
#endif
- /* We have to split the line(s) up if colouring. */
+ /* We have to split the line(s) up if colouring, and search for further
+ matches. */
if (do_colour)
{
+ int last_offset = 0;
fwrite(ptr, 1, offsets[0], stdout);
fprintf(stdout, "%c[%sm", 0x1b, colour_string);
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
fprintf(stdout, "%c[00m", 0x1b);
- fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
+ for (;;)
+ {
+ last_offset += offsets[1];
+ matchptr += offsets[1];
+ length -= offsets[1];
+ if (!match_patterns(matchptr, length, offsets, &mrc)) break;
+ fwrite(matchptr, 1, offsets[0], stdout);
+ fprintf(stdout, "%c[%sm", 0x1b, colour_string);
+ fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
+ fprintf(stdout, "%c[00m", 0x1b);
+ }
+ fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
stdout);
}
+
+ /* Not colouring; no need to search for further matches */
+
else fwrite(ptr, 1, linelength + endlinelength, stdout);
}
Modified: code/trunk/testdata/grepinputv
===================================================================
--- code/trunk/testdata/grepinputv 2009-03-01 12:07:19 UTC (rev 377)
+++ code/trunk/testdata/grepinputv 2009-03-01 14:13:34 UTC (rev 378)
@@ -1,3 +1,4 @@
The quick brown
fox jumps
over the lazy dog.
+This time it jumps and jumps and jumps.
Modified: code/trunk/testdata/grepinputx
===================================================================
--- code/trunk/testdata/grepinputx 2009-03-01 12:07:19 UTC (rev 377)
+++ code/trunk/testdata/grepinputx 2009-03-01 14:13:34 UTC (rev 378)
@@ -39,4 +39,5 @@
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
Modified: code/trunk/testdata/grepoutput
===================================================================
--- code/trunk/testdata/grepoutput 2009-03-01 12:07:19 UTC (rev 377)
+++ code/trunk/testdata/grepoutput 2009-03-01 14:13:34 UTC (rev 378)
@@ -18,6 +18,7 @@
./testdata/grepinput:608:Check up on PATTERN near the end.
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx:5:Pattern
+./testdata/grepinputx:42:This line contains pattern not on a line by itself.
---------------------------- Test 6 ------------------------------
7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears.
@@ -25,6 +26,7 @@
608:Check up on PATTERN near the end.
3:Here is the pattern again.
5:Pattern
+42:This line contains pattern not on a line by itself.
---------------------------- Test 7 ------------------------------
./testdata/grepinput
./testdata/grepinputx
@@ -75,12 +77,13 @@
39:nineteen
40:twenty
41:
-42:This is the last line of this file.
+43:This is the last line of this file.
---------------------------- Test 12 -----------------------------
Pattern
---------------------------- Test 13 -----------------------------
Here is the pattern again.
That time it was on a line by itself.
+This line contains pattern not on a line by itself.
---------------------------- Test 14 -----------------------------
./testdata/grepinputx:To pat or not to pat, that is the question.
---------------------------- Test 15 -----------------------------
@@ -157,6 +160,7 @@
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 25 -----------------------------
15-
@@ -207,6 +211,7 @@
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 27 -----------------------------
four
@@ -227,6 +232,7 @@
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 28 -----------------------------
14-of lines all by themselves.
@@ -279,6 +285,7 @@
nineteen
twenty
+This line contains pattern not on a line by itself.
This is the last line of this file.
---------------------------- Test 30 -----------------------------
./testdata/grepinput-4-features should be added at the end, because some of the tests involve the
@@ -299,6 +306,11 @@
./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx-4-
./testdata/grepinputx:5:Pattern
+--
+./testdata/grepinputx-39-nineteen
+./testdata/grepinputx-40-twenty
+./testdata/grepinputx-41-
+./testdata/grepinputx:42:This line contains pattern not on a line by itself.
---------------------------- Test 31 -----------------------------
./testdata/grepinput:7:PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears.
@@ -317,6 +329,9 @@
./testdata/grepinputx-6-That time it was on a line by itself.
./testdata/grepinputx-7-
./testdata/grepinputx-8-To pat or not to pat, that is the question.
+--
+./testdata/grepinputx:42:This line contains pattern not on a line by itself.
+./testdata/grepinputx-43-This is the last line of this file.
---------------------------- Test 32 -----------------------------
./testdata/grepinputx
---------------------------- Test 33 -----------------------------
@@ -336,11 +351,11 @@
aaaaa2
RC=0
======== STDERR ========
-pcregrep: pcre_exec() error -8 while matching this line:
+pcregrep: pcre_exec() error -8 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcregrep: error -8 means that a resource limit was exceeded
pcregrep: check your regex for nested unlimited loops
-pcregrep: pcre_exec() error -8 while matching this line:
+pcregrep: pcre_exec() error -8 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
---------------------------- Test 38 ------------------------------
This line contains a binary zero here >?< for testing.
@@ -388,8 +403,10 @@
---------------------------- Test 49 ------------------------------
---------------------------- Test 50 ------------------------------
over the lazy dog.
+This time it jumps and jumps and jumps.
---------------------------- Test 51 ------------------------------
fox ?[1;31mjumps?[00m
+This time it ?[1;31mjumps?[00m and ?[1;31mjumps?[00m and ?[1;31mjumps?[00m.
---------------------------- Test 52 ------------------------------
36972,6
36990,4
@@ -402,3 +419,7 @@
596:28,4
597:15,5
597:32,4
+---------------------------- Test 54 -----------------------------
+Here is the ?[1;31mpattern?[00m again.
+That time it was on a ?[1;31mline by itself?[00m.
+This line contains ?[1;31mpattern?[00m not on a ?[1;31mline by itself?[00m.