ph10 2007/02/07 12:23:35 GMT
Modified files:
exim-doc/doc-txt ChangeLog NewStuff
exim-src ACKNOWLEDGMENTS
exim-src/src exigrep.src
Log:
Jori Hamalainen's patch to speed up exigrep, and fix two typos.
Revision Changes Path
1.475 +3 -0 exim/exim-doc/doc-txt/ChangeLog
1.140 +6 -0 exim/exim-doc/doc-txt/NewStuff
1.73 +2 -1 exim/exim-src/ACKNOWLEDGMENTS
1.5 +24 -15 exim/exim-src/src/exigrep.src
Index: ChangeLog
===================================================================
RCS file: /home/cvs/exim/exim-doc/doc-txt/ChangeLog,v
retrieving revision 1.474
retrieving revision 1.475
diff -u -r1.474 -r1.475
--- ChangeLog 7 Feb 2007 11:24:56 -0000 1.474
+++ ChangeLog 7 Feb 2007 12:23:35 -0000 1.475
@@ -1,4 +1,4 @@
-$Cambridge: exim/exim-doc/doc-txt/ChangeLog,v 1.474 2007/02/07 11:24:56 ph10 Exp $
+$Cambridge: exim/exim-doc/doc-txt/ChangeLog,v 1.475 2007/02/07 12:23:35 ph10 Exp $
Change log file for Exim from version 4.21
-------------------------------------------
@@ -90,6 +90,9 @@
PH/21 Long custom messages for fakedefer and fakereject are now split up
into multiline responses in the same way that messages for "deny" and
other ACL rejections are.
+
+PH/22 Applied Jori Hamalainen's speed-up changes and typo fixes to exigrep,
+ with slight modification.
Exim version 4.66
Index: NewStuff
===================================================================
RCS file: /home/cvs/exim/exim-doc/doc-txt/NewStuff,v
retrieving revision 1.139
retrieving revision 1.140
diff -u -r1.139 -r1.140
--- NewStuff 6 Feb 2007 14:49:13 -0000 1.139
+++ NewStuff 7 Feb 2007 12:23:35 -0000 1.140
@@ -1,4 +1,4 @@
-$Cambridge: exim/exim-doc/doc-txt/NewStuff,v 1.139 2007/02/06 14:49:13 ph10 Exp $
+$Cambridge: exim/exim-doc/doc-txt/NewStuff,v 1.140 2007/02/07 12:23:35 ph10 Exp $
New Features in Exim
--------------------
@@ -285,6 +285,12 @@
14. The smtp transport has a new option called hosts_avoid_pipelining. It can
be used to suppress the use of PIPELINING to certain hosts, while still
supporting the other SMTP extensions (cf hosts_avoid_tls).
+
+15. By default, exigrep does case-insensitive matches. There is now a -I option
+ that makes it case-sensitive. This may give a performance improvement when
+ searching large log files. Without -I, the Perl pattern matches use the /i
+ option; with -I they don't. In both cases it is possible to change the case
+ sensitivity within the pattern using (?i) or (?-i).
Version 4.66
Index: ACKNOWLEDGMENTS
===================================================================
RCS file: /home/cvs/exim/exim-src/ACKNOWLEDGMENTS,v
retrieving revision 1.72
retrieving revision 1.73
diff -u -r1.72 -r1.73
--- ACKNOWLEDGMENTS 6 Feb 2007 10:00:24 -0000 1.72
+++ ACKNOWLEDGMENTS 7 Feb 2007 12:23:35 -0000 1.73
@@ -1,4 +1,4 @@
-$Cambridge: exim/exim-src/ACKNOWLEDGMENTS,v 1.72 2007/02/06 10:00:24 ph10 Exp $
+$Cambridge: exim/exim-src/ACKNOWLEDGMENTS,v 1.73 2007/02/07 12:23:35 ph10 Exp $
EXIM ACKNOWLEDGEMENTS
@@ -20,7 +20,7 @@
Philip Hazel
Lists created: 20 November 2002
-Last updated: 06 February 2007
+Last updated: 07 February 2007
THE OLD LIST
@@ -157,6 +157,7 @@
Thomas Hager Patch for saslauthd crash bug
Richard Hall Fix for file descriptor leak in redirection
Jori Hamalainen Patch to add features to exiqsumm
+ Patch to speed up exigrep
Steve Haslam Lots of stuff, including
HMAC computations
Better error messages for BDB
Index: exigrep.src
===================================================================
RCS file: /home/cvs/exim/exim-src/src/exigrep.src,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- exigrep.src 31 Jan 2007 16:52:12 -0000 1.4
+++ exigrep.src 7 Feb 2007 12:23:35 -0000 1.5
@@ -1,5 +1,5 @@
#! PERL_COMMAND -w
-# $Cambridge: exim/exim-src/src/exigrep.src,v 1.4 2007/01/31 16:52:12 ph10 Exp $
+# $Cambridge: exim/exim-src/src/exigrep.src,v 1.5 2007/02/07 12:23:35 ph10 Exp $
use strict;
@@ -28,6 +28,10 @@
# appears to be compressed, it is passed through zcat. We can't just do this
# for all files, because zcat chokes on non-compressed files.
+# Performance optimized on 02/02/2007 by Jori Hamalainen
+# Typical run time acceleration: 4 times
+
+
use Getopt::Std qw(getopts);
use POSIX qw(mktime);
@@ -38,7 +42,7 @@
sub seconds {
my($year,$month,$day,$hour,$min,$sec,$tzs,$tzh,$tzm) =
- $_[0] =~ /^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?>\s([+-])(\d\d)(\d\d))?/;
+ $_[0] =~ /^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?>\s([+-])(\d\d)(\d\d))?/o;
my $seconds = mktime $sec, $min, $hour, $day, $month - 1, $year - 1900;
@@ -55,22 +59,22 @@
# This subroutine processes a single line (in $_) from a log file. Program
# defensively against short lines finding their way into the log.
-my (%saved, %id_list, $pattern, $queue_time);
+my (%saved, %id_list, $pattern, $queue_time, $insensitive);
sub do_line {
# Convert syslog lines to mainlog format, as in eximstats.
-if (! /^\\d{4}/) { $_ =~ s/^.*? exim\b.*?: //; }
+if (!/^\d{4}-/o) { $_ =~ s/^.*? exim\b.*?: //o; }
return unless
- my($date,$entry) = /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(.*)/;
+ my($date,$id) = /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(\w{6}\-\w{6}\-\w{2})?/o;
# Handle the case when the log line belongs to a specific message. We save
# lines for specific messages until the message is complete. Then either print
# or discard.
-if (my($id) = $entry =~ /^(?:\[\d+\]\s)?(\w{6}\-\w{6}\-\w{2})/)
+if (defined $id)
{
$saved{$id} = '' unless defined($saved{$id});
@@ -78,17 +82,20 @@
$saved{$id} .= $_;
- # Are we interested in this id ?
+ # Are we interested in this id ? Short circuit if we already were interested.
- $id_list{$id} = 1 if /$pattern/io;
+ $id_list{$id} = 1 if defined $id_list{$id} ||
+ ($insensitive && /$pattern/io) || /$pattern/o;
# See if this is a completion for some message. If it is interesting,
# print it, but in any event, throw away what was saved.
- if ($entry =~
- /(?:Completed|rejected (?:by local_scan|by non-SMTP ACL|after DATA))/)
+ if (index($_, 'Completed') != -1 ||
+ (index($_, 'rejected') != -1 &&
+ /rejected (?:by local_scan|by non-SMTP ACL|after DATA)/o))
{
- if ($saved{$id} =~ /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ([+-]\d{4} )?)(\w{6}\-\w{6}\-\w{2})/)
+ if ($queue_time != -1 &&
+ $saved{$id} =~ /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ([+-]\d{4} )?)/o)
{
my $old_sec = &seconds($1);
my $sec = &seconds($date);
@@ -108,7 +115,8 @@
# Handle the case where the log line does not belong to a specific message.
# Print it if it is interesting.
-elsif ($entry =~ /$pattern/io) { print "$_\n"; }
+elsif (($insensitive && $_ =~ /$pattern/io) || $_ =~ /$pattern/o)
+ { print "$_\n"; }
}
@@ -116,10 +124,11 @@
# are quoted if the -l flag is given. The -t flag gives a time-on-queue value
# which is an additional condition.
-getopts('lt:',\my %args);
+getopts('Ilt:',\my %args);
$queue_time = $args{'t'}? $args{'t'} : -1;
+$insensitive = $args{'I'}? 0 : 1;
-die "usage: exigrep [-l] [-t <seconds>] <pattern> [<log file>]...\n"
+die "usage: exigrep [-I] [-l] [-t <seconds>] <pattern> [<log file>]...\n"
if ($#ARGV < 0);
$pattern = shift @ARGV;
@@ -134,7 +143,7 @@
foreach (@ARGV)
{
my $filename = $_;
- if ($filename =~ /\.(?:COMPRESS_SUFFIX)$/)
+ if ($filename =~ /\.(?:COMPRESS_SUFFIX)$/o)
{
open(LOG, "ZCAT_COMMAND $filename |") ||
die "Unable to zcat $filename: $!\n";
@@ -154,6 +163,6 @@
# At the end of processing all the input, print any uncompleted data
-for (keys %id_list) { print "+++ $_ not completed +++\n$saved{$_}\n;" }
+for (keys %id_list) { print "+++ $_ not completed +++\n$saved{$_}\n"; }
# End of exigrep