OK - I got your code and grafted it into my system and it's working. The
significant difference - especially with identifying nonspam. People
>Marc Perkel wrote:
>
>
>>OK - here's what I want to do - I want to run a program from the system
>>filter and return the results in a header - something like this:
>>headers add "X-Spamprobe: ${run {/etc/exim/scripts/spamprobe-test
>>$message_id}{$value}{}}"
>>
>>However - I need to pipe the message into the program that I'm trying to
>>get the results from. How do I do the piping part?
>>
>>
>
>Last I knew, you can't, you have to go get the spooled files.
>Any way, here is the latest rendition of what started as a simple
>script for you a couple messages back, as I figure that you are just
>wrapping what I have given you this should do all that you need, plus
>US phone numbers. Let me know what all you come up with out of this.
>
>Cheers!
>
><PERL_SCRIPT>
>#!/usr/bin/perl -w
>
># This can be called from the command line with a pipe as in:
># './script <mbox_file' or 'cat mbox_file |./script'
>#
># This can also be called with two command arguments as in:
># ./script <Exim_message-ID> <sender_address>
>#
># If called with a Exim_message-ID, the message-ID MUST come first
># as we use the message-ID parameter to grab -H and -D files
># from the Exim spool directories, or from the Exiscan 'scan'
># directory.
>
>use strict;
>use IO::File;
>
># Define two constants for the Exim and Exiscan directories.
>use constant EXIM_SPOOL_DIR => '/var/spool/exim-data/input/';
>use constant EXIM_XSCAN_DIR => '/var/spool/exim-data/scan/';
>
># Define the File Handle as STDIN and mbox as ''
>my $FH = 'STDIN';
>
># If we are given an Exim Message-ID and NOT an MBOX file through STDIN
># then we need to make a temporary MBOX file to parse.
># just wrap in an eval{} to help with fatal errors
>if ( $ARGV[0] )
>{
> eval{ ($FH) = make_mbox($ARGV[0],$ARGV[1]) };
> exit 1 if !$FH;
>}
>
># Parse the temporary MBOX file or STDIN and wrap in an eval{} to help
># with fatal errors.
>eval{ parse_mbox_file($FH) };
>
># Exit the script with 0 unless $@ else exit 1
>exit 1 if $@;
>exit 0;
>
># Used to create a temporary MBOX file
>sub make_mbox
>{
># $msgID : Exim Message-ID
># $sender : Message sender as defined by Exim $sender_address
># $dta[12]: Exim message data file location (1 is for split_spool)
># $hdr[12]: Exim message header file location (1 is for split_spool)
># $eml : This can be used to grab an Exiscan eml file
># $dir : This is the dir location ONLY FOR split spool instalations
># $msgfh : The File Handle for the message data
># $hdrfh : The File Handle for the message header
># $mbxfh : The File Handle for the temporary mbox File
># $from : Fake a MBOX From line as 'From $sender localtime()'
>#
># Exit from the sub if we cannot open a message data File Handle.
># Set autoflush on for the temporary mbox file.
> my $msgID = $_[0];
> my $sender = $_[1] || '-NUL-';
> my $dir = $1 if $msgID =~ /^.{5}(.)-.{6}-.{2}/;
> my $dta1 = EXIM_SPOOL_DIR.$dir."/".$msgID."-D";
> my $dta2 = EXIM_SPOOL_DIR."/".$msgID."-D";
> my $hdr1 = EXIM_SPOOL_DIR.$dir."/".$msgID."-H";
> my $hdr2 = EXIM_SPOOL_DIR."/".$msgID."-H";
> my $eml = EXIM_XSCAN_DIR.$msgID."/".$msgID.".eml";
> my $msgfh = IO::File->new($dta1,O_RDONLY) ||
> IO::File->new($dta2,O_RDONLY) ||
> IO::File->new($eml,O_RDONLY) || return('');
> my $hdrfh = IO::File->new($hdr1,O_RDONLY) ||
> IO::File->new($hdr2,O_RDONLY) || '';
> my $mbxfh = IO::File->new_tmpfile; $mbxfh->autoflush(1);
> my $from = "From ".$sender." ".scalar localtime();
>
># Now we need to create a fake MBOX file from the two seperate
># files, or we just use the already made eml file. We check
># to see if we opened an Exim data file, it has the Message-ID
># as the very first line. #Exit from the sub if we have an
># Exim data file but no Exim header file.
>#
># redundant yes, but ensure we have the top line.
> $msgfh->seek(0,0); my $line = $msgfh->getline();
> my $_is_exim_data = '';
> $_is_exim_data = 1 if $line =~ /^$msgID-D$/;
> if ( !$_is_exim_data )
> {
> # We have an Exiscan file, these are easy just add the
> # fake 'From ' line to the top and then dump the Exiscan
> # eml file after that. Close the $msgfh and $hdrfh then
> # return the MBOX File Handle.
> $mbxfh->seek(0,0);
> $mbxfh->print($from);
> # We need to rewind the eml file so that we get the very first line.
> $msgfh->seek(0,0);
> while (defined($_ = $msgfh->getline()))
> {
> $mbxfh->print($_);
> }
> undef $msgfh; undef $hdrfh;
> $mbxfh->seek(0,0);
> return($mbxfh);
> }
>
># We have an Exim message data file, so we need an Exim header file too
># exit from the sub if we do not have one.
> return('') if !$hdrfh;
>
># We can write the fake 'From ' line, then we need to get only the
># message headers from the header file and write those, then we can
># get the Exim data file remove the first line and write the rest.
>#
># Exit from the sub if the header file does not have the message-ID as
># the first line.
>#
># redundant yes, but ensure we have the top line.
> $hdrfh->seek(0,0); $line = $hdrfh->getline();
> return('') if $line !~ /^$msgID-H$/;
> $mbxfh->seek(0,0);
> $mbxfh->print($from);
> my $_firstHeader = 0;
> while (defined($_ = $hdrfh->getline()))
> {
> $_firstHeader = 1 if /^\s*$/;
> next if !$_firstHeader;
> s/^[0-9]{3,}[BCFIPRST* ]{1} //;
> $mbxfh->print($_);
> }
> $mbxfh->print("\n");
> while (defined($_ = $msgfh->getline()))
> {
> $mbxfh->print($_);
> }
> undef $msgfh; undef $hdrfh;
> $mbxfh->seek(0,0);
> return($mbxfh);
>}
>
># Used to parse the temporary MBOX file of STDIN
>sub parse_mbox_file
>{
># Grab the File Handle
> my $fh = shift;
>
># Setup the variables that we are going to use
># $_firstHeader: Keep track of the start of a message
># $_lastHeader : The header//body seperator for a message
># $_isText : Skip any non text MIME parts
># %http : http://<domains>/ found in a message
># %addr : any email\@<domain> found in a message
># %phon : any number sequence that MAY be a phone number
> my ($_firstHeader,$_lastHeader,$_isText,%http,%addr,%phon) = (0,0,1,(),
>(),());
>
># Grap the information from STDIN, such as "script <file"
># or "cat file|script" etc. The output is in the form
>#
># <NEW LINE>
># From ....
># header: ....
># header: ....
># header: ....
># <NEW LINE>
># http://domain ...
># http://domain ...
># email://address ...
># email://address ...
># phone://number ...
># <NEW LINE>
> while (defined($_ = $fh->getline()))
> {
>
># MBOX messages start with "From " and check for End of File
># as we need to print the http domains and email links
># if we found any before exiting.
> if ( /^From / || eof )
> {
> ($_firstHeader,$_lastHeader) = (1,0) if !eof;
> print STDOUT sort keys %http and %http = () if %http;
> print STDOUT sort keys %addr and %addr = () if %addr;
> print STDOUT sort keys %phon and %phon = () if %phon;
> s/^/\n/;
> }
>
># If we are in the headers section of a message print the line.
># Set that we are done with the headers section at the first
># blank line we find, as that should be the message
># header//body seperator.
> if ( $_firstHeader )
> {
> ($_firstHeader,$_lastHeader) = (0,1) if /^\s*$/;
> print STDOUT;
> }
>
># We only want the headers of a message and then only check
># none 'binary' MIME parts, like text/html, text/plain,
># that type of thing. Or just comment these lines if you
># want the http and email stuff run on every part of a message
># no matter what the MIME part type is.
> $_isText = 0 if /^Content-Type: /;
> $_isText = 1 if /^Content-Type: text.*/i;
> next if !$_isText;
>
># While a line contains http://<something> in it, strip out
># everything upto the first / (or space, etc) after http://
># this is not perfect but should do the job in most cases.
># If you only want http:// domains in the body parts only,
># then add '$_lastHeader && ' before the 'm{'. Only add
># unique domain strings.
> while ( m{(http://[A-Za-z0-9.-]+)}g )
> {
> $http{"$1\n"}++ if !$http{"$1\n"};
> }
>
># While a line contains anything@something then grab the stuff
># that looks like it is an email address. This is less
># perfect than the http stuff, but then again, checking for
># 'valid' email addressess in something is near impossible ;)
># We do try and make the email address thing as clean as
># possible. If you only want the email address looking strings
># from the body of a message then add '$_lastHeader && ' before
># the 'm{'. Only add unique email strings.
> while ( $_lastHeader && m{\s+([^[:space:]]+\@[A-Za-z0-9.-]+)}g )
> {
> my $email = $1;
> next if $email !~ /\@[A-Za-z0-9.-]+\..*/ || $email =~ /\@.*\@/;
> $email =~ s/[)<\["'\]>(]|(?i:mailto:|href=)|\.$//g;
> $addr{"email://$email\n"}++ if !$addr{"email://$email\n"};
> }
># While a line contains anything that may be a phone number.
># this is rather loose and mainly for US numbers, I do not
># know how well, if at all, it will catch non-US numbers.
> while ( $_lastHeader && m/(\d\d\d)[ ).-](\d\d\d)[ .-](\d\d\d\d)/g)
> {
> $phon{"phone://$1.$2.$3\n"}++ if !$phon{"phone://$1.$2.$3\n"};
> }
> }
> return 0;
>}
>
>__END__
>
></PERL_SCRIPT>
>
>
>