Re: [exim] Pipe message from filter to return header

Top Page
Delete this message
Reply to this message
Author: Edgar Lovecraft
Date:  
To: Marc Perkel, exim-users
CC: 
Subject: Re: [exim] Pipe message from filter to return header
Marc Perkel wrote:
>
> OK - here's what I want to do - I want to run a program from the system
> filter and return the results in a header - something like this:
> headers add "X-Spamprobe: ${run {/etc/exim/scripts/spamprobe-test
> $message_id}{$value}{}}"
>
> However - I need to pipe the message into the program that I'm trying to
> get the results from. How do I do the piping part?


Last I knew, you can't, you have to go get the spooled files.
Any way, here is the latest rendition of what started as a simple
script for you a couple messages back, as I figure that you are just
wrapping what I have given you this should do all that you need, plus
US phone numbers. Let me know what all you come up with out of this.

Cheers!

<PERL_SCRIPT>
#!/usr/bin/perl -w

# This can be called from the command line with a pipe as in:
#       './script <mbox_file' or 'cat mbox_file |./script'
#
# This can also be called with two command arguments as in:
#       ./script <Exim_message-ID> <sender_address>
#
# If called with a Exim_message-ID, the message-ID MUST come first
#    as we use the message-ID parameter to grab -H and -D files
#    from the Exim spool directories, or from the Exiscan 'scan'
#    directory.


use strict;
use IO::File;

# Define two constants for the Exim and Exiscan directories.
use constant EXIM_SPOOL_DIR => '/var/spool/exim-data/input/';
use constant EXIM_XSCAN_DIR => '/var/spool/exim-data/scan/';

# Define the File Handle as STDIN and mbox as ''
my $FH = 'STDIN';

# If we are given an Exim Message-ID and NOT an MBOX file through STDIN
#    then we need to make a temporary MBOX file to parse.
#    just wrap in an eval{} to help with fatal errors
if ( $ARGV[0] )
{
  eval{ ($FH) = make_mbox($ARGV[0],$ARGV[1]) };
  exit 1 if !$FH;
}


# Parse the temporary MBOX file or STDIN and wrap in an eval{} to help
#    with fatal errors.
eval{ parse_mbox_file($FH) };


# Exit the script with 0 unless $@ else exit 1
exit 1 if $@;
exit 0;

# Used to create a temporary MBOX file
sub make_mbox
{
#    $msgID  : Exim Message-ID
#    $sender : Message sender as defined by Exim $sender_address
#    $dta[12]: Exim message data file location   (1 is for split_spool)
#    $hdr[12]: Exim message header file location (1 is for split_spool)
#    $eml    : This can be used to grab an Exiscan eml file
#    $dir    : This is the dir location ONLY FOR split spool instalations
#    $msgfh  : The File Handle for the message data
#    $hdrfh  : The File Handle for the message header
#    $mbxfh  : The File Handle for the temporary mbox File
#    $from   : Fake a MBOX From line as 'From $sender localtime()'
#
# Exit from the sub if we cannot open a message data File Handle.
#    Set autoflush on for the temporary mbox file.
  my $msgID   = $_[0];
  my $sender  = $_[1] || '-NUL-';
  my $dir     = $1 if $msgID =~ /^.{5}(.)-.{6}-.{2}/;
  my $dta1    = EXIM_SPOOL_DIR.$dir."/".$msgID."-D";
  my $dta2    = EXIM_SPOOL_DIR."/".$msgID."-D";
  my $hdr1    = EXIM_SPOOL_DIR.$dir."/".$msgID."-H";
  my $hdr2    = EXIM_SPOOL_DIR."/".$msgID."-H";
  my $eml     = EXIM_XSCAN_DIR.$msgID."/".$msgID.".eml";
  my $msgfh   = IO::File->new($dta1,O_RDONLY) ||
                IO::File->new($dta2,O_RDONLY) ||
                IO::File->new($eml,O_RDONLY)  || return('');
  my $hdrfh   = IO::File->new($hdr1,O_RDONLY) ||
                IO::File->new($hdr2,O_RDONLY) || '';
  my $mbxfh   = IO::File->new_tmpfile; $mbxfh->autoflush(1);
  my $from    = "From ".$sender." ".scalar localtime();


# Now we need to create a fake MBOX file from the two seperate
#    files, or we just use the already made eml file.  We check
#    to see if we opened an Exim data file, it has the Message-ID
#    as the very first line.  #Exit from the sub if we have an
#    Exim data file but no Exim header file.
#
# redundant yes, but ensure we have the top line.
  $msgfh->seek(0,0); my $line = $msgfh->getline();
  my $_is_exim_data = '';
     $_is_exim_data = 1 if $line =~ /^$msgID-D$/;
  if ( !$_is_exim_data )
  {
    # We have an Exiscan file, these are easy just add the
    # fake 'From ' line to the top and then dump the Exiscan
    # eml file after that.  Close the $msgfh and $hdrfh then
    # return the MBOX File Handle.
    $mbxfh->seek(0,0);
    $mbxfh->print($from);
    # We need to rewind the eml file so that we get the very first line.
    $msgfh->seek(0,0);
    while (defined($_ = $msgfh->getline()))
    {
      $mbxfh->print($_);
    }
    undef $msgfh; undef $hdrfh;
    $mbxfh->seek(0,0);
    return($mbxfh);
  }


# We have an Exim message data file, so we need an Exim header file too
#    exit from the sub if we do not have one.
  return('') if !$hdrfh;


# We can write the fake 'From ' line, then we need to get only the
#    message headers from the header file and write those, then we can
#    get the Exim data file remove the first line and write the rest.
#
# Exit from the sub if the header file does not have the message-ID as
#    the first line.
#
# redundant yes, but ensure we have the top line.
  $hdrfh->seek(0,0); $line = $hdrfh->getline();
  return('') if $line !~ /^$msgID-H$/;
  $mbxfh->seek(0,0);
  $mbxfh->print($from);
  my $_firstHeader = 0;
  while (defined($_ = $hdrfh->getline()))
  {
    $_firstHeader = 1 if /^\s*$/;
    next if !$_firstHeader;
    s/^[0-9]{3,}[BCFIPRST* ]{1} //;
    $mbxfh->print($_);
  }
  $mbxfh->print("\n");
  while (defined($_ = $msgfh->getline()))
  {
    $mbxfh->print($_);
  }
  undef $msgfh; undef $hdrfh;
  $mbxfh->seek(0,0);
  return($mbxfh);
}


# Used to parse the temporary MBOX file of STDIN
sub parse_mbox_file
{
# Grab the File Handle
my $fh = shift;

# Setup the variables that we are going to use
#    $_firstHeader: Keep track of the start of a message
#    $_lastHeader : The header//body seperator for a message
#    $_isText     : Skip any non text MIME parts
#    %http        : http://<domains>/ found in a message
#    %addr        : any email\@<domain> found in a message
#    %phon        : any number sequence that MAY be a phone number
  my ($_firstHeader,$_lastHeader,$_isText,%http,%addr,%phon) = (0,0,1,(),
(),());


# Grap the information from STDIN, such as "script <file"
#    or "cat file|script" etc.  The output is in the form
#
# <NEW LINE>
# From ....
# header: ....
# header: ....
# header: ....
# <NEW LINE>
# http://domain ...
# http://domain ...
# email://address ...
# email://address ...
# phone://number ...
# <NEW LINE>
  while (defined($_ = $fh->getline()))
  {


# MBOX messages start with "From " and check for End of File
#    as we need to print the http domains and email links
#    if we found any before exiting.
    if ( /^From / || eof )
    {
      ($_firstHeader,$_lastHeader)           = (1,0) if !eof;
      print STDOUT sort keys %http and %http = ()    if %http;
      print STDOUT sort keys %addr and %addr = ()    if %addr;
      print STDOUT sort keys %phon and %phon = ()    if %phon;
      s/^/\n/;
    }


# If we are in the headers section of a message print the line.
#    Set that we are done with the headers section at the first
#    blank line we find, as that should be the message
#    header//body seperator.
    if ( $_firstHeader )
    {
      ($_firstHeader,$_lastHeader) = (0,1) if /^\s*$/;
      print STDOUT;
    }


# We only want the headers of a message and then only check
#    none 'binary' MIME parts, like text/html, text/plain,
#    that type of thing.  Or just comment these lines if you
#    want the http and email stuff run on every part of a message
#    no matter what the MIME part type is.
    $_isText = 0 if /^Content-Type: /;
    $_isText = 1 if /^Content-Type: text.*/i;
    next if !$_isText;


# While a line contains http://<something> in it, strip out
#    everything upto the first / (or space, etc) after http://
#    this is not perfect but should do the job in most cases.
#    If you only want http:// domains in the body parts only,
#    then add '$_lastHeader && ' before the 'm{'.  Only add
#    unique domain strings.
    while ( m{(http://[A-Za-z0-9.-]+)}g )
    {
      $http{"$1\n"}++ if !$http{"$1\n"};
    }


# While a line contains anything@something then grab the stuff
#    that looks like it is an email address.  This is less
#    perfect than the http stuff, but then again, checking for
#    'valid' email addressess in something is near impossible ;)
#    We do try and make the email address thing as clean as
#    possible.  If you only want the email address looking strings
#    from the body of a message then add '$_lastHeader && ' before
#    the 'm{'.  Only add unique email strings.
    while ( $_lastHeader && m{\s+([^[:space:]]+\@[A-Za-z0-9.-]+)}g )
    {
      my $email =  $1;
      next if $email !~ /\@[A-Za-z0-9.-]+\..*/ || $email =~ /\@.*\@/;
      $email =~ s/[)<\["'\]>(]|(?i:mailto:|href=)|\.$//g;
      $addr{"email://$email\n"}++ if !$addr{"email://$email\n"};
    }
# While a line contains anything that may be a phone number.
#    this is rather loose and mainly for US numbers, I do not
#    know how well, if at all, it will catch non-US numbers.
    while ( $_lastHeader && m/(\d\d\d)[ ).-](\d\d\d)[ .-](\d\d\d\d)/g)
    {
      $phon{"phone://$1.$2.$3\n"}++ if !$phon{"phone://$1.$2.$3\n"};
    }
  }
  return 0;
}


__END__

</PERL_SCRIPT>

--

--EAL--

--