[exim] I've modified Erik Mugele's exim_surbl.pl utility - N…

Αρχική Σελίδα
Delete this message
Reply to this message
Συντάκτης: Marc Perkel
Ημερομηνία:  
Προς: exim-users
Καινούρια Θέματα: Re: [exim] I've modified Erik Mugele's exim_surbl.pl utility - PREFORMAT
Αντικείμενο: [exim] I've modified Erik Mugele's exim_surbl.pl utility - NEW VERSION
OK - here's a new version of my modification. And - I have a new data
source that is allowing me to generate my own DOB list. So for those of
you who noticed problems with the other one - test this.

This version uses strict and warnings. Thanks to Todd Lyons for his
assistance on helping me with this.

#
# Copyright (c) 2006-2007 Erik Mugele.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTES
# -----
#
# 1. This script makes use of the Country Code Top Level # Domains 
(ccTLD) provided by the SURBL group at
# http://spamcheck.freeapp.net/two-level-tlds  # THE VARIABLE 
$cctld_file MUST BE SET TO THE FULL PATH AND # NAME OF THE FILE 
CONTAINING THE CCTLD LIST!  (see below)
#
# 2. This script makes use of whitelisting of popular domains.  The # 
source of the list can be found here: # 
http://spamassassin.apache.org/full/3.1.x/dist/rules/25_uribl.cf
# These are domains that are whitelisted by the SURBL group so it
# doesn't make sense to waste resources doing lookups on them.
# THE VARIABLE $whitelist_file MUST BE SET TO THE FULL PATH AND
# NAME OF THE FILE CONTAINING THE WHITE LIST!  (see below)          # # 
3. Per the guidelines at http://www.surbl.org, if your site processes
# more than 100,000 messages per day, you should NOT be using the # 
public SURBL name servers but should be rsync-ing from them and # 
running your own.  See http://www3.surbl.org/rsync-signup.html
#


use strict;
use warnings;

sub surblspamcheck
{
# Designed and written by Erik Mugele, 2004-2006
# http://www.teuton.org/~ejm
# Version 2.0
# Modified by Marc Perkel - marc@??? -
http://www.junkemailfilter.com

   # The following variable is the full path to the file containing the 
   # list of Country Code Top Level Domains (ccTLD).
   # ---------------------------------------------------------------------
   # THIS VARIABLE MUST BE SET TO THE FULL PATH AND NAME OF THE FILE    
# CONTAINING THE CCTLD LIST!
   # ---------------------------------------------------------------------
   my $cctld_file = "/etc/exim/ccTLD.txt";          # The following 
variable is the full path to the file containing
   # whitelist entries.     # 
---------------------------------------------------------------------
   # THIS VARIABLE MUST BE SET TO THE FULL PATH AND NAME OF THE FILE    
# CONTAINING THE WHITELIST DOMAINS!
   # ---------------------------------------------------------------------
   my $whitelist_file = "/etc/exim/surbl_whitelist.txt";
      # This variable defines the maximum MIME file size that will be 
checked
   # if this script is called by the MIME ACL.  This is primarily to
   # keep the load down on the server.  Size is in bytes.
   my $max_file_size = 50000;
      # The following two variables enable or disable the SURBL and URIBL
   # lookups.  Set to 1 to enable and 0 to disable.
   my $surbl_enable = 1;
   my $uribl_enable = 1;
   my $dob_enable = 1;
   my $karma_enable = 1;
      # Check to see if a decode MIME attachment is being checked or    
# just a plain old text message with no attachments
   my $exim_body = "";
   my $mime_filename = Exim::expand_string('$mime_decoded_filename');
   if ($mime_filename) {
       # DEBUG Statement
       #warn ("MIME FILENAME: $mime_filename\n");
       # If the MIME file is too large, skip it.
       if (-s $mime_filename <= $max_file_size) {
           open(INFILE,"<$mime_filename");
           binmode(INFILE);
           while (read(INFILE,my $buff,1024)) {
               $exim_body .= $buff;
           }
           close (INFILE);
       } else {
           $exim_body = "";
       }
   } else {
       $exim_body = Exim::expand_string('$message_body');
   }
      sub surbllookup {
       # This subroutine does the actual DNS lookup and builds and returns
       # the return message for the SURBL lookup.
       my @params = @_;
       my($return_string);
       my $surbldomain = ".multi.surbl.org";
       my @dnsbladdr=gethostbyname($params[0].$surbldomain);
       # If gethostbyname() returned anything, build a return message.
       $return_string = "";
       if (scalar(@dnsbladdr) != 0) {
           $return_string = "Blacklisted URL in message. 
(".$params[0].") in";
           my @surblipaddr = unpack('C4',($dnsbladdr[4])[0]);
           if ($surblipaddr[3] & 64) {
               $return_string .= " [jp]";
           }
           if ($surblipaddr[3] & 32) {
               $return_string .= " [ab]";
           }
           if ($surblipaddr[3] & 16) {
               $return_string .= " [ob]";
           }
           if ($surblipaddr[3] & 8) {
               $return_string .= " [ph]";
           }
           if ($surblipaddr[3] & 4) {
               $return_string .= " [ws]";
           }
           if ($surblipaddr[3] & 2) {
               $return_string .= " [sc]";
           }
           $return_string .= ". See http://www.surbl.org/lists.html.";
       }
       return $return_string;
   }
      sub uribllookup {
       # This subroutine does the actual DNS lookup and builds and returns
       # the return message for the URIBL check.
       my @params = @_;
       my($return_string);
       my $surbldomain = ".black.uribl.com";
       my @dnsbladdr=gethostbyname($params[0].$surbldomain);
       # If gethostbyname() returned anything, build a return message.
       $return_string = "";
       if (scalar(@dnsbladdr) != 0) {
           $return_string = "Blacklisted URL in message. 
(".$params[0].") in";
           my @surblipaddr = unpack('C4',($dnsbladdr[4])[0]);
           if ($surblipaddr[3] & 8) {
               $return_string .= " [red]";
           }
           if ($surblipaddr[3] & 4) {
               $return_string .= " [grey]";
           }
           if ($surblipaddr[3] & 2) {
               $return_string .= " [black]";
           }
           $return_string .= ". See http://lookup.uribl.com.";
       }
       return $return_string;
   }
      # This list returns domain name under 5 days old
   sub doblookup {
       # This subroutine does the actual DNS lookup and builds and returns
       # the return message for the URIBL check.
       my @params = @_;
       my($return_string);
       my $dobdomain = ".dob.sibl.support-intelligence.net";
       my @dnsbladdr=gethostbyname($params[0].$dobdomain);
       # If gethostbyname() returned anything, build a return message.
       $return_string = "";
       if (scalar(@dnsbladdr) != 0) {
           $return_string = "Blacklisted URL in message. 
(".$params[0].") in DOB.";
       }
       return $return_string;
   }
      # http://wiki.junkemailfilter.com/index.php/Spam_DNS_Lists
   sub karmalookup {
       # If white listed - skip other lookups
       my @params = @_;
       my($return_string);
       my $karmadomain = ".hostkarma.junkemailfilter.com";
       my @dnsbladdr=gethostbyname($params[0].$karmadomain);
       # If gethostbyname() returned anything, build a return message.
       $return_string = "";
       if (scalar(@dnsbladdr) != 0) {
           my @karmaipaddr = unpack('C4',($dnsbladdr[4])[0]);


           # Junk Email Filter now has a Day old Bread List
           if ($karmaipaddr[3] eq 6) {
               $return_string = "Blacklisted URL in message. 
(".$params[0].") in HOSTKARMA DOB.";
           }


           # This is mostly for caching purposes
           if ($karmaipaddr[3] eq 2) {
               $return_string = "Blacklisted URL in message. 
(".$params[0].") in HOSTKARMA.";
           }


           # Exclude HostHarma White List
           if ($karmaipaddr[3] eq 1) {
               $return_string = "whitelisted";
           }


           # Exclude HostHarma Yellow List
           if ($karmaipaddr[3] eq 3) {
               $return_string = "whitelisted";
           }


           # Exclude HostHarma NoBl List
           if ($karmaipaddr[3] eq 5) {
               $return_string = "whitelisted";
           }
       }
       return $return_string;
   }


   sub converthex {
       # This subroutin converts two hex characters to an ASCII character.
       # It is called when ASCII obfuscation or Printed-Quatable characters
       # are found (i.e. %AE or =AE).
       # It should return a converted/plain address after splitting off
       # everything that isn't part of the address portion of the URL.
       my @ob_parts = @_;
       my $address = $ob_parts[0];
       for (my $j=1; $j < scalar(@ob_parts); $j++) {
           $address .= chr(hex(substr($ob_parts[$j],0,2)));
           $address .= substr($ob_parts[$j],2,);
       }
       $address = (split(/[^A-Za-z0-9._\-]/,$address))[0];
       return $address
   }


################
# Main Program #
################

   if ($exim_body) {
       # Find all the URLs in the message by finding the HTTP string
       my @parts = split /[hH][tT][tT][pP]:\/\//,$exim_body;
       if (scalar(@parts) > 1) {
           my(@lookupdomains,@whitelist,@cctlds);
           # Read the entries from the ccTLD file.
           open (cctld_handle,$cctld_file) or die "Can't open 
$cctld_file.\n";
           while (<cctld_handle>) {
               next if (/^#/ || /^$/ || /^\s$/);
               push(@cctlds,$_);
           }
           close (cctld_handle) or die "Close: $!\n";
           my($address);


           # Read the entries from the whitelist file.
           open (whitelist_handle,$whitelist_file) or die "Can't open 
$whitelist_file.\n";
           while (<whitelist_handle>) {
               next if (/^#/ || /^$/ || /^\s$/);
               push(@whitelist,$_);
           }
           close (whitelist_handle) or die "Close: $!\n";


           # Go through each of the HTTP parts that were found in the 
message
           for (my $i=1; $i < scalar(@parts); $i++) {
               # Special case of Quoted Printable EOL marker
               my ($return_result,@domain);
               $parts[$i] =~ s/=\n//g;
                   # Split the parts and find the address portion of the 
URL.
               # Address SHOULD be either a FQDN, IP address, or encoded 
address.
               $address = (split(/[^A-Za-z0-9\._\-%=]/,$parts[$i]))[0];
               # Check for an =.  If it exists, we assume the URL is 
doing                # Quoted-Printable.  Decode it and redine $address
               if ($address =~ /=/) {
                   my @ob_parts = split /=/,$address;
                   $address = converthex(@ob_parts);
               }
               # Check for a %.  If it exists the URL is using % ASCII
               # obfuscation.  Decode it and redefine $address.
               if ($address =~ /%/) {
                   my @ob_parts = split /%/,$address;
                   $address = converthex(@ob_parts);
               }
               # Split the the address into the elements separated by 
periods.
               @domain = split /\./,$address;
               # Check the length of the domain name.  If less then two 
elements
               # at this point it is probably bogus or there is a bug in 
one of                # the decoding/converting routines above.
               if (scalar(@domain) >= 2) {
                   my($spamcheckdomain);
                   $return_result="";
                   # By default, assume that the domain check is on a 
                   # "standard" two level domain
                   $spamcheckdomain=$domain[-2].".".$domain[-1];
                   # Check for a two level domain
                   if (((scalar(@domain) == 2) || (scalar(@domain) >= 
5))  &&                        (grep(/^$spamcheckdomain$/i,@cctlds))) {
                       $return_result="cctld";
                   }
                   # Check for a three level domain
                   if (scalar(@domain) == 3) {
                       if (grep(/^$spamcheckdomain$/i,@cctlds)) {


$spamcheckdomain=$domain[-3].".".$spamcheckdomain;
                           if (grep(/^$spamcheckdomain$/,@cctlds)) {
                               $return_result="cctld";
                           }
                       }
                   }
                   # Check for a four level domain
                   if (scalar(@domain) == 4) {
                       # Check to see if the domain is an IP address
                       if ($domain[-1] =~ /[a-zA-Z]/) {
                           if (grep(/^$spamcheckdomain$/i,@cctlds)) {


$spamcheckdomain=$domain[-3].".".$spamcheckdomain;
                               if (grep(/^$spamcheckdomain$/i,@cctlds)) {


$spamcheckdomain=$domain[-4].".".$spamcheckdomain;
                               }
                           }
                       } else {
                           # Domain is an IP address
                           $spamcheckdomain=$domain[3].".".$domain[2].
                               ".".$domain[1].".".$domain[0];
                       }
                   }
                   # DEBUG statement
                   #warn ("FOUND DOMAIN ($mime_filename): 
$spamcheckdomain\n");
                   # If whitelisting is enabled check domain against the 
                   # whitelist.
                   if ($whitelist_file ne "") {
                       foreach my $whitelist_entry (@whitelist) {
                           chomp($whitelist_entry);
                           if ($spamcheckdomain =~ 
m/^$whitelist_entry$/i) {
                               $return_result="whitelisted";
                               last;
                           }
                       }
                   }
                   # If the domain is whitelisted or in the cctld skip 
adding
                   # it to the lookup list.
                   if ($return_result eq "") {
                       if (scalar(@lookupdomains) > 0) {
                           # Check so see if the domain already is in 
the list.
                           if (not 
grep(/^$spamcheckdomain$/i,@lookupdomains)) {
                                   push(@lookupdomains,$spamcheckdomain);
                           }
                       } else {
                           push(@lookupdomains,$spamcheckdomain);
                       }
                   }
               }
           }
           # If there are items in the lookupdomains list then
           # perform lookups on them.  If there are not, something is wrong
           # and just return false.  There should always be something in 
the list.
           if (scalar(@lookupdomains) > 0) {
               foreach my $i (@lookupdomains) {
                   my $return_result;
                   # DEBUG statement.
                   #warn ("CHECKING DOMAIN ($mime_filename): $i\n");


                   # do a HOSTKARMA lookup - contains whitelist information
                   # 
http://wiki.junkemailfilter.com/index.php/Spam_DNS_Lists
                   if (($karma_enable == 1) && ($return_result eq "")) {
                       $return_result = karmalookup($i);
                   }


                   # do an SURBL lookup
                   if (($surbl_enable == 1) && ($return_result eq "")) {
                       $return_result = surbllookup($i);
                   }


                   # do a URIBL lookup
                   if (($uribl_enable == 1) && ($return_result eq "")) {
                       $return_result = uribllookup($i);
                   }


                   # do a Day Old Bread lookup - domains under 5 days old
                   if (($dob_enable == 1) && ($return_result eq "")) {
                       $return_result = doblookup($i);
                   }


                   # If we got a hit return the result to Exim
                   if ($return_result ne "") {
                       return $return_result;
                   }
               }
           }
       }
   }
   # We didn't find any URLs or the URLs we did find were not
   # listed so return false.
   return -1;
}