OK - I have got spam filtering up to 99% accurate and it actually works
really well. I'm using Spam Assassin and every trick there is. One of
the tricks that has gotten my accuracy up so high is external lists of
OR expressions. This is a feature that I have been asking both the EXIM
and the SA folks for. Two weeks ago I offered to pay $150 to anyone who
could make this work and someone did. I paid the money and - as I
promised - I'm sharing the results.
Below, the filter reads a series of list files. If a message matches a
list, I set a temporary header, and then in Spam Assassin I test for that
header and score points.
Here is how I implement the lists.
I again request that EXIM build this in.
########################################################
# Messages that have been spam checked #
########################################################
if "$h_X-Spam-Checker-Version:" contains "Spam"
then
# -- Remove useless headers
# This branch runs only for messages that already carry
# X-Spam-Checker-Version, i.e. SpamAssassin has seen them, so the X-Temp-*
# markers added by the not-yet-checked branch have served their purpose.
# Each marker is listed exactly once (X-Temp-Fromphrase used to be removed
# twice, which was redundant).
headers remove "X-Temp-From:"
headers remove "X-Temp-Fromphrase:"
headers remove "X-Temp-Received:"
headers remove "X-Temp-Whitehosts:"
headers remove "X-Temp-Whitesubject:"
headers remove "X-Temp-Blocklink:"
headers remove "X-Temp-Spelling:"
headers remove "X-Temp-Whitephrase:"
headers remove "X-Temp-Whitelink:"
headers remove "X-Temp-Linkphrase:"
headers remove "X-Temp-Deadaccount:"
# -- Flag High Scoring Spam
# Only messages whose X-Spam-Flag contains YES receive an X-Spam header.
# If X-Spam-Level also contains the run of stars below, the message is
# tagged HIGH and additionally marked with X-Spam-Bounce-Flag; every other
# flagged message is tagged LOW. Unflagged messages are left alone.
if "$h_X-Spam-Flag:" contains "YES" and "$h_X-Spam-Level:" contains "***************"
then
headers add "X-Spam: [SPAM] - HIGH"
headers add "X-Spam-Bounce-Flag: YES"
elif "$h_X-Spam-Flag:" contains "YES"
then
headers add "X-Spam: [SPAM] - LOW"
endif
########################################################
# Messages that have NOT been spam checked #
########################################################
else
###################################################
# Tests FROM, REPLY-TO and the envelope sender for Known Spammers
# Example of the regex built from the list file:
#   casinomadness.com|enlargeyourpenis.com
#
# How the pattern is built (innermost expansion first):
#   readfile  - reads the list, turning every newline into "|"
#   sg #1     - collapses runs of "|" (blank lines) into a single "|"
#   sg #2     - drops "#..." comment entries up to the next "|"
#   sg #3     - strips a leading or trailing "|"
# The leading "|" must go as well: a list file that starts with a blank
# line would otherwise produce an empty alternative, and an empty
# alternative matches every message.
# NOTE(review): a completely empty list file still yields an empty regex
# (matches everything) - keep at least one entry in each list.
#
# $sender_address is tested alongside $h_Return-path: because the
# Return-path: header is normally only added at delivery time, so the
# header form is usually empty while the filter runs.
# The subject string is kept on ONE line; a wrapped line would embed a
# literal newline in it.
if "${local_part:$h_From:}@${domain:$h_From:} $sender_address $h_Return-path: $h_Reply-to:" matches
${sg{${sg{${sg{${readfile{/etc/exim/lists/blockfrom}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}
then
headers add "X-Temp-From: YES"
endif
###################################################
# Tests FROM (and REPLY-TO) for PHRASES
# Example of the regex built from the list file:
#   \b(casino|penis|dealz)s?\b
#
# \x28 and \x29 expand to "(" and ")". The group is required: without it
# the leading \b binds only to the first list entry and the trailing s?\b
# only to the last one, so entries in the middle match inside any word.
# From: is tested in addition to Reply-To: because this check is described
# as a From test and sets X-Temp-Fromphrase; Reply-To: is kept so nothing
# that matched before stops matching.
#
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped so a blank first line cannot create
# an empty alternative that matches everything.
if "$h_From: $h_Reply-to:" matches
\\b\x28${sg{${sg{${sg{${readfile{/etc/exim/lists/fromphrase}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29s?\\b
then
headers add "X-Temp-Fromphrase: YES"
endif
###################################################
# Tests RECEIVED for Known Spammers
# Example of the regex built from the list file:
#   farmsex|crazybitches|savemoremoney
#
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped. Stripping the leading "|" matters:
# a list file that begins with a blank line would otherwise yield an empty
# alternative, which matches every message.
if "$h_Received:" matches
${sg{${sg{${sg{${readfile{/etc/exim/lists/blockhosts}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}
then
headers add "X-Temp-Received: YES"
endif
###################################################
# Tests RECEIVED for Allowed Hosts
# Example of the regex built from the list file:
#   bluesky|ctyme
#
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped. Stripping the leading "|" matters:
# a list file that begins with a blank line would otherwise yield an empty
# alternative, and every message would be whitelisted.
# NOTE(review): Received: lines are partly sender-supplied, so keep the
# entries specific enough that they cannot be matched by a forged line.
if "$h_Received:" matches
${sg{${sg{${sg{${readfile{/etc/exim/lists/whitehosts}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}
then
headers add "X-Temp-Whitehosts: YES"
endif
###################################################
# Tests SUBJECT for White Phrases
# Example of the regex built from the list file:
#   \b(Newly matched listings|exim|xml-dev)\b
#
# \x28 and \x29 expand to "(" and ")", so the word boundaries apply to the
# whole alternation.
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped so a blank first line cannot create
# an empty alternative that matches every subject.
if "$h_Subject:" matches
\\b\x28${sg{${sg{${sg{${readfile{/etc/exim/lists/whitesubject}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29\\b
then
headers add "X-Temp-Whitesubject: YES"
endif
###################################################
# Tests for White Phrases - Words Spammers rarely use
# Example of the regex built from the list file:
#   \b(imap|chickenhawk|exim)\b
#
# Subject and body are joined with a space. Without it the last word of
# the subject and the first word of the body fuse into one token and the
# \b anchors fail at that junction. $message_body is a variable, not a
# header reference, so it takes no trailing colon (a colon there is just a
# literal ":" appended to the text).
# \x28 and \x29 expand to "(" and ")".
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped so a blank first line cannot create
# an empty alternative that matches everything.
if "$h_Subject: $message_body" matches
\\b\x28${sg{${sg{${sg{${readfile{/etc/exim/lists/whitephrase}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29\\b
then
headers add "X-Temp-Whitephrase: YES"
endif
###################################################
# Tests Deliberately Misspelled Words
# Example of the regex built from the list file:
#   (v1agra|v i a g r a)
#
# No \b anchors here: entries may match anywhere in the subject or body.
# \x28 and \x29 expand to "(" and ")".
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped. Stripping the leading "|" matters:
# a list file that begins with a blank line would otherwise yield an empty
# alternative, which matches every message.
if "$h_Subject:$message_body" matches
\x28${sg{${sg{${sg{${readfile{/etc/exim/lists/blockspelling}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29
then
headers add "X-Temp-Spelling: YES"
endif
###################################################
# Tests for LINKS to Banned Sites - BLACK LINKS
# Example of the regex built from the list file:
#   (http://|mailto:).{0,15}(farmsex.com|nastybitches.com).{0,10}/
#
# The example is kept on a single comment line; if it wraps, the
# continuation is no longer a comment and the filter fails to parse.
# \x28 and \x29 expand to "(" and ")"; \: \/ and \| expand to the plain
# characters, so the prefix group is an ordinary alternation.
# $message_body is a variable, not a header reference, so it takes no
# trailing colon.
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped so a blank first line cannot create
# an empty alternative that matches any link.
if "$message_body" matches
\x28http\:\/\/\|mailto\:\x29.{0,15}\x28${sg{${sg{${sg{${readfile{/etc/exim/lists/blocklinks}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29.{0,10}\/
then
headers add "X-Temp-Blocklink: YES"
endif
###################################################
# Tests for links to good web sites - WHITE LINKS
# Example of the regex built from the list file:
#   (http://|mailto:).{0,15}(nytimes.com|eff.org)
#
# The example is kept on a single comment line; if it wraps, the
# continuation is no longer a comment and the filter fails to parse.
# \x28 and \x29 expand to "(" and ")"; \: \/ and \| expand to the plain
# characters, so the prefix group is an ordinary alternation.
# $message_body is a variable, not a header reference, so it takes no
# trailing colon.
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped so a blank first line cannot create
# an empty alternative that whitelists any link.
if "$message_body" matches
\x28http\:\/\/\|mailto\:\x29.{0,15}\x28${sg{${sg{${sg{${readfile{/etc/exim/lists/whitelink}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29
then
headers add "X-Temp-Whitelink: YES"
endif
####################################################
# Tests LINKS for phrases that would be likely to spam sites (low scoring)
# Example of the regex built from the list file:
#   (http://|mailto:).{0,15}(casino|cash|dealz).{0,10}/
#
# The example is kept on a single comment line; if it wraps, the
# continuation is no longer a comment and the filter fails to parse.
# \x28 and \x29 expand to "(" and ")"; \: \/ and \| expand to the plain
# characters, so the prefix group is an ordinary alternation.
# $message_body is a variable, not a header reference, so it takes no
# trailing colon.
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped so a blank first line cannot create
# an empty alternative that matches any link.
if "$message_body" matches
\x28http\:\/\/\|mailto\:\x29.{0,15}\x28${sg{${sg{${sg{${readfile{/etc/exim/lists/linkphrase}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29.{0,10}\/
then
headers add "X-Temp-Linkphrase: YES"
endif
###################################################
# This filter tests for references to DEAD ACCOUNTS
# Example of the regex built from the list file:
#   \b(nobody@example.com|old-account@example.com)
#
# The whole header block ($message_headers) is searched, so an address
# counts wherever it appears in the headers.
# \x28 and \x29 expand to "(" and ")".
# NOTE(review): this list is read from /etc/exim/deadaccounts, not from
# /etc/exim/lists/ like the other lists - confirm the path is intended.
# List handling (innermost expansion first): readfile joins lines with "|",
# then runs of "|" are collapsed, "#..." comment entries are dropped, and a
# leading or trailing "|" is stripped so a blank first line cannot create
# an empty alternative that matches every message.
if "$message_headers" matches
\\b\x28${sg{${sg{${sg{${readfile{/etc/exim/deadaccounts}{|}}}{\\|+}{|}}}{#.*?\\|}{}}}{^\\||\\|\$}{}}\x29
then
headers add "X-Temp-Deadaccount: YES"
endif
# ---- Final Endif
endif
Here are the SA rules to go with it.
# One rule per temporary header set by the Exim filter ("<name>: YES").
# Each rule looks at its own header by name instead of searching ALL:
#  - /X-Temp-From/ against ALL also matched "X-Temp-Fromphrase", so
#    BLACKLIST_FROM fired whenever only the phrase test had matched;
#  - a search of ALL also fires when the token merely appears inside some
#    other header's value (for example in the Subject).
# Rule names are unchanged, so existing score lines keep working.
header BLACKLIST_FROM X-Temp-From =~ /YES/
describe BLACKLIST_FROM From Address matches Blacklist
header BLACKLIST_FROMPHRASE X-Temp-Fromphrase =~ /YES/
describe BLACKLIST_FROMPHRASE From address has phrase matching Blacklist
header BLACKLIST_RECEIVED X-Temp-Received =~ /YES/
describe BLACKLIST_RECEIVED Host matches Blacklist
header BLACKLIST_LINKS X-Temp-Blocklink =~ /YES/
describe BLACKLIST_LINKS Links to Blacklist Site
header HOST_WHITELIST X-Temp-Whitehosts =~ /YES/
describe HOST_WHITELIST Host Whitelist
header SUBJ_WHITELIST X-Temp-Whitesubject =~ /YES/
describe SUBJ_WHITELIST Subject Whitelist
header BAD_SPELLING X-Temp-Spelling =~ /YES/
describe BAD_SPELLING Deliberately Misspelled Words
header LINK_PHRASE X-Temp-Linkphrase =~ /YES/
describe LINK_PHRASE Phrase within link
# NOTE(review): FROM_PHRASE tests the same header as BLACKLIST_FROMPHRASE,
# so both rules always fire together - confirm the double scoring is wanted.
header FROM_PHRASE X-Temp-Fromphrase =~ /YES/
describe FROM_PHRASE Phrase within From address
header WHITE_LINKS X-Temp-Whitelink =~ /YES/
describe WHITE_LINKS Links to Nonspam Sites
header WHITE_PHRASE X-Temp-Whitephrase =~ /YES/
describe WHITE_PHRASE Phrases in non-spam
header DEAD_ACCOUNT X-Temp-Deadaccount =~ /YES/
describe DEAD_ACCOUNT Mail to Dead Account
# Scores for the list-driven rules. Positive values push a message toward
# spam, negative values pull it away.
# -- Blacklist hits (positive)
score BLACKLIST_FROM 8
score BLACKLIST_FROMPHRASE 3
score BLACKLIST_RECEIVED 8
score BLACKLIST_LINKS 10
score BAD_SPELLING 7
score LINK_PHRASE 2
# FROM_PHRASE is defined on the same X-Temp-Fromphrase header as
# BLACKLIST_FROMPHRASE, so a phrase hit adds both scores.
score FROM_PHRASE 2
score DEAD_ACCOUNT 9
# -- Whitelist hits (negative)
score HOST_WHITELIST -15
score WHITE_LINKS -10
score SUBJ_WHITELIST -5
score WHITE_PHRASE -3