Hi
I have a patch for exim-4.50 to correct a timeout issue with spamd.
The background is that sometimes spamd either never reads data from
a connection it has accepted, or it never writes response data.
The exiscan spam.[ch] uses a 3600(!!) second timeout on spamd socket
reads. Furthermore, it blindly assumes that writes won't block, so it
may never time out.
My work has been bitten by this several times. The results are
either duplicated email when the sender times out and retries, or
all the available incoming connections are used by sleeping exims
on a long timeout.
I have set the timeout to 120 seconds and, barring long TCP connection
timeouts, it is reasonably well enforced from the start of the spam
condition to the end. The patch also fixes the short write issue where
the write is interrupted by a signal.
Please have a look over it and let me know if there is anything
that you would like me to change so that it can be applied to the
exim distribution.
Ian
--
Ian Freislich
diff -udr exim-4.50.orig/src/exim.h exim-4.50/src/exim.h
--- exim-4.50.orig/src/exim.h Thu Feb 17 16:49:11 2005
+++ exim-4.50/src/exim.h Thu Apr 21 13:12:35 2005
@@ -92,6 +92,7 @@
#include <sys/file.h>
#include <dirent.h>
#include <netdb.h>
+#include <poll.h>
#include <pwd.h>
#include <grp.h>
#include <syslog.h>
diff -udr exim-4.50.orig/src/spam.c exim-4.50/src/spam.c
--- exim-4.50.orig/src/spam.c Thu Feb 17 16:49:11 2005
+++ exim-4.50/src/spam.c Fri Apr 22 07:19:29 2005
@@ -30,14 +30,17 @@
FILE *mbox_file;
int spamd_sock;
uschar spamd_buffer[32600];
- int i, j, offset;
+ int i, j, offset, result;
uschar spamd_version[8];
uschar spamd_score_char;
double spamd_threshold, spamd_score;
int spamd_report_offset;
uschar *p,*q;
int override = 0;
+ time_t start;
+ size_t read, wrote;
struct sockaddr_un server;
+ struct pollfd pollfd;
/* find the username from the option list */
if ((user_name = string_nextinlist(&list, &sep,
@@ -77,6 +80,7 @@
return DEFER;
};
+ start = time(NULL);
/* socket does not start with '/' -> network socket */
if (*spamd_address != '/') {
time_t now = time(NULL);
@@ -203,33 +207,67 @@
};
/* now send the file */
+ /* spamd sometimes accepts connections but doesn't read data off
+ * the connection. We make the file descriptor non-blocking so
+ * that the write will only write sufficient data without blocking
+ * and we poll the descriptor to make sure that we can write without
+ * blocking. Short writes are gracefully handled and if the whole
+ * transaction takes too long it is aborted.
+ */
+ pollfd.fd = spamd_sock;
+ pollfd.events = POLLWRNORM;
+ fcntl(spamd_sock, F_SETFL, O_NONBLOCK);
do {
- j = fread(spamd_buffer,1,sizeof(spamd_buffer),mbox_file);
- if (j > 0) {
- i = send(spamd_sock,spamd_buffer,j,0);
- if (i != j) {
- log_write(0, LOG_MAIN|LOG_PANIC,
- "spam acl condition: error/short send to spamd");
+ read = fread(spamd_buffer,1,sizeof(spamd_buffer),mbox_file);
+ if (read > 0) {
+ offset = 0;
+again:
+ result = poll(&pollfd, 1, 1000);
+ if (result == -1 && errno == EINTR)
+ continue;
+ else if (result < 1) {
+ if (result == -1)
+ log_write(0, LOG_MAIN|LOG_PANIC,
+ "spam acl condition: %s on spamd socket", strerror(errno));
+ else {
+ if (time(NULL) - start < SPAMD_TIMEOUT)
+ goto again;
+ log_write(0, LOG_MAIN|LOG_PANIC,
+ "spam acl condition: timed out writing spamd socket");
+ }
close(spamd_sock);
fclose(mbox_file);
return DEFER;
- };
- };
+ }
+ wrote = send(spamd_sock,spamd_buffer + offset,read - offset,0);
+ if (offset + wrote != read) {
+ offset += wrote;
+ goto again;
+ }
+ }
+ }
+ while (!feof(mbox_file) && !ferror(mbox_file));
+ if (ferror(mbox_file)) {
+ log_write(0, LOG_MAIN|LOG_PANIC,
+ "spam acl condition: error reading spool file: %s", strerror(errno));
+ close(spamd_sock);
+ fclose(mbox_file);
+ return DEFER;
}
- while (j > 0);
fclose(mbox_file);
/* we're done sending, close socket for writing */
shutdown(spamd_sock,SHUT_WR);
- /* read spamd response */
+ /* read spamd response using what's left of the timeout.
+ */
memset(spamd_buffer, 0, sizeof(spamd_buffer));
offset = 0;
while((i = ip_recv(spamd_sock,
spamd_buffer + offset,
sizeof(spamd_buffer) - offset - 1,
- SPAMD_READ_TIMEOUT)) > 0 ) {
+ SPAMD_TIMEOUT - time(NULL) + start)) > 0 ) {
offset += i;
}
diff -udr exim-4.50.orig/src/spam.h exim-4.50/src/spam.h
--- exim-4.50.orig/src/spam.h Thu Feb 17 16:49:11 2005
+++ exim-4.50/src/spam.h Thu Apr 21 18:20:57 2005
@@ -11,8 +11,8 @@
#ifdef WITH_CONTENT_SCAN
-/* timeout for reading from spamd */
-#define SPAMD_READ_TIMEOUT 3600
+/* timeout for reading and writing spamd */
+#define SPAMD_TIMEOUT 120
/* maximum length of the spam bar */
#define MAX_SPAM_BAR_CHARS 50