Re: [exim-dev] Questions about integration of exim with rspa…

Etusivu
Poista viesti
Vastaa
Lähettäjä: Vsevolod Stakhov
Päiväys:  
Vastaanottaja: exim-dev
Aihe: Re: [exim-dev] Questions about integration of exim with rspamd spam filter
On 09/12/2012 05:52 AM, Phil Pennock wrote:
> On 2012-09-11 at 10:40 -0700, Todd Lyons wrote:
>> On Mon, Sep 10, 2012 at 11:13 AM, Vsevolod Stakhov
>> <vsevolod@???> wrote:
>>> I've seen the topic in this mailing list related to integration
>>> of rspamd and exim. I'm the author of rspamd and I'd really
>>> very appreciate if exim will be able to interact with rspamd
>>> natively.
>
> I think rspamd support is a good idea, as long as the code fits.
> I'd be very happy to see this in the 4.81 release (whenever it
> happens).


Excellent, so I've adopted the old patch for new rspamd and tweaked
configuration.

>>> 2) Why do you think that push_line function in the current
>>> patch is not good for exim (it uses writev for effective data
>>> transmission)?
>>
>> It's not that it's "not good for exim", rather it's just
>> different than the way it's normally done in exim. You chose to
>> use a r += push_line() way of adding to a string (adding to a
>> va_item array), whereas in exim everything else tends to either
>> create it all in one command (i.e the string_format() in the
>> current spam() function) or using string_snprintf().


Well, I've kept writev in place as from my opinion it is more efficient.

>>> 3) Can I introduce some new global variables like
>>> <spamd_action> - this can be useful as rspamd has 'actions':
>>> policies for recommended action with this email (like 'accept',
>>> 'reject', 'add header' etc)?
> I think that the best approach is to modify the "spamd_address"
> option parsing, to take a "variant=<spamassassin|rspam>" modifier
> on an end-point, with spamassassin being the default (for
> backwards compatibility). So the admin should be able to say:
>
> spamd_address = /var/run/rspam_socket variant=rspam or:
> spamd_address = 192.168.2.10 783 variant=rspam : \ 192.168.2.11 783
> variant=rspam : \ 192.168.2.12 783 variant=rspam


Well, I've added this option as 'variant=rspamd'. The other case
'variant=spamassassin' is not supported however, so without this
option exim will use SPAMC protocol, and with this option - RSPAMC.

The other thing I'm unsure about is $spamd_report. SA writes very
complex report with all symbols and tons of other (unusable from my
point of view) information. Rspamd can pass symbols data to exim, but
report must be created in exim itself. Currently I've added universal
$spamd_action variable that works for both SA and rspamd, but in case
of SA it can be just 'no action' or 'reject', while rspamd can pass
more advanced actions (like 'add header' or 'rewrite subject').

I've tested this patch on both SA and rspamd and it seems to be
working fine. But it can be surely improved by extending rspamd reports.

- --
Vsevolod Stakhov
diff --git a/src/src/expand.c b/src/src/expand.c
index 7803862..888226f 100644
--- a/src/src/expand.c
+++ b/src/src/expand.c
@@ -608,6 +608,7 @@ static var_entry var_table[] = {
   { "sn8",                 vtype_filter_int,  &filter_sn[8] },
   { "sn9",                 vtype_filter_int,  &filter_sn[9] },
 #ifdef WITH_CONTENT_SCAN
+  { "spam_action",         vtype_stringptr,   &spam_action },
   { "spam_bar",            vtype_stringptr,   &spam_bar },
   { "spam_report",         vtype_stringptr,   &spam_report },
   { "spam_score",          vtype_stringptr,   &spam_score },
diff --git a/src/src/globals.c b/src/src/globals.c
index bcbe12d..6e0d3a4 100644
--- a/src/src/globals.c
+++ b/src/src/globals.c
@@ -1169,6 +1169,7 @@ BOOL    smtp_use_size          = FALSE;
 uschar *spamd_address          = US"127.0.0.1 783";
 uschar *spam_bar               = NULL;
 uschar *spam_report            = NULL;
+uschar *spam_action            = NULL;
 uschar *spam_score             = NULL;
 uschar *spam_score_int         = NULL;
 #endif
diff --git a/src/src/globals.h b/src/src/globals.h
index 16caa41..f26a037 100644
--- a/src/src/globals.h
+++ b/src/src/globals.h
@@ -744,6 +744,7 @@ extern BOOL    smtp_use_size;          /* Global for passed connections */
 extern uschar *spamd_address;          /* address for the spamassassin daemon */
 extern uschar *spam_bar;               /* the spam "bar" (textual representation of spam_score) */
 extern uschar *spam_report;            /* the spamd report (multiline) */
+extern uschar *spam_action;            /* the spamd action */
 extern uschar *spam_score;             /* the spam score (float) */
 extern uschar *spam_score_int;         /* spam_score * 10 (int) */
 #endif
diff --git a/src/src/spam.c b/src/src/spam.c
index 63395f2..1e1e2b9 100644
--- a/src/src/spam.c
+++ b/src/src/spam.c
@@ -14,12 +14,16 @@
 uschar spam_score_buffer[16];
 uschar spam_score_int_buffer[16];
 uschar spam_bar_buffer[128];
+uschar spam_action_buffer[32];
 uschar spam_report_buffer[32600];
 uschar prev_user_name[128] = "";
 int spam_ok = 0;
 int spam_rc = 0;
 uschar *prev_spamd_address_work = NULL;


+/* push formatted line into vector */
+static int spam_push_line(struct iovec *iov, int i, const char *fmt, ...);
+
 int spam(uschar **listptr) {
   int sep = 0;
   uschar *list = *listptr;
@@ -29,10 +33,11 @@ int spam(uschar **listptr) {
   FILE *mbox_file;
   int spamd_sock = -1;
   uschar spamd_buffer[32600];
-  int i, j, offset, result;
+  int i, j, offset, result, is_rspamd;
   uschar spamd_version[8];
+  uschar spamd_short_result[8];
   uschar spamd_score_char;
-  double spamd_threshold, spamd_score;
+  double spamd_threshold, spamd_score, spamd_reject_score;
   int spamd_report_offset;
   uschar *p,*q;
   int override = 0;
@@ -126,8 +131,15 @@ int spam(uschar **listptr) {
       spamd_address_container *this_spamd =
         (spamd_address_container *)store_get(sizeof(spamd_address_container));


+      /* Check for spamd variant */
+      if( Ustrstr(address, "variant=rspamd") != NULL ) {
+        this_spamd->is_rspamd = 1;  
+      }
+      else {
+        this_spamd->is_rspamd = 0;  
+      }
       /* grok spamd address and port */
-      if( sscanf(CS address, "%s %u", this_spamd->tcp_addr, &(this_spamd->tcp_port)) != 2 ) {
+      if( sscanf(CS address, "%s %hu", this_spamd->tcp_addr, &(this_spamd->tcp_port)) != 2 ) {
         log_write(0, LOG_MAIN,
           "spam acl condition: warning - invalid spamd address: '%s'", address);
         continue;
@@ -171,6 +183,7 @@ int spam(uschar **listptr) {
                       spamd_address_vector[current_server]->tcp_port,
                       5 ) > -1) {
         /* connection OK */
+        is_rspamd = spamd_address_vector[current_server]->is_rspamd;
         break;
       };


@@ -207,12 +220,28 @@ int spam(uschar **listptr) {
     }


     server.sun_family = AF_UNIX;
-    Ustrcpy(server.sun_path, spamd_address_work);
+    p = Ustrstr(spamd_address_work, "variant=rspamd");
+    if( p != NULL ) {
+      is_rspamd = TRUE;
+      /* strip spaces */
+      p --;
+      while (p > spamd_address_work && isspace (*p)) {
+        p --;
+      }
+      Ustrncpy(server.sun_path, spamd_address_work, p - spamd_address_work + 1);
+      /* zero terminate */
+      server.sun_path[p - spamd_address_work + 1] = 0;
+    }
+    else {
+      is_rspamd = FALSE;  
+      Ustrcpy(server.sun_path, spamd_address_work);
+    }
+


     if (connect(spamd_sock, (struct sockaddr *) &server, sizeof(struct sockaddr_un)) < 0) {
       log_write(0, LOG_MAIN|LOG_PANIC,
                 "malware acl condition: spamd: unable to connect to UNIX socket %s (%s)",
-                spamd_address_work, strerror(errno) );
+                server.sun_path, strerror(errno) );
       (void)fclose(mbox_file);
       (void)close(spamd_sock);
       return DEFER;
@@ -229,21 +258,53 @@ int spam(uschar **listptr) {
   }


   /* now we are connected to spamd on spamd_sock */
-  (void)string_format(spamd_buffer,
-           sizeof(spamd_buffer),
-           "REPORT SPAMC/1.2\r\nUser: %s\r\nContent-length: %ld\r\n\r\n",
-           user_name,
-           mbox_size);
-
-  /* send our request */
-  if (send(spamd_sock, spamd_buffer, Ustrlen(spamd_buffer), 0) < 0) {
-    (void)close(spamd_sock);
-    log_write(0, LOG_MAIN|LOG_PANIC,
-         "spam acl condition: spamd send failed: %s", strerror(errno));
-    (void)fclose(mbox_file);
-    (void)close(spamd_sock);
-    return DEFER;
-  };
+  if (is_rspamd) { 
+    /* rspamd variant */
+    int r, request_p = 0;
+    const char *helo;
+    struct iovec request_v[64];
+
+    r = 0;
+    r += spam_push_line(request_v, request_p++, "CHECK RSPAMC/1.3\r\n");
+    r += spam_push_line(request_v, request_p++, "Content-length: %lu\r\n", mbox_size);
+    r += spam_push_line(request_v, request_p++, "Queue-Id: %s\r\n", message_id);
+    r += spam_push_line(request_v, request_p++, "From: <%s>\r\n", sender_address);
+    r += spam_push_line(request_v, request_p++, "Recipient-Number: %d\r\n", recipients_count);
+    /* copy all recipients as well */
+    for (i = 0; i < recipients_count && r < 55; i ++)
+        r += spam_push_line(request_v, request_p++, "Rcpt: <%s>\r\n", recipients_list[i].address);
+    if ((helo = expand_string(US"$sender_helo_name")) != NULL && *helo != '\0')
+      r += spam_push_line(request_v, request_p++, "Helo: %s\r\n", helo);
+    if (sender_host_address != NULL)
+      r += spam_push_line(request_v, request_p++, "IP: %s\r\n", sender_host_address);
+    r += spam_push_line(request_v, request_p++, "\r\n");
+    if (writev(spamd_sock, request_v, request_p) < 0) {
+        (void)close(spamd_sock);
+        log_write(0, LOG_MAIN|LOG_PANIC,
+            "spam acl condition: spamd (rspamd) send failed: %s", strerror(errno));
+        (void)fclose(mbox_file);
+        (void)close(spamd_sock);
+        return DEFER;
+    }
+  }
+  else {
+    /* spamassassin variant */
+    (void)string_format(spamd_buffer,
+            sizeof(spamd_buffer),
+            "REPORT SPAMC/1.2\r\nUser: %s\r\nContent-length: %ld\r\n\r\n",
+            user_name,
+            mbox_size);
+    /* send our request */
+    if (send(spamd_sock, spamd_buffer, Ustrlen(spamd_buffer), 0) < 0) {
+        (void)close(spamd_sock);
+        log_write(0, LOG_MAIN|LOG_PANIC,
+            "spam acl condition: spamd send failed: %s", strerror(errno));
+        (void)fclose(mbox_file);
+        (void)close(spamd_sock);
+        return DEFER;
+    };
+  }
+


/* now send the file */
/* spamd sometimes accepts conections but doesn't read data off
@@ -346,60 +407,103 @@ again:
/* reading done */
(void)close(spamd_sock);

-  /* dig in the spamd output and put the report in a multiline header, if requested */
-  if( sscanf(CS spamd_buffer,"SPAMD/%7s 0 EX_OK\r\nContent-length: %*u\r\n\r\n%lf/%lf\r\n%n",
-             spamd_version,&spamd_score,&spamd_threshold,&spamd_report_offset) != 3 ) {
+  if (!is_rspamd) {
+    /* dig in the spamd output and put the report in a multiline header, if requested */
+    if( sscanf(CS spamd_buffer,"SPAMD/%7s 0 EX_OK\r\nContent-length: %*u\r\n\r\n%lf/%lf\r\n%n",
+                spamd_version,&spamd_score,&spamd_threshold,&spamd_report_offset) != 3 ) {


-    /* try to fall back to pre-2.50 spamd output */
-    if( sscanf(CS spamd_buffer,"SPAMD/%7s 0 EX_OK\r\nSpam: %*s ; %lf / %lf\r\n\r\n%n",
-               spamd_version,&spamd_score,&spamd_threshold,&spamd_report_offset) != 3 ) {
-      log_write(0, LOG_MAIN|LOG_PANIC,
-         "spam acl condition: cannot parse spamd output");
-      return DEFER;
+        /* try to fall back to pre-2.50 spamd output */
+        if( sscanf(CS spamd_buffer,"SPAMD/%7s 0 EX_OK\r\nSpam: %*s ; %lf / %lf\r\n\r\n%n",
+                spamd_version,&spamd_score,&spamd_threshold,&spamd_report_offset) != 3 ) {
+        log_write(0, LOG_MAIN|LOG_PANIC,
+            "spam acl condition: cannot parse spamd output");
+        return DEFER;
+        };
     };
-  };


-  /* Create report. Since this is a multiline string,
-  we must hack it into shape first */
-  p = &spamd_buffer[spamd_report_offset];
-  q = spam_report_buffer;
-  while (*p != '\0') {
-    /* skip \r */
-    if (*p == '\r') {
-      p++;
-      continue;
-    };
-    *q = *p;
-    q++;
-    if (*p == '\n') {
-      /* add an extra space after the newline to ensure
-      that it is treated as a header continuation line */
-      *q = ' ';
-      q++;
+    /* Create report. Since this is a multiline string,
+    we must hack it into shape first */
+    p = &spamd_buffer[spamd_report_offset];
+    q = spam_report_buffer;
+    while (*p != '\0') {
+        /* skip \r */
+        if (*p == '\r') {
+        p++;
+        continue;
+        };
+        *q = *p;
+        q++;
+        if (*p == '\n') {
+        /* add an extra space after the newline to ensure
+        that it is treated as a header continuation line */
+        *q = ' ';
+        q++;
+        };
+        p++;
     };
-    p++;
-  };
-  /* NULL-terminate */
-  *q = '\0';
-  q--;
-  /* cut off trailing leftovers */
-  while (*q <= ' ') {
+    /* NULL-terminate */
     *q = '\0';
     q--;
-  };
+    /* cut off trailing leftovers */
+    while (*q <= ' ') {
+        *q = '\0';
+        q--;
+    };
+    if( spamd_score >= spamd_threshold ) {
+      Ustrcpy(spam_action_buffer, "reject");
+    }
+    else {
+      Ustrcpy(spam_action_buffer, "no action");
+    }
+  }
+  else {
+    /* rspamd variant of reply */
+    int r;
+    if( (r = sscanf(CS spamd_buffer,"RSPAMD/%7s 0 EX_OK\r\nMetric: default; %7s %lf / %lf / %lf\r\n%n",
+        spamd_version,spamd_short_result,&spamd_score,&spamd_threshold,&spamd_reject_score,&spamd_report_offset)) != 5 ) {
+      log_write(0, LOG_MAIN|LOG_PANIC,
+            "spam acl condition: cannot parse spamd output: %d", r);
+      return DEFER;
+    };
+    /* now parse action */
+    p = &spamd_buffer[spamd_report_offset];
+
+    if( Ustrncmp(p, "Action: ", sizeof("Action: ") - 1) == 0 ) {
+      p += sizeof("Action: ") - 1;
+      q = &spam_action_buffer[0];
+      while (*p && *p != '\r' && (q - spam_action_buffer) < sizeof(spam_action_buffer) - 1) {
+        *q++ = *p++;
+      }
+      *q = '\0';
+    }
+    /* make a simple report */
+    if( spamd_score >= spamd_threshold ) {
+      p = "likely spam";
+    }
+    else if ( Ustrcmp (spam_action_buffer, "no action") == 0 ) {
+      p = "unlikely spam";
+    }
+    else {
+      p = "probably spam";
+    }
+    string_format(spam_report_buffer, sizeof(spam_report_buffer), "This message is %s.", p);
+  }
+
+  /* common spamd actions */
   spam_report = spam_report_buffer;
+  spam_action = spam_action_buffer;


   /* create spam bar */
   spamd_score_char = spamd_score > 0 ? '+' : '-';
   j = abs((int)(spamd_score));
   i = 0;
   if( j != 0 ) {
-    while((i < j) && (i <= MAX_SPAM_BAR_CHARS))
-       spam_bar_buffer[i++] = spamd_score_char;
+      while((i < j) && (i <= MAX_SPAM_BAR_CHARS))
+      spam_bar_buffer[i++] = spamd_score_char;
   }
   else{
-    spam_bar_buffer[0] = '/';
-    i = 1;
+      spam_bar_buffer[0] = '/';
+      i = 1;
   }
   spam_bar_buffer[i] = '\0';
   spam_bar = spam_bar_buffer;
@@ -415,12 +519,12 @@ again:


   /* compare threshold against score */
   if (spamd_score >= spamd_threshold) {
-    /* spam as determined by user's threshold */
-    spam_rc = OK;
+      /* spam as determined by user's threshold */
+      spam_rc = OK;
   }
   else {
-    /* not spam */
-    spam_rc = FAIL;
+      /* not spam */
+      spam_rc = FAIL;
   };


/* remember expanded spamd_address if needed */
@@ -440,4 +544,31 @@ again:
};
}

+static int
+spam_push_line(struct iovec *iov, const int i, const char *fmt, ...)
+{
+  va_list ap;
+  size_t len;
+  char buf[512];
+
+  if (i >= 64) {
+    log_write(0, LOG_MAIN, "rspam: %s: index out of bounds", __FUNCTION__);
+    return (-1);
+  }
+
+  va_start(ap, fmt);
+  len = vsnprintf(buf, sizeof(buf), fmt, ap);
+  va_end(ap);
+
+  iov[i].iov_base = string_copy(US buf);
+  iov[i].iov_len = len;
+
+  if (len >= sizeof(buf)) {
+    log_write(0, LOG_MAIN, "rspam: %s: error, string was longer than %d", __FUNCTION__, (int)sizeof(buf));
+    return (-1);
+  }
+
+  return 0;
+}
+
 #endif
diff --git a/src/src/spam.h b/src/src/spam.h
index ba700c8..6047c59 100644
--- a/src/src/spam.h
+++ b/src/src/spam.h
@@ -22,7 +22,8 @@


typedef struct spamd_address_container {
uschar tcp_addr[24];
- unsigned int tcp_port;
+ unsigned short int tcp_port;
+ unsigned is_rspamd:1;
} spamd_address_container;

#endif