------- You are receiving this mail because: -------
You are on the CC list for the bug.
http://bugs.exim.org/show_bug.cgi?id=789
Summary: Asleep Exim process
Product: Exim
Version: 4.69
Platform: x86
OS/Version: Linux
Status: NEW
Severity: bug
Priority: critical
Component: Delivery in general
AssignedTo: nigel@???
ReportedBy: mjtoro@???
CC: exim-dev@???
We have a large exim infrastructure running and very often we are watching some
asleep exim processes. They are becoming a problem because the mail delivery is
delayed even a week.
Some useful information bellow...
[root@buzon ~]$ ps -aux | grep -i -E "exim4 -Mc|exim -Mc"
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 984 0.0 0.2 10340 2288 ? S 09:03 0:00
/usr/sbin/exim4 -Mc 1L89B2-0000Fl-NR
[root@buzon ~]$ strace -p 984
Process 984 attached - interrupt to quit
time(NULL) = 1228379245
open("/etc/localtime", O_RDONLY) = 6
fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7f1a000
read(6, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\0\0\10"..., 4096) =
946
close(6) = 0
munmap(0xb7f1a000, 4096) = 0
write(4, "2008-12-04 09:27:25 francisco.lo"..., 494) = 494
time(NULL) = 1228379246
time(NULL) = 1228379246
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
socket(PF_FILE, SOCK_DGRAM, 0) = 6
fcntl64(6, F_SETFD, FD_CLOEXEC) = 0
connect(6, {sa_family=AF_FILE, path="/dev/log"}, 110) = 0
send(6, "<22>Dec 4 09:27:26 exim[984]: 2"..., 544, MSG_NOSIGNAL) = 544
open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7
fcntl64(7, F_GETFD) = 0
fcntl64(7, F_SETFD, FD_CLOEXEC) = 0
fstat64(7, {st_mode=S_IFREG|0640, st_size=1489668, ...}) = 0
write(7, "2008-12-04 09:27:26 1L89B2-0000F"..., 514) = 514
time(NULL) = 1228379246
write(5, "0\201\240\2\1\3c\201\232\4#o=empleados,o=juntade"..., 163) = 163
poll([{fd=5, events=POLLIN|POLLPRI|POLLERR|POLLHUP,
revents=POLLIN|POLLERR|POLLHUP}], 1, -1) = 1
read(5, 0x911fefb, 8) = -1 ECONNRESET (Connection reset by
peer)
time(NULL) = 1228379246
write(4, "2008-12-04 09:27:26 araceli.seba"..., 480) = 480
time(NULL) = 1228379246
time(NULL) = 1228379246
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
send(6, "<22>Dec 4 09:27:26 exim[984]: 2"..., 530, MSG_NOSIGNAL) = 530
time(NULL) = 1228379246
close(7) = 0
open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7
fcntl64(7, F_GETFD) = 0
fcntl64(7, F_SETFD, FD_CLOEXEC) = 0
fstat64(7, {st_mode=S_IFREG|0640, st_size=1490182, ...}) = 0
write(7, "2008-12-04 09:27:26 1L89B2-0000F"..., 500) = 500
time(NULL) = 1228379246
write(5, "0\201\235\2\1\4c\201\227\4#o=empleados,o=juntade"..., 160) = -1 EPIPE
(Broken pipe)
--- SIGPIPE (Broken pipe) @ 0 (0) ---
time(NULL) = 1228379246
time(NULL) = 1228379246
write(4, "2008-12-04 09:27:26 manuelg.garc"..., 474) = 474
time(NULL) = 1228379246
time(NULL) = 1228379246
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
send(6, "<22>Dec 4 09:27:26 exim[984]: 2"..., 524, MSG_NOSIGNAL) = 524
time(NULL) = 1228379246
close(7) = 0
open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7
fcntl64(7, F_GETFD) = 0
fcntl64(7, F_SETFD, FD_CLOEXEC) = 0
fstat64(7, {st_mode=S_IFREG|0640, st_size=1490682, ...}) = 0
write(7, "2008-12-04 09:27:26 1L89B2-0000F"..., 494) = 494
write(5, "0\5\2\1\5B\0", 7) = -1 EPIPE (Broken pipe)
--- SIGPIPE (Broken pipe) @ 0 (0) ---
shutdown(5, 2 /* send and receive */) = -1 ENOTCONN (Transport endpoint is
not connected)
close(5) = 0
geteuid32() = 0
getegid32() = 102
setgid32(102) = 0
setuid32(102) = 0
rt_sigaction(SIGTERM, {SIG_IGN}, {SIG_DFL}, 8) = 0
time(NULL) = 1228379246
open("/colas/exim4/db/retry.lockfile", O_RDWR|O_LARGEFILE) = 5
alarm(60) = 0
fcntl64(5, F_SETLKW64, {type=F_WRLCK, whence=SEEK_SET, start=0, len=0},
0xbfef8e64) = 0
alarm(0) = 60
open("/proc/stat", O_RDONLY) = 8
fstat64(8, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7f1a000
read(8, "cpu 4550350 20 769622 26617089 "..., 4096) = 777
read(8, "", 4096) = 0
close(8) = 0
munmap(0xb7f1a000, 4096) = 0
stat64("/colas/exim4/db/retry", {st_mode=S_IFREG|0640, st_size=1134592, ...}) =
0
open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
read(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 512) =
512
close(8) = 0
stat64("DB_CONFIG", 0xbfef892c) = -1 ENOENT (No such file or directory)
open("DB_CONFIG", O_RDONLY|O_LARGEFILE) = -1 ENOENT (No such file or directory)
stat64("/var/tmp", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0
gettimeofday({1228379246, 59517}, NULL) = 0
stat64("__db.002", 0xbfef89cc) = -1 ENOENT (No such file or directory)
open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
fstat64(8, {st_mode=S_IFREG|0640, st_size=1134592, ...}) = 0
pread64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096,
0) = 4096
pread64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0&\0b\3\0\2\331"..., 4096,
598016) = 4096
time(NULL) = 1228379246
time(NULL) = 1228379246
time(NULL) = 1228379246
pwrite64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096,
0) = 4096
pwrite64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0(\0\224\2\0\2"..., 4096,
598016) = 4096
fdatasync(8) = 0
close(8) = 0
open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
fdatasync(8) = 0
close(8) = 0
close(5) = 0
time(NULL) = 1228379246
pipe([5, 8]) = 0
rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0xb7f09708) = 16951
close(5) = 0
fcntl64(8, F_GETFL) = 0x1 (flags O_WRONLY)
fstat64(8, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7f1a000
_llseek(8, 0, 0xbfef8ddc, SEEK_CUR) = -1 ESPIPE (Illegal seek)
write(8, "Auto-Submitted: auto-replied\nFro"..., 1062) = 1062
close(8) = 0
munmap(0xb7f1a000, 4096) = 0
waitpid(16951, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0) = 16951
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0
open("/colas/exim4/input/2/hdr.984", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0640) =
5
fchown32(5, 102, 102) = 0
fchmod(5, 0640) = 0
fcntl64(5, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat64(5, {st_mode=S_IFREG|0640, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7f1a000
_llseek(5, 0, [0], SEEK_CUR) = 0
write(5, "1L89B2-0000Fl-NR-H\nexim 102 102\n"..., 464) = 464
fstat64(5, {st_mode=S_IFREG|0640, st_size=464, ...}) = 0
write(5, "214P Received: from [10.240.229."..., 1617) = 1617
fsync(5) = 0
fstat64(5, {st_mode=S_IFREG|0640, st_size=2081, ...}) = 0
close(5) = 0
munmap(0xb7f1a000, 4096) = 0
rename("/colas/exim4/input/2/hdr.984",
"/colas/exim4/input/2/1L89B2-0000Fl-NR-H") = 0
open("/colas/exim4/input/2/.", O_RDONLY|O_LARGEFILE) = 5
fsync(5) = 0
close(5) = 0
close(4) = 0
munmap(0xb7f1b000, 4096) = 0
unlink("/colas/exim4/input/2/1L89B2-0000Fl-NR-J") = -1 ENOENT (No such file or
directory)
close(3) = 0
exit_group(0) = ?
Process 984 detached
[root@buzon ~]# ls -ltr 1L89B2-0000Fl-NR*
-rw-r----- 1 exim exim 977 dic 4 09:03 1L89B2-0000Fl-NR-D
-rw-r----- 1 exim exim 2081 dic 4 09:27 1L89B2-0000Fl-NR-H
Another process:
[root@buzon ~]# ps -ef|grep exim4
root 24501 1 0 11:13 ? 00:00:00 /usr/sbin/exim4 -Mc
1LAOLr-0006ME-8o
[root@buzon ~]# lsof -p 24501
OMMAND PID USER FD TYPE DEVICE SIZE NODE NAME
exim4 24501 root cwd DIR 9,2 4096 11555810 /colas/exim4
exim4 24501 root rtd DIR 9,2 4096 2 /
exim4 24501 root txt REG 9,2 911795 6761799 /usr/sbin/exim-4.69-1
exim4 24501 root mem REG 9,2 75284 6766476 /usr/lib/libz.so.1.2.3
exim4 24501 root mem REG 9,2 121684 16369530 /lib/ld-2.5.so
exim4 24501 root mem REG 9,2 1011024 16368031 /lib/libdb-4.3.so
exim4 24501 root mem REG 9,2 1576952 16369531 /lib/libc-2.5.so
exim4 24501 root mem REG 9,2 174508 6749880
/usr/lib/libgssapi_krb5.so.2.2
exim4 24501 root mem REG 9,2 101036 16369538 /lib/libnsl-2.5.so
exim4 24501 root mem REG 9,2 1238928 16368033
/lib/libcrypto.so.0.9.8b
exim4 24501 root mem REG 9,2 318839 6754918
/usr/lib/libspf2.so.2.0.0
exim4 24501 root mem REG 9,2 133764 6766384 /usr/lib/libpq.so.4.1
exim4 24501 root mem REG 9,2 99252 6750115
/usr/lib/libsasl2.so.2.0.22
exim4 24501 root mem REG 9,2 240444 6755749
/usr/lib/libldap-2.3.so.0.2.15
exim4 24501 root mem REG 9,2 30596 6767446
/usr/lib/libkrb5support.so.0.1
exim4 24501 root mem REG 9,2 16528 16368025 /lib/libdl-2.5.so
exim4 24501 root mem REG 9,2 7720 16369553 /lib/libcom_err.so.2.1
exim4 24501 root mem REG 9,2 76392 16369537 /lib/libresolv-2.5.so
exim4 24501 root mem REG 9,2 53824 6767378
/usr/lib/liblber-2.3.so.0.2.15
exim4 24501 root mem REG 9,2 1453200 7692961
/usr/lib/mysql/libmysqlclient.so.15.0.0
exim4 24501 root mem REG 9,2 46740 16368041
/lib/libnss_files-2.5.so
exim4 24501 root mem REG 9,2 208344 16368018 /lib/libm-2.5.so
exim4 24501 root mem REG 9,2 157196 6767448
/usr/lib/libk5crypto.so.3.0
exim4 24501 root mem REG 9,2 559532 6767449
/usr/lib/libkrb5.so.3.2
exim4 24501 root mem REG 9,2 125564 16368029 /lib/libpthread-2.5.so
exim4 24501 root mem REG 9,2 280464 16368035 /lib/libssl.so.0.9.8b
exim4 24501 root mem REG 9,2 1241272 6843928
/usr/lib/perl5/5.8.8/i386-linux-thread-multi/CORE/libperl.so
exim4 24501 root mem REG 9,2 15264 16369540 /lib/libutil-2.5.so
exim4 24501 root mem REG 9,2 27836 16369539 /lib/libcrypt-2.5.so
exim4 24501 root 0u CHR 1,3 1317 /dev/null
exim4 24501 root 1u CHR 1,3 1317 /dev/null
exim4 24501 root 2u CHR 1,3 1317 /dev/null
exim4 24501 root 3uw REG 9,2 4010 11555953
/colas/exim4/input/r/1LAOLr-0006ME-8o-D
exim4 24501 root 4w REG 9,2 199 11555961
/colas/exim4/msglog/r/1LAOLr-0006ME-8o
exim4 24501 root 5u IPv4 14979272 TCP
************:60283->ldap:ldap (ESTABLISHED)
[root@buzon ~]# strace -p 24501
Process 24501 attached - interrupt to quit
time(NULL) = 1228914090
open("/etc/localtime", O_RDONLY) = 6
fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
fstat64(6, {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7ff3000
read(6, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\0\0\10"..., 4096) =
946
close(6) = 0
munmap(0xb7ff3000, 4096) = 0
write(4, "2008-12-10 14:01:30 carlos.casad"..., 480) = 480
time(NULL) = 1228914090
time(NULL) = 1228914090
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=946, ...}) = 0
socket(PF_FILE, SOCK_DGRAM, 0) = 6
fcntl64(6, F_SETFD, FD_CLOEXEC) = 0
connect(6, {sa_family=AF_FILE, path="/dev/log"}, 110) = 0
send(6, "<22>Dec 10 14:01:30 exim[24501]:"..., 532, MSG_NOSIGNAL) = 532
open("/var/log/exim4/main.log", O_WRONLY|O_APPEND|O_LARGEFILE) = 7
fcntl64(7, F_GETFD) = 0
fcntl64(7, F_SETFD, FD_CLOEXEC) = 0
fstat64(7, {st_mode=S_IFREG|0640, st_size=2844313, ...}) = 0
write(7, "2008-12-10 14:01:30 1LAOLr-0006M"..., 500) = 500
write(5, "0\5\2\1\3B\0", 7) = 7
shutdown(5, 2 /* send and receive */) = 0
close(5) = 0
geteuid32() = 0
getegid32() = 102
setgid32(102) = 0
setuid32(102) = 0
rt_sigaction(SIGTERM, {SIG_IGN}, {SIG_DFL}, 8) = 0
time(NULL) = 1228914090
open("/colas/exim4/db/retry.lockfile", O_RDWR|O_LARGEFILE) = 5
alarm(60) = 0
fcntl64(5, F_SETLKW64, {type=F_WRLCK, whence=SEEK_SET, start=0, len=0},
0xbfc64bc4) = 0
alarm(0) = 60
open("/proc/stat", O_RDONLY) = 8
fstat64(8, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7ff3000
read(8, "cpu 11344351 160 2202934 119297"..., 4096) = 789
read(8, "", 4096) = 0
close(8) = 0
munmap(0xb7ff3000, 4096) = 0
stat64("/colas/exim4/db/retry", {st_mode=S_IFREG|0640, st_size=1134592, ...}) =
0
open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
read(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 512) =
512
close(8) = 0
stat64("DB_CONFIG", 0xbfc6468c) = -1 ENOENT (No such file or directory)
open("DB_CONFIG", O_RDONLY|O_LARGEFILE) = -1 ENOENT (No such file or directory)
stat64("/var/tmp", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0
gettimeofday({1228914090, 239287}, NULL) = 0
stat64("__db.002", 0xbfc6472c) = -1 ENOENT (No such file or directory)
open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
fstat64(8, {st_mode=S_IFREG|0640, st_size=1134592, ...}) = 0
pread64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096,
0) = 4096
pread64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0(\0\305\2\0\2"..., 4096,
598016) = 4096
time(NULL) = 1228914090
pwrite64(8, "\0\0\0\0\1\0\0\0\0\0\0\0a\25\6\0\10\0\0\0\0\20\0\0\0\10"..., 4096,
0) = 4096
pwrite64(8, "\0\0\0\0\1\0\0\0\222\0\0\0\0\0\0\0\0\0\0\0*\0\367\1\0\2"..., 4096,
598016) = 4096
fdatasync(8) = 0
close(8) = 0
open("/colas/exim4/db/retry", O_RDWR|O_LARGEFILE) = 8
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
fdatasync(8) = 0
close(8) = 0
close(5) = 0
time(NULL) = 1228914090
pipe([5, 8]) = 0
rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0xb7fe2708) = 5839
close(5) = 0
fcntl64(8, F_GETFL) = 0x1 (flags O_WRONLY)
fstat64(8, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7ff3000
_llseek(8, 0, 0xbfc64b3c, SEEK_CUR) = -1 ESPIPE (Illegal seek)
write(8, "Auto-Submitted: auto-replied\nFro"..., 990) = 990
close(8) = 0
munmap(0xb7ff3000, 4096) = 0
waitpid(5839, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0) = 5839
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigaction(SIGCHLD, {SIG_DFL}, {SIG_DFL}, 8) = 0
open("/colas/exim4/input/r/hdr.24501", O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE, 0640)
= 5
fchown32(5, 102, 102) = 0
fchmod(5, 0640) = 0
fcntl64(5, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat64(5, {st_mode=S_IFREG|0640, st_size=0, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0xb7ff3000
_llseek(5, 0, [0], SEEK_CUR) = 0
write(5, "1LAOLr-0006ME-8o-H\nexim 102 102\n"..., 403) = 403
fstat64(5, {st_mode=S_IFREG|0640, st_size=403, ...}) = 0
write(5, "250P Received: from [10.240.229."..., 1296) = 1296
fsync(5) = 0
fstat64(5, {st_mode=S_IFREG|0640, st_size=1699, ...}) = 0
close(5) = 0
munmap(0xb7ff3000, 4096) = 0
rename("/colas/exim4/input/r/hdr.24501",
"/colas/exim4/input/r/1LAOLr-0006ME-8o-H") = 0
open("/colas/exim4/input/r/.", O_RDONLY|O_LARGEFILE) = 5
fsync(5) = 0
close(5) = 0
close(4) = 0
munmap(0xb7ff4000, 4096) = 0
unlink("/colas/exim4/input/r/1LAOLr-0006ME-8o-J") = -1 ENOENT (No such file or
directory)
close(3) = 0
exit_group(0) = ?
Process 24501 detached
Thanks.
--
Configure bugmail:
http://bugs.exim.org/userprefs.cgi?tab=email