Hello,
the appended patch gives a considerable speedup on sorted queue output,
as used by exim -bpa. Unless I have a bug somewhere, here is an example
on one of my outgoing servers:
old code:
145.37user 23.65system 6:30.69elapsed 43%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (257major+51810minor)pagefaults 0swaps
new code:
9.94user 20.94system 4:54.13elapsed 10%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (256major+52234minor)pagefaults 0swaps
My patch replaces an insertion sort by a "reverse merge sort", at
least it looks reverse to me, because I know merge sort as:
chop a list into two lists
apply merge sort to each half
merge them together
This version works when inserting elements:
If there is no list, build a one element list from the new element
If there is a one element list, merge it with the new element to
give a new two element list
If there is a two element list, merge it with the new element to
give a new four element list
...
If the end of input is reached, merge all lists for the result
It does not require much additional code and only a little bit
additional storage. The output at least looks right to me, but
please check it on your system before comitting.
Michael
----------------------------------------------------------------------
--- src/queue.c.orig 2004-02-18 12:01:59.000000000 +0100
+++ src/queue.c 2004-02-18 12:33:11.000000000 +0100
@@ -10,6 +10,30 @@
#include "exim.h"
+#define LOG2_MAXNODES 64
+
+static queue_filename *merge(queue_filename *a, queue_filename *b)
+{
+queue_filename *first=NULL,**append=&first;
+
+while (a && b)
+ {
+ if (Ustrcmp(a->text,b->text)<0)
+ {
+ *append=a;
+ append=&a->next;
+ a=a->next;
+ }
+ else
+ {
+ *append=b;
+ append=&b->next;
+ b=b->next;
+ }
+ }
+*append=(a ? a : b);
+return first;
+}
/*************************************************
@@ -56,7 +80,7 @@
queue_get_spool_list(int subdiroffset, uschar *subdirs, int *subcount,
BOOL randomize)
{
-int i;
+int i,j;
int flags = 0;
int resetflags = -1;
int subptr;
@@ -65,12 +89,15 @@
struct dirent *ent;
DIR *dd;
uschar buffer[256];
+queue_filename *root[LOG2_MAXNODES];
/* The file names are added onto the start or end of the list according to the
bits of the flags variable. When randomizing, get a collection of bits from the
current time. Use the bottom 16 and just keep re-using them if necessary. */
if (randomize) resetflags = time(NULL) & 0xFFFF;
+else
+ for (i=0; i<LOG2_MAXNODES; ++i) root[i]=NULL;
/* If processing the full queue, or just the top-level, start at the base
directory, and initialize the first subdirectory name (as none). Otherwise,
@@ -139,19 +166,11 @@
Ustrcpy(next->text, name);
next->dir_uschar = subdirchar;
- /* First item becomes the top and bottom of the list. */
-
- if (yield == NULL)
- {
- next->next = NULL;
- yield = last = next;
- }
-
/* If randomizing, insert at either top or bottom, randomly. This is, I
argue, faster than doing a sort by allocating a random number to each item,
and it also saves having to store the number with each item. */
- else if (randomize)
+ if (randomize)
{
if (flags == 0) flags = resetflags;
if ((flags & 1) == 0)
@@ -168,30 +187,24 @@
flags = flags >> 1;
}
- /* Otherwise do an insertion sort based on the name. First see if
- it should go before the first item. */
-
- else if (Ustrcmp(next->text, yield->text) < 0)
- {
- next->next = yield;
- yield = next;
- }
-
- /* Otherwise find the item it should go after; check the last one
- first, because that will often be the case. */
+ /* Otherwise do an reverse merge sort based on the name. */
else
{
- queue_filename *this;
- if (Ustrcmp(next->text, last->text) < 0)
+ next->next=NULL;
+ for (j=0; j<LOG2_MAXNODES; ++j)
{
- for (this = yield; this != last; this = this->next)
- if (Ustrcmp(next->text, this->next->text) < 0) break;
+ if (root[j])
+ {
+ next=merge(next,root[j]);
+ root[j]=NULL;
+ }
+ else
+ {
+ root[j]=next;
+ break;
+ }
}
- else this = last;
- next->next = this->next;
- this->next = next;
- if (this == last) last = next;
}
}
}
@@ -228,7 +241,9 @@
}
/* Pass back the list of file items */
-
+if (!randomize)
+ for (j=0; j<LOG2_MAXNODES; ++j)
+ yield=merge(yield,root[j]);
return yield;
}