Hi,
I've been testing LDAP with Exim in order to have a central storage place
for auto-reply texts. It worked without any problems...
...unless you had a German "umlaut" in your text (and, I guess, any other
foreign characters as well). This would cause an unprintable character - no
crash or similar, but not nice ;)
I guess this is due to OpenLDAP storing data in UTF-8.
I have found the code further below, which works fine (at least for the
problem above ;)) - Philip, maybe this is worth implementing?
Maybe as an expansion operator "utf_to_iso"?
Regards,
Philipp
--
Philipp Gaschuetz <philipp@???>
/* Read UTF-8 characters from stdin, convert them to Latin-1
(ISO-8859-1), and write the converted characters to stdout.
UTF-8 is defined by RFC 2279.
*/
#include <errno.h>
#include <stdio.h>
/* Length lookup for UTF-8 lead bytes.  The index is the most-significant
   6 bits of the first byte of a character; the value is the total number
   of bytes in that character.  A value of 0 is erroneous: such a byte
   cannot begin a character.
*/
static char UTF8len[64] = {
  /* first byte 0x00-0x1F */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* first byte 0x20-0x3F */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* first byte 0x40-0x5F */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* first byte 0x60-0x7F */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* first byte 0x80-0x9F */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* first byte 0xA0-0xBF */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* first byte 0xC0-0xDF */ 2, 2, 2, 2, 2, 2, 2, 2,
  /* first byte 0xE0-0xFF */ 3, 3, 3, 3, 4, 4, 5, 6
};
/* Read one byte from stdin.  When getchar() returns EOF, the errno value
   it left behind is stored in *saved_errno straight away, before a later
   stdio call (such as the putchar that follows) can overwrite it.
*/
static int
read_byte (int *saved_errno)
{
  int c;

  errno = 0;
  c = getchar ();
  if (c == EOF) {
    *saved_errno = errno;
  }
  return c;
}

/* Filter stdin to stdout, decoding UTF-8 sequences of 1 to 6 bytes and
   writing each character as a single Latin-1 byte.

   A '?' is written in place of:
     - a character above 0xFF, which Latin-1 cannot represent;
     - a sequence cut short by end of input or by the start of another
       character;
     - a run of continuation bytes that has no lead byte;
     - an overlong sequence (a value encoded in more bytes than it needs).

   Returns 0 on success, or 1 after reporting a read error on stdin or a
   write error on stdout to stderr.
*/
int
main (int argc, char** argv)
{
  /* min_value[n] is the smallest value that needs an n-byte sequence; a
     decoded value below it means the sequence was overlong. */
  static const unsigned long min_value[7] =
    {0, 0, 0x80UL, 0x800UL, 0x10000UL, 0x200000UL, 0x4000000UL};
  const char *prog = (argc > 0) ? argv[0] : NULL;
  int read_errno = 0;
  int c;

  while ((c = read_byte (&read_errno)) != EOF) {
    int len = UTF8len [(c >> 2) & 0x3F];
    int missing;          /* continuation bytes still expected */
    int malformed = 0;
    unsigned long u;

    /* Keep only the payload bits of the first byte. */
    switch (len) {
      case 6: u = (unsigned long) (c & 0x01); break;
      case 5: u = (unsigned long) (c & 0x03); break;
      case 4: u = (unsigned long) (c & 0x07); break;
      case 3: u = (unsigned long) (c & 0x0F); break;
      case 2: u = (unsigned long) (c & 0x1F); break;
      case 1: u = (unsigned long) (c & 0x7F); break;
      default: /* 0: c is the middle of a character. */
        /* Swallow up to four more continuation bytes so that the whole
           stray run is reported as a single '?'. */
        u = (unsigned long) (c & 0x3F);
        len = 5;
        malformed = 1;
        break;
    }

    missing = len - 1;
    while (missing > 0 && (c = read_byte (&read_errno)) != EOF) {
      if ((c & 0xC0) != 0x80) { /* unexpected start of a new character */
        ungetc (c, stdin);
        break;
      }
      u = (u << 6) | (unsigned long) (c & 0x3F);
      --missing;
    }

    if (malformed || missing > 0 || u < min_value[len] || u > 0xFF) {
      putchar ('?'); /* a reasonable alternative is 0x1A (SUB) */
    } else {
      putchar ((int) u);
    }

    if (c == EOF) break; /* input ended inside a multi-byte sequence */
  }

  if (ferror (stdin)) {
    /* ferror() only yields a flag; the error code is the errno captured
       at the moment the read failed. */
    if (read_errno != 0) {
      errno = read_errno;
      perror (prog);
    } else {
      fputs ("error reading standard input\n", stderr);
    }
    return 1;
  }

  if (fflush (stdout) == EOF || ferror (stdout)) {
    fputs ("error writing standard output\n", stderr);
    return 1;
  }
  return 0;
}