Revision: 1098
http://vcs.pcre.org/viewvc?view=rev&revision=1098
Author: chpe
Date: 2012-10-16 16:56:18 +0100 (Tue, 16 Oct 2012)
Log Message:
-----------
pcre32: utf: Reject all non-characters and not just 0xfffe
Modified Paths:
--------------
code/trunk/doc/pcre16.3
code/trunk/doc/pcre32.3
code/trunk/doc/pcreapi.3
code/trunk/pcre.h.in
code/trunk/pcre16_valid_utf16.c
code/trunk/pcre32_valid_utf32.c
code/trunk/pcre_valid_utf8.c
code/trunk/testdata/testinput15
code/trunk/testdata/testinput24
code/trunk/testdata/testinput26
code/trunk/testdata/testoutput15
code/trunk/testdata/testoutput18-16
code/trunk/testdata/testoutput18-32
code/trunk/testdata/testoutput24
code/trunk/testdata/testoutput26
Modified: code/trunk/doc/pcre16.3
===================================================================
--- code/trunk/doc/pcre16.3 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/doc/pcre16.3 2012-10-16 15:56:18 UTC (rev 1098)
@@ -329,7 +329,7 @@
PCRE_UTF16_ERR1 Missing low surrogate at end of string
PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate
PCRE_UTF16_ERR3 Isolated low surrogate
- PCRE_UTF16_ERR4 Invalid character 0xfffe
+ PCRE_UTF16_ERR4 Non-character
.
.
.SH "ERROR TEXTS"
Modified: code/trunk/doc/pcre32.3
===================================================================
--- code/trunk/doc/pcre32.3 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/doc/pcre32.3 2012-10-16 15:56:18 UTC (rev 1098)
@@ -327,8 +327,8 @@
page. The UTF-32 errors are:
.sp
PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
- PCRE_UTF32_ERR2 Invalid character 0xfffe
- PCRE_UTF32_ERR3 Invalid character > 0x10ffff
+ PCRE_UTF32_ERR2 Non-character
+ PCRE_UTF32_ERR3 Character > 0x10ffff
.
.
.SH "ERROR TEXTS"
Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/doc/pcreapi.3 2012-10-16 15:56:18 UTC (rev 1098)
@@ -2328,6 +2328,11 @@
.sp
The first byte of a character has the value 0xfe or 0xff. These values can
never occur in a valid UTF-8 string.
+.sp
+ PCRE_UTF8_ERR22
+.sp
+Non-character. These are the last two characters in each plane (0xfffe, 0xffff,
+0x1fffe, 0x1ffff .. 0x10fffe, 0x10ffff), and the characters 0xfdd0..0xfdef.
.
.
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre.h.in 2012-10-16 15:56:18 UTC (rev 1098)
@@ -209,6 +209,7 @@
#define PCRE_UTF8_ERR19 19
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
+#define PCRE_UTF8_ERR22 22
/* Specific error codes for UTF-16 validity checks */
Modified: code/trunk/pcre16_valid_utf16.c
===================================================================
--- code/trunk/pcre16_valid_utf16.c 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre16_valid_utf16.c 2012-10-16 15:56:18 UTC (rev 1098)
@@ -69,7 +69,7 @@
PCRE_UTF16_ERR1 Missing low surrogate at the end of the string
PCRE_UTF16_ERR2 Invalid low surrogate
PCRE_UTF16_ERR3 Isolated low surrogate
-PCRE_UTF16_ERR4 Not allowed character
+PCRE_UTF16_ERR4 Non-character
Arguments:
string points to the string
@@ -85,7 +85,7 @@
{
#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
-register pcre_uchar c;
+register pcre_uint32 c;
if (length < 0)
{
@@ -101,9 +101,8 @@
{
/* Normal UTF-16 code point. Neither high nor low surrogate. */
- /* This is probably a BOM from a different byte-order.
- Regardless, the string is rejected. */
- if (c == 0xfffe)
+ /* Check for non-characters */
+ if ((c & 0xfffeu) == 0xfffeu || c >= 0xfdd0u && c <= 0xfdefu)
{
*erroroffset = p - string;
return PCRE_UTF16_ERR4;
@@ -126,6 +125,16 @@
*erroroffset = p - string;
return PCRE_UTF16_ERR2;
}
+ else
+ {
+ /* Valid surrogate, but check for non-characters */
+ c = (((c & 0x3ffu) << 10) | (*p & 0x3ffu)) + 0x10000u;
+ if ((c & 0xfffeu) == 0xfffeu)
+ {
+ *erroroffset = p - string;
+ return PCRE_UTF16_ERR4;
+ }
+ }
}
else
{
Modified: code/trunk/pcre32_valid_utf32.c
===================================================================
--- code/trunk/pcre32_valid_utf32.c 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre32_valid_utf32.c 2012-10-16 15:56:18 UTC (rev 1098)
@@ -66,7 +66,7 @@
PCRE_UTF32_ERR0 No error
PCRE_UTF32_ERR1 Surrogate character
-PCRE_UTF32_ERR2 Disallowed character 0xfffe
+PCRE_UTF32_ERR2 Non-character
PCRE_UTF32_ERR3 Character > 0x10ffff
Arguments:
@@ -99,8 +99,9 @@
{
/* Normal UTF-32 code point. Neither high nor low surrogate. */
- /* This is probably a 16-bit BOM. Regardless, the string is rejected. */
- if (c == 0xfffeu)
+ /* Check for non-characters */
+ if ((c & 0xfffeu) == 0xfffeu ||
+ c >= 0xfdd0u && c <= 0xfdefu)
{
*erroroffset = p - string;
return PCRE_UTF32_ERR2;
Modified: code/trunk/pcre_valid_utf8.c
===================================================================
--- code/trunk/pcre_valid_utf8.c 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre_valid_utf8.c 2012-10-16 15:56:18 UTC (rev 1098)
@@ -92,6 +92,7 @@
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
+PCRE_UTF8_ERR22 Non-character
Arguments:
string points to the string
@@ -116,7 +117,8 @@
for (p = string; length-- > 0; p++)
{
- register int ab, c, d;
+ register pcre_uchar ab, c, d;
+ pcre_uint32 v = 0;
c = *p;
if (c < 128) continue; /* ASCII character */
@@ -185,6 +187,7 @@
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
+ v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@@ -212,6 +215,7 @@
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
+ v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@@ -286,6 +290,14 @@
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
}
+
+ /* Reject non-characters. The pointer p is currently at the last byte of the
+ character. */
+ if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
+ {
+ *erroroffset = (int)(p - string) - ab;
+ return PCRE_UTF8_ERR22;
+ }
}
#else /* Not SUPPORT_UTF */
Modified: code/trunk/testdata/testinput15
===================================================================
--- code/trunk/testdata/testinput15 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testinput15 2012-10-16 15:56:18 UTC (rev 1098)
@@ -94,6 +94,74 @@
\?\xfc\x84\x80\x80\x80\x80
\?\xfd\x83\x80\x80\x80\x80
+/noncharacter/8
+ \x{fffe}
+ \x{ffff}
+ \x{1fffe}
+ \x{1ffff}
+ \x{2fffe}
+ \x{2ffff}
+ \x{3fffe}
+ \x{3ffff}
+ \x{4fffe}
+ \x{4ffff}
+ \x{5fffe}
+ \x{5ffff}
+ \x{6fffe}
+ \x{6ffff}
+ \x{7fffe}
+ \x{7ffff}
+ \x{8fffe}
+ \x{8ffff}
+ \x{9fffe}
+ \x{9ffff}
+ \x{afffe}
+ \x{affff}
+ \x{bfffe}
+ \x{bffff}
+ \x{cfffe}
+ \x{cffff}
+ \x{dfffe}
+ \x{dffff}
+ \x{efffe}
+ \x{effff}
+ \x{ffffe}
+ \x{fffff}
+ \x{10fffe}
+ \x{10ffff}
+ \x{fdd0}
+ \x{fdd1}
+ \x{fdd2}
+ \x{fdd3}
+ \x{fdd4}
+ \x{fdd5}
+ \x{fdd6}
+ \x{fdd7}
+ \x{fdd8}
+ \x{fdd9}
+ \x{fdda}
+ \x{fddb}
+ \x{fddc}
+ \x{fddd}
+ \x{fdde}
+ \x{fddf}
+ \x{fde0}
+ \x{fde1}
+ \x{fde2}
+ \x{fde3}
+ \x{fde4}
+ \x{fde5}
+ \x{fde6}
+ \x{fde7}
+ \x{fde8}
+ \x{fde9}
+ \x{fdea}
+ \x{fdeb}
+ \x{fdec}
+ \x{fded}
+ \x{fdee}
+ \x{fdef}
+
/\x{100}/8DZ
/\x{1000}/8DZ
Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testinput24 2012-10-16 15:56:18 UTC (rev 1098)
@@ -1,3 +1,71 @@
/-- Tests for the 16-bit library with UTF-16 support only */
+/noncharacter/8
+ \x{fffe}
+ \x{ffff}
+ \x{1fffe}
+ \x{1ffff}
+ \x{2fffe}
+ \x{2ffff}
+ \x{3fffe}
+ \x{3ffff}
+ \x{4fffe}
+ \x{4ffff}
+ \x{5fffe}
+ \x{5ffff}
+ \x{6fffe}
+ \x{6ffff}
+ \x{7fffe}
+ \x{7ffff}
+ \x{8fffe}
+ \x{8ffff}
+ \x{9fffe}
+ \x{9ffff}
+ \x{afffe}
+ \x{affff}
+ \x{bfffe}
+ \x{bffff}
+ \x{cfffe}
+ \x{cffff}
+ \x{dfffe}
+ \x{dffff}
+ \x{efffe}
+ \x{effff}
+ \x{ffffe}
+ \x{fffff}
+ \x{10fffe}
+ \x{10ffff}
+ \x{fdd0}
+ \x{fdd1}
+ \x{fdd2}
+ \x{fdd3}
+ \x{fdd4}
+ \x{fdd5}
+ \x{fdd6}
+ \x{fdd7}
+ \x{fdd8}
+ \x{fdd9}
+ \x{fdda}
+ \x{fddb}
+ \x{fddc}
+ \x{fddd}
+ \x{fdde}
+ \x{fddf}
+ \x{fde0}
+ \x{fde1}
+ \x{fde2}
+ \x{fde3}
+ \x{fde4}
+ \x{fde5}
+ \x{fde6}
+ \x{fde7}
+ \x{fde8}
+ \x{fde9}
+ \x{fdea}
+ \x{fdeb}
+ \x{fdec}
+ \x{fded}
+ \x{fdee}
+ \x{fdef}
+
/-- End of testinput24 --/
Modified: code/trunk/testdata/testinput26
===================================================================
--- code/trunk/testdata/testinput26 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testinput26 2012-10-16 15:56:18 UTC (rev 1098)
@@ -7,4 +7,72 @@
/\C/8
\x{110000}
+/noncharacter/8
+ \x{fffe}
+ \x{ffff}
+ \x{1fffe}
+ \x{1ffff}
+ \x{2fffe}
+ \x{2ffff}
+ \x{3fffe}
+ \x{3ffff}
+ \x{4fffe}
+ \x{4ffff}
+ \x{5fffe}
+ \x{5ffff}
+ \x{6fffe}
+ \x{6ffff}
+ \x{7fffe}
+ \x{7ffff}
+ \x{8fffe}
+ \x{8ffff}
+ \x{9fffe}
+ \x{9ffff}
+ \x{afffe}
+ \x{affff}
+ \x{bfffe}
+ \x{bffff}
+ \x{cfffe}
+ \x{cffff}
+ \x{dfffe}
+ \x{dffff}
+ \x{efffe}
+ \x{effff}
+ \x{ffffe}
+ \x{fffff}
+ \x{10fffe}
+ \x{10ffff}
+ \x{fdd0}
+ \x{fdd1}
+ \x{fdd2}
+ \x{fdd3}
+ \x{fdd4}
+ \x{fdd5}
+ \x{fdd6}
+ \x{fdd7}
+ \x{fdd8}
+ \x{fdd9}
+ \x{fdda}
+ \x{fddb}
+ \x{fddc}
+ \x{fddd}
+ \x{fdde}
+ \x{fddf}
+ \x{fde0}
+ \x{fde1}
+ \x{fde2}
+ \x{fde3}
+ \x{fde4}
+ \x{fde5}
+ \x{fde6}
+ \x{fde7}
+ \x{fde8}
+ \x{fde9}
+ \x{fdea}
+ \x{fdeb}
+ \x{fdec}
+ \x{fded}
+ \x{fdee}
+ \x{fdef}
+
/-- End of testinput26 --/
Modified: code/trunk/testdata/testoutput15
===================================================================
--- code/trunk/testdata/testoutput15 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput15 2012-10-16 15:56:18 UTC (rev 1098)
@@ -170,6 +170,140 @@
\?\xfd\x83\x80\x80\x80\x80
No match
+/noncharacter/8
+ \x{fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{1fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{1ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{2fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{2ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{3fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{3ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{4fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{4ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{5fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{5ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{6fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{6ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{7fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{7ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{8fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{8ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{9fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{9ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{afffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{affff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{bfffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{bffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{cfffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{cffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{dfffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{dffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{efffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{effff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{ffffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{10fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{10ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd0}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd1}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd2}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd3}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd4}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd5}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd6}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd7}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd8}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdd9}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdda}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fddb}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fddc}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fddd}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdde}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fddf}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde0}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde1}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde2}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde3}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde4}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde5}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde6}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde7}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde8}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fde9}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdea}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdeb}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdec}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fded}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdee}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+ \x{fdef}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+
/\x{100}/8DZ
------------------------------------------------------------------
Bra
@@ -678,7 +812,7 @@
/X/8
\x{0}\x{d7ff}\x{e000}\x{10ffff}
-No match
+Error -10 (bad UTF-8 string) offset=7 reason=22
\x{d800}
Error -10 (bad UTF-8 string) offset=0 reason=14
\x{d800}\?
Modified: code/trunk/testdata/testoutput18-16
===================================================================
--- code/trunk/testdata/testoutput18-16 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput18-16 2012-10-16 15:56:18 UTC (rev 1098)
@@ -609,7 +609,7 @@
/X/8
\x{0}\x{d7ff}\x{e000}\x{10ffff}
-No match
+Error -10 (bad UTF-16 string) offset=4 reason=4
\x{d800}
Error -10 (bad UTF-16 string) offset=0 reason=1
\x{d800}\?
Modified: code/trunk/testdata/testoutput18-32
===================================================================
--- code/trunk/testdata/testoutput18-32 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput18-32 2012-10-16 15:56:18 UTC (rev 1098)
@@ -607,7 +607,7 @@
/X/8
\x{0}\x{d7ff}\x{e000}\x{10ffff}
-No match
+Error -10 (bad UTF-32 string) offset=3 reason=2
\x{d800}
Error -10 (bad UTF-32 string) offset=0 reason=1
\x{d800}\?
Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput24 2012-10-16 15:56:18 UTC (rev 1098)
@@ -1,3 +1,137 @@
/-- Tests for the 16-bit library with UTF-16 support only */
+/noncharacter/8
+ \x{fffe}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{ffff}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{1fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{1ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{2fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{2ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{3fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{3ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{4fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{4ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{5fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{5ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{6fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{6ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{7fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{7ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{8fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{8ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{9fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{9ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{afffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{affff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{bfffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{bffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{cfffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{cffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{dfffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{dffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{efffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{effff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{ffffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{fffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{10fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{10ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+ \x{fdd0}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd1}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd2}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd3}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd4}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd5}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd6}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd7}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd8}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdd9}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdda}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fddb}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fddc}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fddd}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdde}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fddf}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde0}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde1}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde2}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde3}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde4}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde5}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde6}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde7}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde8}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fde9}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdea}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdeb}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdec}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fded}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdee}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+ \x{fdef}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+
/-- End of testinput24 --/
Modified: code/trunk/testdata/testoutput26
===================================================================
--- code/trunk/testdata/testoutput26 2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput26 2012-10-16 15:56:18 UTC (rev 1098)
@@ -9,4 +9,138 @@
\x{110000}
Error -10 (bad UTF-32 string) offset=0 reason=3
+/noncharacter/8
+ \x{fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{1fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{1ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{2fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{2ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{3fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{3ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{4fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{4ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{5fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{5ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{6fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{6ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{7fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{7ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{8fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{8ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{9fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{9ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{afffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{affff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{bfffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{bffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{cfffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{cffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{dfffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{dffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{efffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{effff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{ffffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{10fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{10ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd0}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd1}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd2}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd3}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd4}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd5}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd6}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd7}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd8}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdd9}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdda}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fddb}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fddc}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fddd}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdde}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fddf}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde0}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde1}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde2}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde3}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde4}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde5}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde6}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde7}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde8}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fde9}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdea}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdeb}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdec}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fded}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdee}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+ \x{fdef}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+
/-- End of testinput26 --/