[Pcre-svn] [1098] code/trunk: pcre32: utf: Reject all non-characters and not just 0xfffe

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1098] code/trunk: pcre32: utf: Reject all non-characters and not just 0xfffe
Revision: 1098
          http://vcs.pcre.org/viewvc?view=rev&revision=1098
Author:   chpe
Date:     2012-10-16 16:56:18 +0100 (Tue, 16 Oct 2012)


Log Message:
-----------
pcre32: utf: Reject all non-characters and not just 0xfffe

Modified Paths:
--------------
    code/trunk/doc/pcre16.3
    code/trunk/doc/pcre32.3
    code/trunk/doc/pcreapi.3
    code/trunk/pcre.h.in
    code/trunk/pcre16_valid_utf16.c
    code/trunk/pcre32_valid_utf32.c
    code/trunk/pcre_valid_utf8.c
    code/trunk/testdata/testinput15
    code/trunk/testdata/testinput24
    code/trunk/testdata/testinput26
    code/trunk/testdata/testoutput15
    code/trunk/testdata/testoutput18-16
    code/trunk/testdata/testoutput18-32
    code/trunk/testdata/testoutput24
    code/trunk/testdata/testoutput26


Modified: code/trunk/doc/pcre16.3
===================================================================
--- code/trunk/doc/pcre16.3    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/doc/pcre16.3    2012-10-16 15:56:18 UTC (rev 1098)
@@ -329,7 +329,7 @@
   PCRE_UTF16_ERR1  Missing low surrogate at end of string
   PCRE_UTF16_ERR2  Invalid low surrogate follows high surrogate
   PCRE_UTF16_ERR3  Isolated low surrogate
-  PCRE_UTF16_ERR4  Invalid character 0xfffe
+  PCRE_UTF16_ERR4  Non-character
 .
 .
 .SH "ERROR TEXTS"


Modified: code/trunk/doc/pcre32.3
===================================================================
--- code/trunk/doc/pcre32.3    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/doc/pcre32.3    2012-10-16 15:56:18 UTC (rev 1098)
@@ -327,8 +327,8 @@
 page. The UTF-32 errors are:
 .sp
   PCRE_UTF32_ERR1  Surrogate character (range from 0xd800 to 0xdfff)
-  PCRE_UTF32_ERR2  Invalid character 0xfffe
-  PCRE_UTF32_ERR3  Invalid character > 0x10ffff
+  PCRE_UTF32_ERR2  Non-character
+  PCRE_UTF32_ERR3  Character > 0x10ffff
 .
 .
 .SH "ERROR TEXTS"


Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/doc/pcreapi.3    2012-10-16 15:56:18 UTC (rev 1098)
@@ -2328,6 +2328,11 @@
 .sp
 The first byte of a character has the value 0xfe or 0xff. These values can
 never occur in a valid UTF-8 string.
+.sp
+  PCRE_UTF8_ERR22
+.sp
+Non-character. These are the last two characters in each plane (0xfffe, 0xffff,
+0x1fffe, 0x1ffff .. 0x10fffe, 0x10ffff), and the characters 0xfdd0..0xfdef.
 .
 .
 .SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"


Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre.h.in    2012-10-16 15:56:18 UTC (rev 1098)
@@ -209,6 +209,7 @@
 #define PCRE_UTF8_ERR19             19
 #define PCRE_UTF8_ERR20             20
 #define PCRE_UTF8_ERR21             21
+#define PCRE_UTF8_ERR22             22


/* Specific error codes for UTF-16 validity checks */


Modified: code/trunk/pcre16_valid_utf16.c
===================================================================
--- code/trunk/pcre16_valid_utf16.c    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre16_valid_utf16.c    2012-10-16 15:56:18 UTC (rev 1098)
@@ -69,7 +69,7 @@
 PCRE_UTF16_ERR1  Missing low surrogate at the end of the string
 PCRE_UTF16_ERR2  Invalid low surrogate
 PCRE_UTF16_ERR3  Isolated low surrogate
-PCRE_UTF16_ERR4  Not allowed character
+PCRE_UTF16_ERR4  Non-character


 Arguments:
   string       points to the string
@@ -85,7 +85,7 @@
 {
 #ifdef SUPPORT_UTF
 register PCRE_PUCHAR p;
-register pcre_uchar c;
+register pcre_uint32 c;


 if (length < 0)
   {
@@ -101,9 +101,8 @@
     {
     /* Normal UTF-16 code point. Neither high nor low surrogate. */


-    /* This is probably a BOM from a different byte-order.
-    Regardless, the string is rejected. */
-    if (c == 0xfffe)
+    /* Check for non-characters */
+    if ((c & 0xfffeu) == 0xfffeu || c >= 0xfdd0u && c <= 0xfdefu)
       {
       *erroroffset = p - string;
       return PCRE_UTF16_ERR4;
@@ -126,6 +125,16 @@
       *erroroffset = p - string;
       return PCRE_UTF16_ERR2;
       }
+    else
+      {
+      /* Valid surrogate, but check for non-characters */
+      c = (((c & 0x3ffu) << 10) | (*p & 0x3ffu)) + 0x10000u;
+      if ((c & 0xfffeu) == 0xfffeu)
+        {
+        *erroroffset = p - string;
+        return PCRE_UTF16_ERR4;
+        }
+      }
     }
   else
     {


Modified: code/trunk/pcre32_valid_utf32.c
===================================================================
--- code/trunk/pcre32_valid_utf32.c    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre32_valid_utf32.c    2012-10-16 15:56:18 UTC (rev 1098)
@@ -66,7 +66,7 @@


PCRE_UTF32_ERR0 No error
PCRE_UTF32_ERR1 Surrogate character
-PCRE_UTF32_ERR2 Disallowed character 0xfffe
+PCRE_UTF32_ERR2 Non-character
PCRE_UTF32_ERR3 Character > 0x10ffff

 Arguments:
@@ -99,8 +99,9 @@
     {
     /* Normal UTF-32 code point. Neither high nor low surrogate. */


-    /* This is probably a 16-bit BOM. Regardless, the string is rejected. */
-    if (c == 0xfffeu)
+    /* Check for non-characters */
+    if ((c & 0xfffeu) == 0xfffeu ||
+        c >= 0xfdd0u && c <= 0xfdefu)
       {
       *erroroffset = p - string;
       return PCRE_UTF32_ERR2;


Modified: code/trunk/pcre_valid_utf8.c
===================================================================
--- code/trunk/pcre_valid_utf8.c    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/pcre_valid_utf8.c    2012-10-16 15:56:18 UTC (rev 1098)
@@ -92,6 +92,7 @@
 PCRE_UTF8_ERR19  Overlong 6-byte sequence (won't ever occur)
 PCRE_UTF8_ERR20  Isolated 0x80 byte (not within UTF-8 character)
 PCRE_UTF8_ERR21  Byte with the illegal value 0xfe or 0xff
+PCRE_UTF8_ERR22  Non-character


 Arguments:
   string       points to the string
@@ -116,7 +117,8 @@


for (p = string; length-- > 0; p++)
{
- register int ab, c, d;
+ register pcre_uchar ab, c, d;
+ pcre_uint32 v = 0;

   c = *p;
   if (c < 128) continue;                /* ASCII character */
@@ -185,6 +187,7 @@
       *erroroffset = (int)(p - string) - 2;
       return PCRE_UTF8_ERR14;
       }
+    v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
     break;


     /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@@ -212,6 +215,7 @@
       *erroroffset = (int)(p - string) - 3;
       return PCRE_UTF8_ERR13;
       }
+    v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
     break;


     /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@@ -286,6 +290,14 @@
     *erroroffset = (int)(p - string) - ab;
     return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
     }
+
+  /* Reject non-characters. The pointer p is currently at the last byte of the
+  character. */
+  if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
+    {
+    *erroroffset = (int)(p - string) - ab;
+    return PCRE_UTF8_ERR22;
+    }
   }


#else /* Not SUPPORT_UTF */

Modified: code/trunk/testdata/testinput15
===================================================================
--- code/trunk/testdata/testinput15    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testinput15    2012-10-16 15:56:18 UTC (rev 1098)
@@ -94,6 +94,74 @@
     \?\xfc\x84\x80\x80\x80\x80
     \?\xfd\x83\x80\x80\x80\x80


+/noncharacter/8
+    \x{fffe}
+    \x{ffff}
+    \x{1fffe}
+    \x{1ffff}
+    \x{2fffe}
+    \x{2ffff}
+    \x{3fffe}
+    \x{3ffff}
+    \x{4fffe}
+    \x{4ffff}
+    \x{5fffe}
+    \x{5ffff}
+    \x{6fffe}
+    \x{6ffff}
+    \x{7fffe}
+    \x{7ffff}
+    \x{8fffe}
+    \x{8ffff}
+    \x{9fffe}
+    \x{9ffff}
+    \x{afffe}
+    \x{affff}
+    \x{bfffe}
+    \x{bffff}
+    \x{cfffe}
+    \x{cffff}
+    \x{dfffe}
+    \x{dffff}
+    \x{efffe}
+    \x{effff}
+    \x{ffffe}
+    \x{fffff}
+    \x{10fffe}
+    \x{10ffff}
+    \x{fdd0}
+    \x{fdd1}
+    \x{fdd2}
+    \x{fdd3}
+    \x{fdd4}
+    \x{fdd5}
+    \x{fdd6}
+    \x{fdd7}
+    \x{fdd8}
+    \x{fdd9}
+    \x{fdda}
+    \x{fddb}
+    \x{fddc}
+    \x{fddd}
+    \x{fdde}
+    \x{fddf}
+    \x{fde0}
+    \x{fde1}
+    \x{fde2}
+    \x{fde3}
+    \x{fde4}
+    \x{fde5}
+    \x{fde6}
+    \x{fde7}
+    \x{fde8}
+    \x{fde9}
+    \x{fdea}
+    \x{fdeb}
+    \x{fdec}
+    \x{fded}
+    \x{fdee}
+    \x{fdef}
+
 /\x{100}/8DZ


/\x{1000}/8DZ

Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testinput24    2012-10-16 15:56:18 UTC (rev 1098)
@@ -1,3 +1,71 @@
 /-- Tests for the 16-bit library with UTF-16 support only */


+/noncharacter/8
+    \x{fffe}
+    \x{ffff}
+    \x{1fffe}
+    \x{1ffff}
+    \x{2fffe}
+    \x{2ffff}
+    \x{3fffe}
+    \x{3ffff}
+    \x{4fffe}
+    \x{4ffff}
+    \x{5fffe}
+    \x{5ffff}
+    \x{6fffe}
+    \x{6ffff}
+    \x{7fffe}
+    \x{7ffff}
+    \x{8fffe}
+    \x{8ffff}
+    \x{9fffe}
+    \x{9ffff}
+    \x{afffe}
+    \x{affff}
+    \x{bfffe}
+    \x{bffff}
+    \x{cfffe}
+    \x{cffff}
+    \x{dfffe}
+    \x{dffff}
+    \x{efffe}
+    \x{effff}
+    \x{ffffe}
+    \x{fffff}
+    \x{10fffe}
+    \x{10ffff}
+    \x{fdd0}
+    \x{fdd1}
+    \x{fdd2}
+    \x{fdd3}
+    \x{fdd4}
+    \x{fdd5}
+    \x{fdd6}
+    \x{fdd7}
+    \x{fdd8}
+    \x{fdd9}
+    \x{fdda}
+    \x{fddb}
+    \x{fddc}
+    \x{fddd}
+    \x{fdde}
+    \x{fddf}
+    \x{fde0}
+    \x{fde1}
+    \x{fde2}
+    \x{fde3}
+    \x{fde4}
+    \x{fde5}
+    \x{fde6}
+    \x{fde7}
+    \x{fde8}
+    \x{fde9}
+    \x{fdea}
+    \x{fdeb}
+    \x{fdec}
+    \x{fded}
+    \x{fdee}
+    \x{fdef}
+
 /-- End of testinput24 --/


Modified: code/trunk/testdata/testinput26
===================================================================
--- code/trunk/testdata/testinput26    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testinput26    2012-10-16 15:56:18 UTC (rev 1098)
@@ -7,4 +7,72 @@
 /\C/8
     \x{110000}


+/noncharacter/8
+    \x{fffe}
+    \x{ffff}
+    \x{1fffe}
+    \x{1ffff}
+    \x{2fffe}
+    \x{2ffff}
+    \x{3fffe}
+    \x{3ffff}
+    \x{4fffe}
+    \x{4ffff}
+    \x{5fffe}
+    \x{5ffff}
+    \x{6fffe}
+    \x{6ffff}
+    \x{7fffe}
+    \x{7ffff}
+    \x{8fffe}
+    \x{8ffff}
+    \x{9fffe}
+    \x{9ffff}
+    \x{afffe}
+    \x{affff}
+    \x{bfffe}
+    \x{bffff}
+    \x{cfffe}
+    \x{cffff}
+    \x{dfffe}
+    \x{dffff}
+    \x{efffe}
+    \x{effff}
+    \x{ffffe}
+    \x{fffff}
+    \x{10fffe}
+    \x{10ffff}
+    \x{fdd0}
+    \x{fdd1}
+    \x{fdd2}
+    \x{fdd3}
+    \x{fdd4}
+    \x{fdd5}
+    \x{fdd6}
+    \x{fdd7}
+    \x{fdd8}
+    \x{fdd9}
+    \x{fdda}
+    \x{fddb}
+    \x{fddc}
+    \x{fddd}
+    \x{fdde}
+    \x{fddf}
+    \x{fde0}
+    \x{fde1}
+    \x{fde2}
+    \x{fde3}
+    \x{fde4}
+    \x{fde5}
+    \x{fde6}
+    \x{fde7}
+    \x{fde8}
+    \x{fde9}
+    \x{fdea}
+    \x{fdeb}
+    \x{fdec}
+    \x{fded}
+    \x{fdee}
+    \x{fdef}
+
 /-- End of testinput26 --/


Modified: code/trunk/testdata/testoutput15
===================================================================
--- code/trunk/testdata/testoutput15    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput15    2012-10-16 15:56:18 UTC (rev 1098)
@@ -170,6 +170,140 @@
     \?\xfd\x83\x80\x80\x80\x80
 No match


+/noncharacter/8
+    \x{fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{1fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{1ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{2fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{2ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{3fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{3ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{4fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{4ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{5fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{5ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{6fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{6ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{7fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{7ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{8fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{8ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{9fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{9ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{afffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{affff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{bfffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{bffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{cfffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{cffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{dfffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{dffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{efffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{effff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{ffffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{10fffe}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{10ffff}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd0}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd1}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd2}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd3}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd4}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd5}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd6}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd7}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd8}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdd9}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdda}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fddb}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fddc}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fddd}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdde}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fddf}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde0}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde1}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde2}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde3}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde4}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde5}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde6}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde7}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde8}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fde9}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdea}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdeb}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdec}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fded}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdee}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+    \x{fdef}
+Error -10 (bad UTF-8 string) offset=0 reason=22
+
 /\x{100}/8DZ
 ------------------------------------------------------------------
         Bra
@@ -678,7 +812,7 @@


 /X/8
     \x{0}\x{d7ff}\x{e000}\x{10ffff}
-No match
+Error -10 (bad UTF-8 string) offset=7 reason=22
     \x{d800}
 Error -10 (bad UTF-8 string) offset=0 reason=14
     \x{d800}\?


Modified: code/trunk/testdata/testoutput18-16
===================================================================
--- code/trunk/testdata/testoutput18-16    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput18-16    2012-10-16 15:56:18 UTC (rev 1098)
@@ -609,7 +609,7 @@


 /X/8
     \x{0}\x{d7ff}\x{e000}\x{10ffff}
-No match
+Error -10 (bad UTF-16 string) offset=4 reason=4
     \x{d800}
 Error -10 (bad UTF-16 string) offset=0 reason=1
     \x{d800}\?


Modified: code/trunk/testdata/testoutput18-32
===================================================================
--- code/trunk/testdata/testoutput18-32    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput18-32    2012-10-16 15:56:18 UTC (rev 1098)
@@ -607,7 +607,7 @@


 /X/8
     \x{0}\x{d7ff}\x{e000}\x{10ffff}
-No match
+Error -10 (bad UTF-32 string) offset=3 reason=2
     \x{d800}
 Error -10 (bad UTF-32 string) offset=0 reason=1
     \x{d800}\?


Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput24    2012-10-16 15:56:18 UTC (rev 1098)
@@ -1,3 +1,137 @@
 /-- Tests for the 16-bit library with UTF-16 support only */


+/noncharacter/8
+    \x{fffe}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{ffff}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{1fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{1ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{2fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{2ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{3fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{3ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{4fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{4ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{5fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{5ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{6fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{6ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{7fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{7ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{8fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{8ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{9fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{9ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{afffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{affff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{bfffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{bffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{cfffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{cffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{dfffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{dffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{efffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{effff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{ffffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{fffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{10fffe}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{10ffff}
+Error -10 (bad UTF-16 string) offset=1 reason=4
+    \x{fdd0}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd1}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd2}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd3}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd4}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd5}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd6}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd7}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd8}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdd9}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdda}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fddb}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fddc}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fddd}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdde}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fddf}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde0}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde1}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde2}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde3}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde4}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde5}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde6}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde7}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde8}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fde9}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdea}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdeb}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdec}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fded}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdee}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+    \x{fdef}
+Error -10 (bad UTF-16 string) offset=0 reason=4
+
 /-- End of testinput24 --/


Modified: code/trunk/testdata/testoutput26
===================================================================
--- code/trunk/testdata/testoutput26    2012-10-16 15:56:13 UTC (rev 1097)
+++ code/trunk/testdata/testoutput26    2012-10-16 15:56:18 UTC (rev 1098)
@@ -9,4 +9,138 @@
     \x{110000}
 Error -10 (bad UTF-32 string) offset=0 reason=3


+/noncharacter/8
+    \x{fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{1fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{1ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{2fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{2ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{3fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{3ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{4fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{4ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{5fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{5ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{6fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{6ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{7fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{7ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{8fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{8ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{9fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{9ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{afffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{affff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{bfffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{bffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{cfffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{cffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{dfffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{dffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{efffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{effff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{ffffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{10fffe}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{10ffff}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd0}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd1}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd2}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd3}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd4}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd5}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd6}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd7}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd8}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdd9}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdda}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fddb}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fddc}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fddd}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdde}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fddf}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde0}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde1}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde2}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde3}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde4}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde5}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde6}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde7}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde8}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fde9}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdea}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdeb}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdec}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fded}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdee}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+    \x{fdef}
+Error -10 (bad UTF-32 string) offset=0 reason=2
+
 /-- End of testinput26 --/