[Pcre-svn] [823] code/branches/pcre16: Tidy pcretest source …

Startseite
Nachricht löschen
Autor: Subversion repository
Datum:  
To: pcre-svn
Betreff: [Pcre-svn] [823] code/branches/pcre16: Tidy pcretest source code and some 8/16 messages.
Revision: 823
          http://vcs.pcre.org/viewvc?view=rev&revision=823
Author:   ph10
Date:     2011-12-24 17:43:22 +0000 (Sat, 24 Dec 2011)


Log Message:
-----------
Tidy pcretest source code and some 8/16 messages. Add "16" error codes.

Modified Paths:
--------------
    code/branches/pcre16/pcre.h.in
    code/branches/pcre16/pcre_exec.c
    code/branches/pcre16/pcretest.c
    code/branches/pcre16/testdata/testinput17
    code/branches/pcre16/testdata/testinput18
    code/branches/pcre16/testdata/testoutput17
    code/branches/pcre16/testdata/testoutput18


Modified: code/branches/pcre16/pcre.h.in
===================================================================
--- code/branches/pcre16/pcre.h.in    2011-12-23 20:37:29 UTC (rev 822)
+++ code/branches/pcre16/pcre.h.in    2011-12-24 17:43:22 UTC (rev 823)
@@ -146,36 +146,39 @@


/* Exec-time and get/set-time error codes */

-#define PCRE_ERROR_NOMATCH         (-1)
-#define PCRE_ERROR_NULL            (-2)
-#define PCRE_ERROR_BADOPTION       (-3)
-#define PCRE_ERROR_BADMAGIC        (-4)
-#define PCRE_ERROR_UNKNOWN_OPCODE  (-5)
-#define PCRE_ERROR_UNKNOWN_NODE    (-5)  /* For backward compatibility */
-#define PCRE_ERROR_NOMEMORY        (-6)
-#define PCRE_ERROR_NOSUBSTRING     (-7)
-#define PCRE_ERROR_MATCHLIMIT      (-8)
-#define PCRE_ERROR_CALLOUT         (-9)  /* Never used by PCRE itself */
-#define PCRE_ERROR_BADUTF8        (-10)
-#define PCRE_ERROR_BADUTF8_OFFSET (-11)
-#define PCRE_ERROR_PARTIAL        (-12)
-#define PCRE_ERROR_BADPARTIAL     (-13)
-#define PCRE_ERROR_INTERNAL       (-14)
-#define PCRE_ERROR_BADCOUNT       (-15)
-#define PCRE_ERROR_DFA_UITEM      (-16)
-#define PCRE_ERROR_DFA_UCOND      (-17)
-#define PCRE_ERROR_DFA_UMLIMIT    (-18)
-#define PCRE_ERROR_DFA_WSSIZE     (-19)
-#define PCRE_ERROR_DFA_RECURSE    (-20)
-#define PCRE_ERROR_RECURSIONLIMIT (-21)
-#define PCRE_ERROR_NULLWSLIMIT    (-22)  /* No longer actually used */
-#define PCRE_ERROR_BADNEWLINE     (-23)
-#define PCRE_ERROR_BADOFFSET      (-24)
-#define PCRE_ERROR_SHORTUTF8      (-25)
-#define PCRE_ERROR_RECURSELOOP    (-26)
-#define PCRE_ERROR_JIT_STACKLIMIT (-27)
-#define PCRE_ERROR_BADMODE        (-28)
-#define PCRE_ERROR_BADENDIANNESS  (-29)
+#define PCRE_ERROR_NOMATCH          (-1)
+#define PCRE_ERROR_NULL             (-2)
+#define PCRE_ERROR_BADOPTION        (-3)
+#define PCRE_ERROR_BADMAGIC         (-4)
+#define PCRE_ERROR_UNKNOWN_OPCODE   (-5)
+#define PCRE_ERROR_UNKNOWN_NODE     (-5)  /* For backward compatibility */
+#define PCRE_ERROR_NOMEMORY         (-6)
+#define PCRE_ERROR_NOSUBSTRING      (-7)
+#define PCRE_ERROR_MATCHLIMIT       (-8)
+#define PCRE_ERROR_CALLOUT          (-9)  /* Never used by PCRE itself */
+#define PCRE_ERROR_BADUTF8         (-10)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF16        (-10)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF8_OFFSET  (-11)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF16_OFFSET (-11)  /* Same for 8/16 */
+#define PCRE_ERROR_PARTIAL         (-12)
+#define PCRE_ERROR_BADPARTIAL      (-13)
+#define PCRE_ERROR_INTERNAL        (-14)
+#define PCRE_ERROR_BADCOUNT        (-15)
+#define PCRE_ERROR_DFA_UITEM       (-16)
+#define PCRE_ERROR_DFA_UCOND       (-17)
+#define PCRE_ERROR_DFA_UMLIMIT     (-18)
+#define PCRE_ERROR_DFA_WSSIZE      (-19)
+#define PCRE_ERROR_DFA_RECURSE     (-20)
+#define PCRE_ERROR_RECURSIONLIMIT  (-21)
+#define PCRE_ERROR_NULLWSLIMIT     (-22)  /* No longer actually used */
+#define PCRE_ERROR_BADNEWLINE      (-23)
+#define PCRE_ERROR_BADOFFSET       (-24)
+#define PCRE_ERROR_SHORTUTF8       (-25)
+#define PCRE_ERROR_SHORTUTF16      (-25)  /* Same for 8/16 */
+#define PCRE_ERROR_RECURSELOOP     (-26)
+#define PCRE_ERROR_JIT_STACKLIMIT  (-27)
+#define PCRE_ERROR_BADMODE         (-28)
+#define PCRE_ERROR_BADENDIANNESS   (-29)


/* Specific error codes for UTF-8 validity checks */


Modified: code/branches/pcre16/pcre_exec.c
===================================================================
--- code/branches/pcre16/pcre_exec.c    2011-12-23 20:37:29 UTC (rev 822)
+++ code/branches/pcre16/pcre_exec.c    2011-12-24 17:43:22 UTC (rev 823)
@@ -6078,8 +6078,13 @@
       offsets[0] = erroroffset;
       offsets[1] = errorcode;
       }
+#ifdef COMPILE_PCRE16
+    return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
+      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
+#else
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+#endif       
     }


/* Check that a start_offset points to the start of a UTF character. */

Modified: code/branches/pcre16/pcretest.c
===================================================================
--- code/branches/pcre16/pcretest.c    2011-12-23 20:37:29 UTC (rev 822)
+++ code/branches/pcre16/pcretest.c    2011-12-24 17:43:22 UTC (rev 823)
@@ -36,7 +36,17 @@
 -----------------------------------------------------------------------------
 */


+/* This program now supports the testing of both the 8-bit and 16-bit PCRE
+libraries in a single program. This is different from the modules such as
+pcre_compile.c in the library itself, which are compiled separately for each
+mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
+(the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
+compiled only once. Therefore, it must not make use of any of the macros from
+pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
+however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
+only supported library functions. */

+
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -161,13 +171,13 @@
#endif

/* It is also possible, originally for the benefit of a version that was
-imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
-without the interface to the DFA matcher (NODFA). In fact, we automatically cut
-out the UTF8 support if PCRE is built without it. */
+imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
+NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
+automatically cut out the UTF support if PCRE is built without it. */

-#ifndef SUPPORT_UTF8
-#ifndef NOUTF8
-#define NOUTF8
+#ifndef SUPPORT_UTF
+#ifndef NOUTF
+#define NOUTF
#endif
#endif

@@ -177,8 +187,14 @@
using a single macro to do this in a generic way, because of the many different
argument requirements. We know that at least one of SUPPORT_PCRE8 and
SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
-use these in the definitions of generic macros. */
+use these in the definitions of generic macros.

+**** Special note about the PCHARSxxx macros: the address of the string to be
+printed is always given as two arguments: a base address followed by an offset.
+The base address is cast to the correct data size for 8 or 16 bit data; the
+offset is in units of this size. If the string were given as base+offset in one
+argument, the casting might be incorrectly applied. */
+
#ifdef SUPPORT_PCRE8

#define PCHARS8(lv, p, offset, len, f) \
@@ -605,7 +621,6 @@
#endif

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
-
#endif /* SUPPORT_PCRE16 */

/* If we have 8-bit support, default use_pcre16 to false; if there is also
@@ -631,8 +646,8 @@
NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
"match limit exceeded",
"callout error code",
- NULL, /* BADUTF8 is handled specially */
- "bad UTF-8 offset",
+ NULL, /* BADUTF8/16 is handled specially */
+ NULL, /* BADUTF8/16 offset is handled specially */
NULL, /* PARTIAL is handled specially */
"not used - internal error",
"internal error - pattern overwritten?",
@@ -646,7 +661,7 @@
"not used - internal error",
"invalid combination of newline options",
"bad offset value",
- NULL, /* SHORTUTF8 is handled specially */
+ NULL, /* SHORTUTF8/16 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
"pattern compiled in wrong mode (8-bit/16-bit error)"
@@ -1011,6 +1026,7 @@
}


+#if !defined NOUTF
 /*************************************************
 *            Convert UTF-8 string to value       *
 *************************************************/
@@ -1026,8 +1042,6 @@
               -6 to 0 => malformed UTF-8 character at offset = (-return)
 */


-#if !defined NOUTF8
-
static int
utf82ord(pcre_uint8 *utf8bytes, int *vptr)
{
@@ -1068,11 +1082,11 @@
*vptr = d;
return i+1;
}
+#endif /* NOUTF */

-#endif


-
+#if !defined NOUTF
 /*************************************************
 *       Convert character value to UTF-8         *
 *************************************************/
@@ -1087,8 +1101,6 @@
 Returns:     number of characters placed in the buffer
 */


-#if !defined NOUTF8
-
static int
ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
{
@@ -1104,7 +1116,6 @@
*utf8bytes = utf8_table2[i] | cvalue;
return i + 1;
}
-
#endif


@@ -1120,6 +1131,10 @@
in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
result is always left in buffer16.

+Note that this function does not object to surrogate values. This is 
+deliberate; it makes it possible to construct UTF-16 strings that are invalid, 
+for the purpose of testing that they are correctly faulted.
+
 Arguments:
   p          points to a byte string
   utf        true if UTF-8 (to be converted to UTF-16)
@@ -1127,6 +1142,7 @@


 Returns:     number of 16-bit data items used (excluding trailing zero)
              OR -1 if a UTF-8 string is malformed
+             OR -2 if a value > 0x10ffff is encountered 
 */


 static int
@@ -1160,6 +1176,7 @@
     {
     int chlen = utf82ord(p, &c);
     if (chlen <= 0) return -1;
+    if (c > 0x10ffff) return -2; 
     p += chlen;
     len -= chlen;
     if (c < 0x10000) *pp++ = c; else
@@ -1365,7 +1382,7 @@


 while (length-- > 0)
   {
-#if !defined NOUTF8
+#if !defined NOUTF
   if (use_utf)
     {
     int rc = utf82ord(p, &c);
@@ -1399,9 +1416,10 @@
 while (*p++ != 0) len++;
 return len;
 }
+#endif  /* SUPPORT_PCRE16 */



-
+#ifdef SUPPORT_PCRE16
 /*************************************************
 *           Print 16-bit character string        *
 *************************************************/
@@ -1419,7 +1437,7 @@
 while (length-- > 0)
   {
   int c = *p++ & 0xffff;
-#if !defined NOUTF8
+#if !defined NOUTF
   if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
     {
     int d = *p & 0xffff;
@@ -1436,7 +1454,7 @@


return yield;
}
-#endif
+#endif /* SUPPORT_PCRE16 */



@@ -1462,7 +1480,7 @@
*pp = npp;
return p;
}
-#endif
+#endif /* SUPPORT_PCRE8 */



@@ -1489,7 +1507,7 @@
*pp = npp;
return p;
}
-#endif
+#endif /* SUPPORT_PCRE16 */



@@ -1680,8 +1698,8 @@
 *             Swap byte functions                *
 *************************************************/


-/* The following functions swap the bytes of a pcre_uint16
-and pcre_uint32 value.
+/* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
+value, respectively.

 Arguments:
   value        any number
@@ -1721,9 +1739,8 @@
 regexflip(pcre *ere, pcre_extra *extra)
 {
 real_pcre *re = (real_pcre *)ere;
+#ifdef SUPPORT_PCRE16
 int op;
-
-#ifdef SUPPORT_PCRE16
 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
 int length = re->name_count * re->name_entry_size;
 #ifdef SUPPORT_UTF
@@ -2128,7 +2145,7 @@
 #endif


/* Get the version number: both pcre_version() and pcre16_version() give the
-same answer. We just need to ensure that we call one that is availab.e */
+same answer. We just need to ensure that we call one that is available. */

 #ifdef SUPPORT_PCRE8
 version = pcre_version();
@@ -2706,11 +2723,20 @@
 #ifdef SUPPORT_PCRE16
     if (use_pcre16)
       {
-      if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
+      switch(to16(p, options & PCRE_UTF8, (int)strlen((char *)p)))
         {
+        case -1: 
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
           "converted to UTF-16\n");
         goto SKIP_DATA;
+         
+        case -2:
+        fprintf(outfile, "**Failed: character value greater than 0x10ffff "
+          "cannot be converted to UTF-16\n");
+        goto SKIP_DATA;
+         
+        default:
+        break;    
         }
       p = (pcre_uint8 *)buffer16;
       }
@@ -3231,7 +3257,7 @@
         while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
           c = c * 8 + *p++ - '0';


-#if !defined NOUTF8
+#if !defined NOUTF
         if (use_utf && c > 255)
           {
           pcre_uint8 buff8[8];
@@ -3247,7 +3273,7 @@


         /* Handle \x{..} specially - new Perl thing for utf8 */


-#if !defined NOUTF8
+#if !defined NOUTF
         if (*p == '{')
           {
           pcre_uint8 *pt = p;
@@ -3593,11 +3619,20 @@
     if (use_pcre16)
       {
       len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
-      if (len < 0)
+      switch(len)
         {
+        case -1: 
         fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
           "converted to UTF-16\n");
         goto NEXT_DATA;
+         
+        case -2:
+        fprintf(outfile, "**Failed: character value greater than 0x10ffff "
+          "cannot be converted to UTF-16\n");
+        goto NEXT_DATA;
+         
+        default:
+        break;    
         }
       bptr = (pcre_uint8 *)buffer16;
       }
@@ -4021,13 +4056,19 @@


             case PCRE_ERROR_BADUTF8:
             case PCRE_ERROR_SHORTUTF8:
-            fprintf(outfile, "Error %d (%s UTF-8 string)", count,
-              (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
+            fprintf(outfile, "Error %d (%s UTF-%s string)", count,
+              (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
+              use_pcre16? "16" : "8");
             if (use_size_offsets >= 2)
               fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
                 use_offsets[1]);
             fprintf(outfile, "\n");
             break;
+            
+            case PCRE_ERROR_BADUTF8_OFFSET:
+            fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
+              use_pcre16? "16" : "8");
+            break;   


             default:
             if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))


Modified: code/branches/pcre16/testdata/testinput17
===================================================================
--- code/branches/pcre16/testdata/testinput17    2011-12-23 20:37:29 UTC (rev 822)
+++ code/branches/pcre16/testdata/testinput17    2011-12-24 17:43:22 UTC (rev 823)
@@ -1,5 +1,6 @@
 /-- This set of tests is for the 16-bit library's basic (non-UTF-16) features 
-    that are not compatible with the 8-bit library. --/
+    that are not compatible with the 8-bit library, or which give different 
+    output in 16-bit mode. --/


 /a\Cb/
     aXb


Modified: code/branches/pcre16/testdata/testinput18
===================================================================
--- code/branches/pcre16/testdata/testinput18    2011-12-23 20:37:29 UTC (rev 822)
+++ code/branches/pcre16/testdata/testinput18    2011-12-24 17:43:22 UTC (rev 823)
@@ -11,46 +11,46 @@


 /X(\C{4})/8
     X\x{11234}YZ
-    
+
 /X\C*/8
     XYZabcdce
-    
+
 /X\C*?/8
     XYZabcde
-    
+
 /X\C{3,5}/8
-    Xabcdefg   
-    X\x{11234}Y 
+    Xabcdefg
+    X\x{11234}Y
     X\x{11234}YZ
-    X\x{11234}\x{512}  
+    X\x{11234}\x{512}
     X\x{11234}\x{512}YZ
     X\x{11234}\x{512}\x{11234}Z


 /X\C{3,5}?/8
-    Xabcdefg   
-    X\x{11234}Y 
+    Xabcdefg
+    X\x{11234}Y
     X\x{11234}YZ
-    X\x{11234}\x{512}YZ  
+    X\x{11234}\x{512}YZ
     *** Failers
     X\x{11234}


 /a\Cb/8
     aXb
     a\nb
-    
-/a\C\Cb/8 
+
+/a\C\Cb/8
     a\x{12257}b
-    ** Failers 
+    ** Failers
     a\x{100}b


 /ab\Cde/8
     abXde
-    
+
 /-- Check maximum character size --/


/\x{ffff}/8DZ

-/\x{10000}/8DZ
+/\x{10000}/8DZ

/\x{100}/8DZ

@@ -70,8 +70,8 @@

/\xff/8DZ

-/\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
-    \x{D55c}\x{ad6d}\x{C5B4} 
+/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
+    \x{D55c}\x{ad6d}\x{C5B4}


 /\x{65e5}\x{672c}\x{8a9e}/DZ8
     \x{65e5}\x{672c}\x{8a9e}
@@ -89,26 +89,26 @@
 /-- This one is here not because it's different to Perl, but because the way
 the captured single-byte is displayed. (In Perl it becomes a character, and you
 can't tell the difference.) --/
-    
+
 /X(\C)(.*)/8
     X\x{1234}
-    X\nabc 
+    X\nabc


-/-- This one is here because Perl gives out a grumbly error message (quite 
+/-- This one is here because Perl gives out a grumbly error message (quite
 correctly, but that messes up comparisons). --/
-    
+
 /a\Cb/8
-    *** Failers 
-    a\x{100}b 
-    
+    *** Failers
+    a\x{100}b
+
 /[^ab\xC0-\xF0]/8SDZ
     \x{f1}
     \x{bf}
     \x{100}
-    \x{1000}   
+    \x{1000}
     *** Failers
-    \x{c0} 
-    \x{f0} 
+    \x{c0}
+    \x{f0}


 /Ā{3,4}/8SDZ
   \x{100}\x{100}\x{100}\x{100\x{100}
@@ -133,7 +133,7 @@
     \x{100}
     Z\x{100}
     \x{100}Z
-    *** Failers 
+    *** Failers


 /[\xff]/DZ8
     >\x{ff}<
@@ -144,8 +144,8 @@


/\777/8I
\x{1ff}
- \777
-
+ \777
+
/\x{100}+\x{200}/8DZ

 /\x{100}+X/8DZ
@@ -160,12 +160,9 @@
     \x{da00}\?
     \x{dfff}
     \x{dfff}\?
-    \x{110000}    
-    \x{110000}\?    
-    \x{2000000} 
-    \x{2000000}\? 
-    \x{7fffffff} 
-    \x{7fffffff}\? 
+    \x{110000}
+    \x{d800}\x{1234}
+    \x{fffe}


 /(*UTF16)\x{11234}/
   abcd\x{11234}pqr
@@ -179,9 +176,9 @@
     ABC\x{1680}
     ABC\x{180e}
     ABC\x{2000}
-    ABC\x{202f} 
-    ABC\x{205f} 
-    ABC\x{3000} 
+    ABC\x{202f}
+    ABC\x{205f}
+    ABC\x{3000}


 /\v/SI8
     ABC\x{0a}
@@ -193,7 +190,7 @@


 /\h*A/SI8
     CDBABC
-    
+
 /\v+A/SI8


 /\s?xxx\s/8SI
@@ -203,8 +200,8 @@
     AB\x{a0}xxx\x{85}XYZ


 /\S \S/I8ST1
-    \x{a2} \x{84} 
-    A Z 
+    \x{a2} \x{84}
+    A Z


 /a+/8
     a\x{123}aa\>1
@@ -228,4 +225,13 @@


/\R/SI8

+/-- Check bad offset --/
+
+/a/8
+    \x{10000}\>1
+    \x{10000}ab\>2
+    \x{10000}ab\>3
+    \x{10000}ab\>4
+    \x{10000}ab\>5
+
 /-- End of testinput18 --/


Modified: code/branches/pcre16/testdata/testoutput17
===================================================================
--- code/branches/pcre16/testdata/testoutput17    2011-12-23 20:37:29 UTC (rev 822)
+++ code/branches/pcre16/testdata/testoutput17    2011-12-24 17:43:22 UTC (rev 823)
@@ -1,5 +1,6 @@
 /-- This set of tests is for the 16-bit library's basic (non-UTF-16) features 
-    that are not compatible with the 8-bit library. --/
+    that are not compatible with the 8-bit library, or which give different 
+    output in 16-bit mode. --/


 /a\Cb/
     aXb


Modified: code/branches/pcre16/testdata/testoutput18
===================================================================
--- code/branches/pcre16/testdata/testoutput18    2011-12-23 20:37:29 UTC (rev 822)
+++ code/branches/pcre16/testdata/testoutput18    2011-12-24 17:43:22 UTC (rev 823)
@@ -17,23 +17,23 @@
     X\x{11234}YZ
  0: X\x{11234}YZ
  1: \x{11234}YZ
-    
+
 /X\C*/8
     XYZabcdce
  0: XYZabcdce
-    
+
 /X\C*?/8
     XYZabcde
  0: X
-    
+
 /X\C{3,5}/8
-    Xabcdefg   
+    Xabcdefg
  0: Xabcde
-    X\x{11234}Y 
+    X\x{11234}Y
  0: X\x{11234}Y
     X\x{11234}YZ
  0: X\x{11234}YZ
-    X\x{11234}\x{512}  
+    X\x{11234}\x{512}
  0: X\x{11234}\x{512}
     X\x{11234}\x{512}YZ
  0: X\x{11234}\x{512}YZ
@@ -41,13 +41,13 @@
  0: X\x{11234}\x{512}\x{11234}


 /X\C{3,5}?/8
-    Xabcdefg   
+    Xabcdefg
  0: Xabc
-    X\x{11234}Y 
+    X\x{11234}Y
  0: X\x{11234}Y
     X\x{11234}YZ
  0: X\x{11234}Y
-    X\x{11234}\x{512}YZ  
+    X\x{11234}\x{512}YZ
  0: X\x{11234}\x{512}
     *** Failers
 No match
@@ -59,11 +59,11 @@
  0: aXb
     a\nb
  0: a\x{0a}b
-    
-/a\C\Cb/8 
+
+/a\C\Cb/8
     a\x{12257}b
  0: a\x{12257}b
-    ** Failers 
+    ** Failers
 No match
     a\x{100}b
 No match
@@ -71,7 +71,7 @@
 /ab\Cde/8
     abXde
  0: abXde
-    
+
 /-- Check maximum character size --/


/\x{ffff}/8DZ
@@ -86,7 +86,7 @@
First char = \x{ffff}
No need char

-/\x{10000}/8DZ 
+/\x{10000}/8DZ
 ------------------------------------------------------------------
         Bra
         \x{10000}
@@ -206,7 +206,7 @@
 First char = \x{ff}
 No need char


-/\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
+/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
 ------------------------------------------------------------------
         Bra
         \x{d55c}\x{ad6d}\x{c5b4}
@@ -217,7 +217,7 @@
 Options: utf
 First char = \x{d55c}
 Need char = \x{c5b4}
-    \x{D55c}\x{ad6d}\x{C5B4} 
+    \x{D55c}\x{ad6d}\x{C5B4}
  0: \x{d55c}\x{ad6d}\x{c5b4}


 /\x{65e5}\x{672c}\x{8a9e}/DZ8
@@ -297,26 +297,26 @@
 /-- This one is here not because it's different to Perl, but because the way
 the captured single-byte is displayed. (In Perl it becomes a character, and you
 can't tell the difference.) --/
-    
+
 /X(\C)(.*)/8
     X\x{1234}
  0: X\x{1234}
  1: \x{1234}
  2: 
-    X\nabc 
+    X\nabc
  0: X\x{0a}abc
  1: \x{0a}
  2: abc


-/-- This one is here because Perl gives out a grumbly error message (quite 
+/-- This one is here because Perl gives out a grumbly error message (quite
 correctly, but that messes up comparisons). --/
-    
+
 /a\Cb/8
-    *** Failers 
+    *** Failers
 No match
-    a\x{100}b 
+    a\x{100}b
  0: a\x{100}b
-    
+
 /[^ab\xC0-\xF0]/8SDZ
 ------------------------------------------------------------------
         Bra
@@ -346,13 +346,13 @@
  0: \x{bf}
     \x{100}
  0: \x{100}
-    \x{1000}   
+    \x{1000}
  0: \x{1000}
     *** Failers
  0: *
-    \x{c0} 
+    \x{c0}
 No match
-    \x{f0} 
+    \x{f0}
 No match


 /Ā{3,4}/8SDZ
@@ -515,7 +515,7 @@
  0: \x{100}
     \x{100}Z
  0: \x{100}
-    *** Failers 
+    *** Failers
 No match


 /[\xff]/DZ8
@@ -567,9 +567,9 @@
 No need char
   \x{1ff}
  0: \x{1ff}
-  \777 
+  \777
  0: \x{1ff}
-  
+
 /\x{100}+\x{200}/8DZ
 ------------------------------------------------------------------
         Bra
@@ -603,29 +603,23 @@
     \x{0}\x{d7ff}\x{e000}\x{10ffff}
 No match
     \x{d800}
-Error -10 (bad UTF-8 string) offset=0 reason=1
+Error -10 (bad UTF-16 string) offset=0 reason=1
     \x{d800}\?
 No match
     \x{da00}
-Error -10 (bad UTF-8 string) offset=0 reason=1
+Error -10 (bad UTF-16 string) offset=0 reason=1
     \x{da00}\?
 No match
     \x{dfff}
-Error -10 (bad UTF-8 string) offset=0 reason=3
+Error -10 (bad UTF-16 string) offset=0 reason=3
     \x{dfff}\?
 No match
-    \x{110000}    
-Error -10 (bad UTF-8 string) offset=0 reason=3
-    \x{110000}\?    
-No match
-    \x{2000000} 
-Error -10 (bad UTF-8 string) offset=1 reason=3
-    \x{2000000}\? 
-No match
-    \x{7fffffff} 
-Error -10 (bad UTF-8 string) offset=1 reason=3
-    \x{7fffffff}\? 
-No match
+    \x{110000}
+**Failed: character value greater than 0x10ffff cannot be converted to UTF-16
+    \x{d800}\x{1234}
+Error -10 (bad UTF-16 string) offset=1 reason=2
+    \x{fffe}
+Error -10 (bad UTF-16 string) offset=0 reason=4


 /(*UTF16)\x{11234}/
   abcd\x{11234}pqr
@@ -657,11 +651,11 @@
  0: \x{180e}
     ABC\x{2000}
  0: \x{2000}
-    ABC\x{202f} 
+    ABC\x{202f}
  0: \x{202f}
-    ABC\x{205f} 
+    ABC\x{205f}
  0: \x{205f}
-    ABC\x{3000} 
+    ABC\x{3000}
  0: \x{3000}


 /\v/SI8
@@ -693,7 +687,7 @@
 Starting byte set: \x09 \x20 A \xa0 
     CDBABC
  0: A
-    
+
 /\v+A/SI8
 Capturing subpattern count = 0
 Options: utf
@@ -742,9 +736,9 @@
   \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee 
   \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd 
   \xfe \xff 
-    \x{a2} \x{84} 
+    \x{a2} \x{84}
  0: \x{a2} \x{84}
-    A Z 
+    A Z
  0: A Z


/a+/8
@@ -826,4 +820,18 @@
Subject length lower bound = 1
Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff

+/-- Check bad offset --/
+
+/a/8
+    \x{10000}\>1
+Error -11 (bad UTF-16 offset)
+    \x{10000}ab\>2
+ 0: a
+    \x{10000}ab\>3
+No match
+    \x{10000}ab\>4
+No match
+    \x{10000}ab\>5
+Error -24 (bad offset value)
+
 /-- End of testinput18 --/