[Pcre-svn] [795] code/branches/pcre16: extending the 16 bit API, mode check, and fixes

Home
Delete message
Author: Subversion repository
Date:
To: pcre-svn
Subject: [Pcre-svn] [795] code/branches/pcre16: extending the 16 bit API, mode check, and fixes
Revision: 795
          http://vcs.pcre.org/viewvc?view=rev&revision=795
Author:   zherczeg
Date:     2011-12-10 02:20:06 +0000 (Sat, 10 Dec 2011)


Log Message:
-----------
extending the 16 bit API, mode check, and fixes

Modified Paths:
--------------
    code/branches/pcre16/Makefile.am
    code/branches/pcre16/pcre.h.in
    code/branches/pcre16/pcre16_ord2utf16.c
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcre_dfa_exec.c
    code/branches/pcre16/pcre_exec.c
    code/branches/pcre16/pcre_fullinfo.c
    code/branches/pcre16/pcre_get.c
    code/branches/pcre16/pcre_info.c
    code/branches/pcre16/pcre_internal.h
    code/branches/pcre16/pcre_jit_compile.c
    code/branches/pcre16/pcre_jit_test.c
    code/branches/pcre16/pcre_newline.c
    code/branches/pcre16/pcre_ord2utf8.c
    code/branches/pcre16/pcre_study.c
    code/branches/pcre16/pcre_valid_utf8.c
    code/branches/pcre16/pcre_xclass.c
    code/branches/pcre16/pcreposix.c


Added Paths:
-----------
    code/branches/pcre16/pcre16_dfa_exec.c
    code/branches/pcre16/pcre16_get.c


Modified: code/branches/pcre16/Makefile.am
===================================================================
--- code/branches/pcre16/Makefile.am    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/Makefile.am    2011-12-10 02:20:06 UTC (rev 795)
@@ -212,8 +212,10 @@
   pcre16_chartables.c \
   pcre16_compile.c \
   pcre16_config.c \
+  pcre16_dfa_exec.c \
   pcre16_exec.c \
   pcre16_fullinfo.c \
+  pcre16_get.c \
   pcre16_info.c \
   pcre16_jit_compile.c \
   pcre16_newline.c \


Modified: code/branches/pcre16/pcre.h.in
===================================================================
--- code/branches/pcre16/pcre.h.in    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre.h.in    2011-12-10 02:20:06 UTC (rev 795)
@@ -166,6 +166,7 @@
 #define PCRE_ERROR_SHORTUTF8      (-25)
 #define PCRE_ERROR_RECURSELOOP    (-26)
 #define PCRE_ERROR_JIT_STACKLIMIT (-27)
+#define PCRE_ERROR_BADMODE        (-28)


/* Specific error codes for UTF-8 validity checks */

@@ -357,29 +358,46 @@
 PCRE_EXP_DECL int  pcre16_config(int, void *);
 PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
                   int *, int, const char *, char *, int);
-PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
-                  int);
+PCRE_EXP_DECL int  pcre16_copy_named_substring(const pcre *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_SCHAR16 *, int);
+PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int,
+                  char *, int);
+PCRE_EXP_DECL int  pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_SCHAR16 *, int);
 PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
                   const char *, int, int, int, int *, int , int *, int);
+PCRE_EXP_DECL int  pcre16_dfa_exec(const pcre *, const pcre_extra *,
+                  PCRE_SPTR16, int, int, int, int *, int , int *, int);
 PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
                    int, int, int, int *, int);
 PCRE_EXP_DECL int  pcre16_exec(const pcre *, const pcre_extra *, PCRE_SPTR16,
                    int, int, int, int *, int);
 PCRE_EXP_DECL void pcre_free_substring(const char *);
+PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
 PCRE_EXP_DECL void pcre_free_substring_list(const char **);
+PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
                   void *);
 PCRE_EXP_DECL int  pcre16_fullinfo(const pcre *, const pcre_extra *, int,
                   void *);
 PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
                   int *, int, const char *, const char **);
+PCRE_EXP_DECL int  pcre16_get_named_substring(const pcre *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
+PCRE_EXP_DECL int  pcre16_get_stringnumber(const pcre *, PCRE_SPTR16);
 PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
                   char **, char **);
+PCRE_EXP_DECL int  pcre16_get_stringtable_entries(const pcre *, PCRE_SPTR16,
+                  PCRE_SCHAR16 **, PCRE_SCHAR16 **);
 PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
                   const char **);
+PCRE_EXP_DECL int  pcre16_get_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
                   const char ***);
+PCRE_EXP_DECL int  pcre16_get_substring_list(PCRE_SPTR16, int *, int,
+                  PCRE_SPTR16 **);
 PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *);
 PCRE_EXP_DECL int  pcre16_info(const pcre *, int *, int *);
 PCRE_EXP_DECL const unsigned char *pcre_maketables(void);


Added: code/branches/pcre16/pcre16_dfa_exec.c
===================================================================
--- code/branches/pcre16/pcre16_dfa_exec.c                            (rev 0)
+++ code/branches/pcre16/pcre16_dfa_exec.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -0,0 +1,45 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_dfa_exec.c"
+
+/* End of pcre16_dfa_exec.c */


Added: code/branches/pcre16/pcre16_get.c
===================================================================
--- code/branches/pcre16/pcre16_get.c                            (rev 0)
+++ code/branches/pcre16/pcre16_get.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -0,0 +1,45 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_get.c"
+
+/* End of pcre16_get.c */


Modified: code/branches/pcre16/pcre16_ord2utf16.c
===================================================================
--- code/branches/pcre16/pcre16_ord2utf16.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre16_ord2utf16.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -87,7 +87,7 @@


#else /* SUPPORT_UTF */
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
-(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
#endif /* SUPPORT_UTF */
}

Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_compile.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -2357,7 +2357,7 @@
     actual length is stored in the compiled code, so we must update "code"
     here. */


-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
     ccode = code += GET(code, 1);
     goto CHECK_CLASS_REPEAT;
@@ -2367,7 +2367,7 @@
     case OP_NCLASS:
     ccode = code + PRIV(OP_lengths)[OP_CLASS];


-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     CHECK_CLASS_REPEAT:
 #endif


@@ -2980,7 +2980,7 @@
if (next >= 0) switch(op_code)
{
case OP_CHAR:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
@@ -2992,13 +2992,13 @@
high-valued characters. */

   case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   GETCHARTEST(c, previous);
 #else
   c = *previous;
 #endif
   if (c == next) return FALSE;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (utf)
     {
     unsigned int othercase;
@@ -3011,7 +3011,7 @@
     return (unsigned int)c != othercase;
     }
   else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   return (c != cd->fcc[next]);  /* Non-UTF-8 mode */


/* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
@@ -3023,7 +3023,7 @@

   case OP_NOTI:
   if ((c = *previous) == next) return TRUE;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (utf)
     {
     unsigned int othercase;
@@ -3036,7 +3036,7 @@
     return (unsigned int)c == othercase;
     }
   else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   return (c == cd->fcc[next]);  /* Non-UTF-8 mode */


/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
@@ -3128,7 +3128,7 @@
{
case OP_CHAR:
case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
GETCHARTEST(c, previous);
#else
c = *previous;
@@ -3358,7 +3358,7 @@
must not do this for other options (e.g. PCRE_EXTENDED) because they may change
dynamically as we process the pattern. */

-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 BOOL utf = (options & PCRE_UTF8) != 0;
 pcre_uchar utf_chars[6];
@@ -4150,7 +4150,7 @@
           goto LONE_SINGLE_CHARACTER;
           }


-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
         if (utf)
           {                           /* Braces are required because the */
           GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
@@ -4200,7 +4200,9 @@
         matching for characters > 127 is available only if UCP support is
         available. */


-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+        if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))
+#elif defined  SUPPORT_UTF
         if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
 #elif !(defined COMPILE_PCRE8)
         if (d > 255)
@@ -4214,7 +4216,11 @@
           they fit with the basic range. */


 #ifdef SUPPORT_UCP
+#ifndef COMPILE_PCRE8
+          if (utf && (options & PCRE_CASELESS) != 0)
+#else
           if ((options & PCRE_CASELESS) != 0)
+#endif
             {
             unsigned int occ, ocd;
             unsigned int cc = c;
@@ -4257,12 +4263,25 @@


           *class_uchardata++ = XCL_RANGE;
 #ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+          if (utf)
+            {
+            class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+            class_uchardata += PRIV(ord2utf)(d, class_uchardata);
+            }
+          else
+            {
+            *class_uchardata++ = c;
+            *class_uchardata++ = d;
+            }
+#else
           class_uchardata += PRIV(ord2utf)(c, class_uchardata);
           class_uchardata += PRIV(ord2utf)(d, class_uchardata);
-#else
+#endif
+#else /* SUPPORT_UTF */
           *class_uchardata++ = c;
           *class_uchardata++ = d;
-#endif
+#endif /* SUPPORT_UTF */


           /* With UCP support, we are done. Without UCP support, there is no
           caseless matching for UTF characters > 127; we can use the bit map
@@ -4270,9 +4289,26 @@
           can still use  */


 #ifdef SUPPORT_UCP
-          continue;    /* With next character in the class */
-#else
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+          if (utf)
+#endif
+            continue;    /* With next character in the class */
+#endif  /* SUPPORT_UCP */
+
+#if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)
+          if (utf)
+            {
+            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
+            /* Adjust upper limit and fall through to set up the map */
+            d = 127;
+            }
+          else
+            {
+            if (c > 255) continue;
+            /* Adjust upper limit and fall through to set up the map */
+            d = 255;
+            }
+#elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)
           if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
           /* Adjust upper limit and fall through to set up the map */
           d = 127;
@@ -4280,10 +4316,9 @@
           if (c > 255) continue;
           /* Adjust upper limit and fall through to set up the map */
           d = 255;
-#endif  /* SUPPORT_UTF */
-#endif  /* SUPPORT_UCP */
+#endif  /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */
           }
-#endif  /* SUPPORT_UTF8 || COMPILE_PCRE16 */
+#endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */


         /* We use the bit map for 8 bit mode, or when the characters fall
         partially or entirely to [0-255] ([0-127] for UCP) ranges. */
@@ -4314,7 +4349,9 @@


       /* Handle a character that cannot go in the bit map */


-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+      if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
+#elif defined SUPPORT_UTF
       if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
 #elif !(defined COMPILE_PCRE8)
       if (c > 255)
@@ -4324,13 +4361,26 @@
         xclass = TRUE;
         *class_uchardata++ = XCL_SINGLE;
 #ifdef SUPPORT_UTF
-        class_uchardata += PRIV(ord2utf)(c, class_uchardata);
-#else
+#ifndef COMPILE_PCRE8
+        /* In non 8 bit mode, we can get here even
+        if we are not in UTF mode. */
+        if (!utf)
+          *class_uchardata++ = c;
+        else
+#endif
+          class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+#else /* SUPPORT_UTF */
         *class_uchardata++ = c;
-#endif
+#endif /* SUPPORT_UTF */


 #ifdef SUPPORT_UCP
+#ifdef COMPILE_PCRE8
         if ((options & PCRE_CASELESS) != 0)
+#else
+        /* In non 8 bit mode, we can get here even
+        if we are not in UTF mode. */
+        if (utf && (options & PCRE_CASELESS) != 0)
+#endif
           {
           unsigned int othercase;
           if ((othercase = UCD_OTHERCASE(c)) != c)
@@ -4415,7 +4465,7 @@
       /* For a single, positive character, get the value into mcbuffer, and
       then we can handle this with the normal one-character code. */


-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
       if (utf && class_lastchar > 127)
         mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
       else
@@ -4843,7 +4893,7 @@


     else if (*previous == OP_CLASS ||
              *previous == OP_NCLASS ||
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
              *previous == OP_XCLASS ||
 #endif
              *previous == OP_REF ||
@@ -6635,7 +6685,7 @@
     a value > 127. We set its representation in the length/buffer, and then
     handle it as a data character. */


-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (utf && c > 127)
       mclength = PRIV(ord2utf)(c, mcbuffer);
     else
@@ -7471,12 +7521,12 @@
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 utf = (options & PCRE_UTF8) != 0;


-/* Can't support UTF8 unless PCRE has been compiled to include the code. The
+/* Can't support UTF unless PCRE has been compiled to include the code. The
return of an error code from PRIV(valid_utf)() is a new feature, introduced in
release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
not used here. */

-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
      (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
   {
@@ -7673,7 +7723,7 @@
   &firstchar, &reqchar, NULL, cd, NULL);
 re->top_bracket = cd->bracount;
 re->top_backref = cd->top_backref;
-re->flags = cd->external_flags;
+re->flags = cd->external_flags | PCRE_MODE;


if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */


Modified: code/branches/pcre16/pcre_dfa_exec.c
===================================================================
--- code/branches/pcre16/pcre_dfa_exec.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_dfa_exec.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -413,7 +413,7 @@
 const pcre_uchar *end_subject = md->end_subject;
 const pcre_uchar *start_code = md->start_code;


-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
BOOL utf = (md->poptions & PCRE_UTF8) != 0;
#else
BOOL utf = FALSE;
@@ -471,7 +471,7 @@
/* If we can't go back the amount required for the longest lookbehind
pattern, go back as far as we can; some alternatives may still be viable. */

-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
/* In character mode we have to step back character by character */

   if (utf)
@@ -603,9 +603,9 @@
   if (ptr < end_subject)
     {
     clen = 1;        /* Number of bytes in the character */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (utf) { GETCHARLEN(c, ptr, clen); } else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
     c = *ptr;
     }
   else
@@ -692,9 +692,9 @@
     if (coptable[codevalue] > 0)
       {
       dlen = 1;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
       d = code[coptable[codevalue]];
       if (codevalue >= OP_TYPESTAR)
         {
@@ -957,8 +957,8 @@
           {
           const pcre_uchar *temp = ptr - 1;
           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
-#ifdef SUPPORT_UTF8
-          if (utf) BACKCHAR(temp);
+#ifdef SUPPORT_UTF
+          if (utf) { BACKCHAR(temp); }
 #endif
           GETCHARTEST(d, temp);
 #ifdef SUPPORT_UCP
@@ -1983,28 +1983,28 @@
       case OP_CHARI:
       if (clen == 0) break;


-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
       if (utf)
         {
         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
           {
           unsigned int othercase;
-          if (c < 128) othercase = fcc[c]; else
-
-          /* If we have Unicode property support, we can use it to test the
-          other case of the character. */
-
+          if (c < 128)
+            othercase = fcc[c];
+          else
+            /* If we have Unicode property support, we can use it to test the
+            other case of the character. */
 #ifdef SUPPORT_UCP
-          othercase = UCD_OTHERCASE(c);
+            othercase = UCD_OTHERCASE(c);
 #else
-          othercase = NOTACHAR;
+            othercase = NOTACHAR;
 #endif


           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
           }
         }
       else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
       /* Not UTF mode */
         {
         if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
@@ -2207,7 +2207,7 @@
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2215,7 +2215,7 @@
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2254,7 +2254,7 @@
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2262,7 +2262,7 @@
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2299,7 +2299,7 @@
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2307,7 +2307,7 @@
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2336,7 +2336,7 @@
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2344,7 +2344,7 @@
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2380,7 +2380,7 @@
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2388,7 +2388,7 @@
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2438,7 +2438,7 @@
         else
          {
          ecode = code + GET(code, 1);
-         if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE);
+         if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
          }


         /* At this point, isinclass is set for all kinds of class, and ecode
@@ -2994,10 +2994,17 @@
                  < -1 => some kind of unexpected problem
 */


+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
+ PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount, int *workspace, int wscount)
+#endif
{
real_pcre *re = (real_pcre *)argument_re;
dfa_match_data match_block;
@@ -3062,14 +3069,15 @@
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;

/* Set some local values */

-current_subject = (const unsigned char *)subject + start_offset;
-end_subject = (const unsigned char *)subject + length;
+current_subject = (const pcre_uchar *)subject + start_offset;
+end_subject = (const pcre_uchar *)subject + length;
req_char_ptr = current_subject - 1;

-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
utf = (re->options & PCRE_UTF8) != 0;
#else
@@ -3083,7 +3091,7 @@

 md->start_code = (const pcre_uchar *)argument_re +
     re->name_table_offset + re->name_count * re->name_entry_size;
-md->start_subject = (const unsigned char *)subject;
+md->start_subject = (const pcre_uchar *)subject;
 md->end_subject = end_subject;
 md->start_offset = start_offset;
 md->moptions = options;


Modified: code/branches/pcre16/pcre_exec.c
===================================================================
--- code/branches/pcre16/pcre_exec.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_exec.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -2968,7 +2968,7 @@
           MRRETURN(MATCH_NOMATCH);
           }
         GETCHARINCTEST(c, eptr);
-        if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
+        if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
         }


       /* If max == min we can continue with the main loop without the
@@ -2992,7 +2992,7 @@
             MRRETURN(MATCH_NOMATCH);
             }
           GETCHARINCTEST(c, eptr);
-          if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
+          if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
         }
@@ -3015,7 +3015,7 @@
 #else
           c = *eptr;
 #endif
-          if (!PRIV(xclass)(c, data)) break;
+          if (!PRIV(xclass)(c, data, utf)) break;
           eptr += len;
           }
         for(;;)
@@ -6113,6 +6113,7 @@
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   if (study != NULL) study = &internal_study;
   }
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;


/* Set up other data */


Modified: code/branches/pcre16/pcre_fullinfo.c
===================================================================
--- code/branches/pcre16/pcre_fullinfo.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_fullinfo.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -91,6 +91,7 @@
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   if (study != NULL) study = &internal_study;
   }
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;


switch (what)
{

Modified: code/branches/pcre16/pcre_get.c
===================================================================
--- code/branches/pcre16/pcre_get.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_get.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -65,8 +65,13 @@
                 (PCRE_ERROR_NOSUBSTRING) if not found
 */


+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringnumber(const pcre *code, const char *stringname)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringnumber(const pcre *code, PCRE_SPTR16 stringname)
+#endif
 {
 int rc;
 int entrysize;
@@ -87,7 +92,8 @@
   {
   int mid = (top + bot) / 2;
   pcre_uchar *entry = nametable + entrysize*mid;
-  int c = strcmp(stringname, (char *)(entry + 2));
+  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+    (pcre_uchar *)(entry + IMM2_SIZE));
   if (c == 0) return (entry[0] << 8) + entry[1];
   if (c > 0) bot = mid + 1; else top = mid;
   }
@@ -114,9 +120,15 @@
                 (PCRE_ERROR_NOSUBSTRING) if not found
 */


+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
   char **firstptr, char **lastptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringtable_entries(const pcre *code, PCRE_SPTR16 stringname,
+  PCRE_SCHAR16 **firstptr, PCRE_SCHAR16 **lastptr)
+#endif
 {
 int rc;
 int entrysize;
@@ -138,23 +150,31 @@
   {
   int mid = (top + bot) / 2;
   pcre_uchar *entry = nametable + entrysize*mid;
-  int c = strcmp(stringname, (char *)(entry + 2));
+  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+    (pcre_uchar *)(entry + IMM2_SIZE));
   if (c == 0)
     {
     pcre_uchar *first = entry;
     pcre_uchar *last = entry;
     while (first > nametable)
       {
-      if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
+      if (STRCMP_UC_UC((pcre_uchar *)stringname,
+        (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
       first -= entrysize;
       }
     while (last < lastentry)
       {
-      if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
+      if (STRCMP_UC_UC((pcre_uchar *)stringname,
+        (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
       last += entrysize;
       }
+#ifdef COMPILE_PCRE8
     *firstptr = (char *)first;
     *lastptr = (char *)last;
+#else
+    *firstptr = (PCRE_SCHAR16 *)first;
+    *lastptr = (PCRE_SCHAR16 *)last;
+#endif
     return entrysize;
     }
   if (c > 0) bot = mid + 1; else top = mid;
@@ -182,16 +202,29 @@
                or a negative number on error
 */


+#ifdef COMPILE_PCRE8
 static int
 get_first_set(const pcre *code, const char *stringname, int *ovector)
+#else
+static int
+get_first_set(const pcre *code, PCRE_SPTR16 stringname, int *ovector)
+#endif
 {
 const real_pcre *re = (const real_pcre *)code;
 int entrysize;
-char *first, *last;
+pcre_uchar *first, *last;
 pcre_uchar *entry;
+#ifdef COMPILE_PCRE8
 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
   return pcre_get_stringnumber(code, stringname);
-entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
+entrysize = pcre_get_stringtable_entries(code, stringname,
+  (char **)&first, (char **)&last);
+#else
+if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
+  return pcre16_get_stringnumber(code, stringname);
+entrysize = pcre16_get_stringtable_entries(code, stringname,
+  (PCRE_SCHAR16 **)&first, (PCRE_SCHAR16 **)&last);
+#endif
 if (entrysize <= 0) return entrysize;
 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
   {
@@ -231,9 +264,15 @@
                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */


+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
   int stringnumber, char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+  int stringnumber, PCRE_SCHAR16 *buffer, int size)
+#endif
 {
 int yield;
 if (stringnumber < 0 || stringnumber >= stringcount)
@@ -241,7 +280,7 @@
 stringnumber *= 2;
 yield = ovector[stringnumber+1] - ovector[stringnumber];
 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
-memcpy(buffer, subject + ovector[stringnumber], yield);
+memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
 buffer[yield] = 0;
 return yield;
 }
@@ -276,13 +315,23 @@
                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */


+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
+ int stringcount, PCRE_SPTR16 stringname, PCRE_SCHAR16 *buffer, int size)
+#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#else
+return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#endif
}


@@ -308,29 +357,39 @@
                    PCRE_ERROR_NOMEMORY (-6) failed to get store
 */


+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
+ PCRE_SPTR16 **listptr)
+#endif
{
int i;
-int size = sizeof(char *);
+int size = sizeof(pcre_uchar *);
int double_count = stringcount * 2;
-char **stringlist;
-char *p;
+pcre_uchar **stringlist;
+pcre_uchar *p;

for (i = 0; i < double_count; i += 2)
- size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
+ size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);

-stringlist = (char **)(pcre_malloc)(size);
+stringlist = (pcre_uchar **)(pcre_malloc)(size);
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;

+#ifdef COMPILE_PCRE8
*listptr = (const char **)stringlist;
-p = (char *)(stringlist + stringcount + 1);
+#else
+*listptr = (PCRE_SPTR16 *)stringlist;
+#endif
+p = (pcre_uchar *)(stringlist + stringcount + 1);

 for (i = 0; i < double_count; i += 2)
   {
   int len = ovector[i+1] - ovector[i];
-  memcpy(p, subject + ovector[i], len);
+  memcpy(p, subject + ovector[i], IN_UCHARS(len));
   *stringlist++ = p;
   p += len;
   *p++ = 0;
@@ -353,8 +412,13 @@
 Returns:    nothing
 */


+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
 pcre_free_substring_list(const char **pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring_list(PCRE_SPTR16 *pointer)
+#endif
 {
 (pcre_free)((void *)pointer);
 }
@@ -386,21 +450,31 @@
                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
 */


+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+ int stringnumber, PCRE_SPTR16 *stringptr)
+#endif
{
int yield;
-char *substring;
+pcre_uchar *substring;
if (stringnumber < 0 || stringnumber >= stringcount)
return PCRE_ERROR_NOSUBSTRING;
stringnumber *= 2;
yield = ovector[stringnumber+1] - ovector[stringnumber];
-substring = (char *)(pcre_malloc)(yield + 1);
+substring = (pcre_uchar *)(pcre_malloc)(IN_UCHARS(yield + 1));
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
-memcpy(substring, subject + ovector[stringnumber], yield);
+memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
substring[yield] = 0;
-*stringptr = substring;
+#ifdef COMPILE_PCRE8
+*stringptr = (const char *)substring;
+#else
+*stringptr = (PCRE_SPTR16)substring;
+#endif
return yield;
}

@@ -433,13 +507,23 @@
                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */


+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
+ int stringcount, PCRE_SPTR16 stringname, PCRE_SPTR16 *stringptr)
+#endif
{
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
+#else
+return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
+#endif
}


@@ -456,8 +540,13 @@
 Returns:    nothing
 */


+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring(PCRE_SPTR16 pointer)
+#endif
{
(pcre_free)((void *)pointer);
}

Modified: code/branches/pcre16/pcre_info.c
===================================================================
--- code/branches/pcre16/pcre_info.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_info.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -88,6 +88,7 @@
   re = PRIV(try_flipped)(re, &internal_re, NULL, NULL);
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   }
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
 if (first_char != NULL)
   *first_char = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :


Modified: code/branches/pcre16/pcre_internal.h
===================================================================
--- code/branches/pcre16/pcre_internal.h    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_internal.h    2011-12-10 02:20:06 UTC (rev 795)
@@ -832,15 +832,21 @@
 the restrictions on partial matching have been lifted. It remains for backwards
 compatibility. */


-#define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
-#define PCRE_FIRSTSET      0x0002  /* first_char is set */
-#define PCRE_REQCHSET      0x0004  /* req_byte is set */
-#define PCRE_STARTLINE     0x0008  /* start after \n for multiline */
-#define PCRE_JCHANGED      0x0010  /* j option used in regex */
-#define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN       0x0040  /* pattern contains (*THEN) */
-#define PCRE_FCH_CASELESS  0x0080  /* caseless first char */
-#define PCRE_RCH_CASELESS  0x0100  /* caseless requested char */
+#ifdef COMPILE_PCRE8
+#define PCRE_MODE          0x0001  /* compiled in 8 bit mode */
+#endif
+#ifdef COMPILE_PCRE16
+#define PCRE_MODE          0x0002  /* compiled in 16 bit mode */
+#endif
+#define PCRE_FIRSTSET      0x0010  /* first_char is set */
+#define PCRE_FCH_CASELESS  0x0020  /* caseless first char */
+#define PCRE_REQCHSET      0x0040  /* req_byte is set */
+#define PCRE_RCH_CASELESS  0x0080  /* caseless requested char */
+#define PCRE_STARTLINE     0x0100  /* start after \n for multiline */
+#define PCRE_NOPARTIAL     0x0200  /* can't use partial with this regex */
+#define PCRE_JCHANGED      0x0400  /* j option used in regex */
+#define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */


/* Flags for the "extra" block produced by pcre_study(). */

@@ -917,7 +923,7 @@
application that did need both could compile two versions of the library, using
macros to give the functions distinct names. */

-#ifndef SUPPORT_UTF8
+#ifndef SUPPORT_UTF

 /* UTF-8 support is not enabled; use the platform-dependent character literals
 so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
@@ -1186,7 +1192,7 @@
 #define STRING_UCP_RIGHTPAR            "UCP)"
 #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"


-#else /* SUPPORT_UTF8 */
+#else /* SUPPORT_UTF */

 /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
 works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
@@ -1446,7 +1452,7 @@
 #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS


-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */

/* Escape items that are just an encoding of a particular data value. */

@@ -2249,7 +2255,7 @@
 extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
 extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
                            int *, BOOL);
-extern BOOL              PRIV(xclass)(int, const pcre_uchar *);
+extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);


 #ifdef SUPPORT_JIT
 extern void              PRIV(jit_compile)(const real_pcre *, pcre_extra *);


Modified: code/branches/pcre16/pcre_jit_compile.c
===================================================================
--- code/branches/pcre16/pcre_jit_compile.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_jit_compile.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -1311,7 +1311,7 @@
 {
 /* Detects if the character and its othercase has only 1 bit difference. */
 unsigned int c, oc, bit;
-#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 int n;
 #endif



Modified: code/branches/pcre16/pcre_jit_test.c
===================================================================
--- code/branches/pcre16/pcre_jit_test.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_jit_test.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -56,6 +56,8 @@
  Non-letter characters:
    \xc2\xa1 = 0xa1 =  (Inverted Exclamation Mark)
    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
+   \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
+   \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
  Newlines:
    \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
@@ -99,14 +101,20 @@
 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
 #endif


-#define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
-#define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
-#define CMUAP   (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define MA      (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
-#define MAP     (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define CMA     (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+#define MUA    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
+#define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
+#define CMUAP    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define MA    (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+#define MAP    (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define CMA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)


+#define OFFSET_MASK    0xffff
+#define F_DIFF        0x010000
+#define F_FORCECONV    0x020000
+#define F_NO8        0x100000
+#define F_NO16        0x200000
+
 struct regression_test_case {
     int flags;
     int start_offset;
@@ -521,7 +529,7 @@
     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
-    { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
+    { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
@@ -535,11 +543,11 @@
     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
-    { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
+    { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
     { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
-    { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
+    { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
     { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
@@ -601,6 +609,20 @@
     { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
     { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },


+    /* 16 bit specific tests. */
+    { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
+    { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
+    { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
+    { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
+    { CMA, 0 | F_FORCECONV | F_NO8, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
+    { CMA, 0 | F_FORCECONV | F_NO8, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
+    { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
+    { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
+    { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
+    { CMA, 0 | F_FORCECONV | F_NO8, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
+    { CMA, 0 | F_FORCECONV | F_NO8, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
+    { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
+
     /* Deep recursion. */
     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
     { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
@@ -721,7 +743,7 @@
     int ovector8_2[32];
     int return_value8_1, return_value8_2;
     int utf8 = 0, ucp8 = 0;
-    int disabled_flags8 = PCRE_BUG;
+    int disabled_flags8 = 0;
 #endif
 #ifdef SUPPORT_PCRE16
     pcre *re16;
@@ -730,7 +752,7 @@
     int ovector16_2[32];
     int return_value16_1, return_value16_2;
     int utf16 = 0, ucp16 = 0;
-    int disabled_flags16 = PCRE_BUG;
+    int disabled_flags16 = 0;
     int length16;
 #endif


@@ -765,9 +787,11 @@

         error = NULL;
 #ifdef SUPPORT_PCRE8
-        re8 = pcre_compile(current->pattern,
-            current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
-            &error, &err_offs, NULL);
+        re8 = NULL;
+        if (!(current->start_offset & F_NO8))
+            re8 = pcre_compile(current->pattern,
+                current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
+                &error, &err_offs, NULL);


         extra8 = NULL;
         if (re8) {
@@ -784,18 +808,21 @@
                 pcre_free(re8);
                 re8 = NULL;
             }
-        } else if (utf8 && ucp8)
+        } else if (utf8 && ucp8 && !(current->start_offset & F_NO8))
             printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
 #endif
 #ifdef SUPPORT_PCRE16
-        if (current->flags & PCRE_UTF8)
+        if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
             convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
         else
             copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
-        re16 = pcre16_compile(regtest_buf,
-            current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
-            &error, &err_offs, NULL);


+        re16 = NULL;
+        if (!(current->start_offset & F_NO16))
+            re16 = pcre16_compile(regtest_buf,
+                current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
+                &error, &err_offs, NULL);
+
         extra16 = NULL;
         if (re16) {
             error = NULL;
@@ -811,7 +838,7 @@
                 pcre_free(re16);
                 re16 = NULL;
             }
-        } else if (utf16 && ucp16)
+        } else if (utf16 && ucp16 && !(current->start_offset & F_NO16))
             printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
 #endif


@@ -822,16 +849,15 @@
 #ifdef SUPPORT_PCRE8
         return_value8_1 = -1000;
         return_value8_2 = -1000;
+        for (i = 0; i < 32; ++i)
+            ovector8_1[i] = -2;
+        for (i = 0; i < 32; ++i)
+            ovector8_2[i] = -2;
         if (re8) {
             setstack(extra8, 0);
-            for (i = 0; i < 32; ++i)
-                ovector8_1[i] = -2;
-            return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
+            return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
                 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
-
-            for (i = 0; i < 32; ++i)
-                ovector8_2[i] = -2;
-            return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
+            return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
                 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
         }
 #endif
@@ -839,32 +865,30 @@
 #ifdef SUPPORT_PCRE16
         return_value16_1 = -1000;
         return_value16_2 = -1000;
+        for (i = 0; i < 32; ++i)
+            ovector16_1[i] = -2;
+        for (i = 0; i < 32; ++i)
+            ovector16_2[i] = -2;
         if (re16) {
             setstack(extra16, 0);
-            if (current->flags & PCRE_UTF8)
+            if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
                 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
             else
                 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
-
-            for (i = 0; i < 32; ++i)
-                ovector16_1[i] = -2;
-            return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
+            return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
                 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
-
-            for (i = 0; i < 32; ++i)
-                ovector16_2[i] = -2;
-            return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
+            return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
                 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
         }
 #endif


-        /* If PCRE_BUG is set, just run the test, but do not compare the results.
+        /* If F_DIFF is set, just run the test, but do not compare the results.
         Segfaults can still be captured. */


         is_succesful = 1;
-        if (!(current->flags & PCRE_BUG)) {
+        if (!(current->start_offset & F_DIFF)) {
 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
-            if (utf8 == utf16) {
+            if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
                 /* All results must be the same. */
                 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
                     printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
@@ -947,7 +971,7 @@
         }
 #endif


-        /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
+        /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
         printf(".");
         fflush(stdout);
         current++;
@@ -962,5 +986,4 @@
     }
 }


-
/* End of pcre_jit_test.c */

Modified: code/branches/pcre16/pcre_newline.c
===================================================================
--- code/branches/pcre16/pcre_newline.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_newline.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -84,7 +84,7 @@
   GETCHAR(c, ptr);
   }
 else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   c = *ptr;


if (type == NLTYPE_ANYCRLF) switch(c)
@@ -150,7 +150,7 @@
GETCHAR(c, ptr);
}
else
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
c = *ptr;

if (type == NLTYPE_ANYCRLF) switch(c)

Modified: code/branches/pcre16/pcre_ord2utf8.c
===================================================================
--- code/branches/pcre16/pcre_ord2utf8.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_ord2utf8.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -65,7 +65,7 @@
 int
 PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
 {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF


register int i, j;

@@ -88,7 +88,7 @@
#else

(void)(cvalue); /* Keep compiler happy; this function won't ever be */
-(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;

#endif

Modified: code/branches/pcre16/pcre_study.c
===================================================================
--- code/branches/pcre16/pcre_study.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_study.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -323,7 +323,7 @@


     /* Check a class for variable quantification */


-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
     cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
     /* Fall through */
@@ -824,7 +824,7 @@
       case OP_SOM:
       case OP_THEN:
       case OP_THEN_ARG:
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       case OP_XCLASS:
 #endif
       return SSB_FAIL;
@@ -1325,6 +1325,16 @@
   return NULL;
   }


+if ((re->flags & PCRE_MODE) == 0)
+ {
+#ifdef COMPILE_PCRE8
+ *errorptr = "argument is compiled in 16 bit mode";
+#else
+ *errorptr = "argument is compiled in 8 bit mode";
+#endif
+ return NULL;
+ }
+
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
{
*errorptr = "unknown or incorrect option bit(s) set";
@@ -1346,9 +1356,16 @@
/* Set the character tables in the block that is passed around */

   tables = re->tables;
+
+#ifdef COMPILE_PCRE8
   if (tables == NULL)
     (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
     (void *)(&tables));
+#else
+  if (tables == NULL)
+    (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
+    (void *)(&tables));
+#endif


compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;

Modified: code/branches/pcre16/pcre_valid_utf8.c
===================================================================
--- code/branches/pcre16/pcre_valid_utf8.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_valid_utf8.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -105,7 +105,7 @@
 int
 PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
 {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 register PCRE_PUCHAR p;


 if (length < 0)
@@ -288,7 +288,7 @@
     }
   }


-#else /* SUPPORT_UTF8 */
+#else /* SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
#endif

Modified: code/branches/pcre16/pcre_xclass.c
===================================================================
--- code/branches/pcre16/pcre_xclass.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcre_xclass.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -64,11 +64,17 @@
 */


BOOL
-PRIV(xclass)(int c, const pcre_uchar *data)
+PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
{
int t;
BOOL negated = (*data & XCL_NOT) != 0;

+(void)utf;
+#ifdef COMPILE_PCRE8
+/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
+utf = TRUE;
+#endif
+
 /* Character values < 256 are matched against a bitmap, if one is present. If
 not, we still carry on, because there may be ranges that start below 256 in the
 additional data. */
@@ -91,13 +97,30 @@
   int x, y;
   if (t == XCL_SINGLE)
     {
-    GETCHARINC(x, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      x = *data++;
     if (c == x) return !negated;
     }
   else if (t == XCL_RANGE)
     {
-    GETCHARINC(x, data);
-    GETCHARINC(y, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      GETCHARINC(y, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      {
+      x = *data++;
+      y = *data++;
+      }
     if (c >= x && c <= y) return !negated;
     }



Modified: code/branches/pcre16/pcreposix.c
===================================================================
--- code/branches/pcre16/pcreposix.c    2011-12-08 07:36:41 UTC (rev 794)
+++ code/branches/pcre16/pcreposix.c    2011-12-10 02:20:06 UTC (rev 795)
@@ -401,6 +401,7 @@
   case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
   case PCRE_ERROR_BADUTF8: return REG_INVARG;
   case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
+  case PCRE_ERROR_BADMODE: return REG_INVARG;
   default: return REG_ASSERT;
   }
 }