[Pcre-svn] [794] code/branches/pcre16: Adding --enable-utf o…

Startseite
Nachricht löschen
Autor: Subversion repository
Datum:  
To: pcre-svn
Betreff: [Pcre-svn] [794] code/branches/pcre16: Adding --enable-utf option rather than --enable-utf16.
Revision: 794
          http://vcs.pcre.org/viewvc?view=rev&revision=794
Author:   zherczeg
Date:     2011-12-08 07:36:41 +0000 (Thu, 08 Dec 2011)


Log Message:
-----------
Adding --enable-utf option rather than --enable-utf16. --enable-utf8 is kept for compatibility reasons. And fixing other, minor issues.

Modified Paths:
--------------
    code/branches/pcre16/configure.ac
    code/branches/pcre16/pcre16_ord2utf16.c
    code/branches/pcre16/pcre16_utf16_utils.c
    code/branches/pcre16/pcre16_valid_utf16.c
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcre_config.c
    code/branches/pcre16/pcre_dfa_exec.c
    code/branches/pcre16/pcre_exec.c
    code/branches/pcre16/pcre_internal.h
    code/branches/pcre16/pcre_jit_compile.c
    code/branches/pcre16/pcre_jit_test.c


Modified: code/branches/pcre16/configure.ac
===================================================================
--- code/branches/pcre16/configure.ac    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/configure.ac    2011-12-08 07:36:41 UTC (rev 794)
@@ -121,7 +121,7 @@
 AC_ARG_ENABLE(cpp,
               AS_HELP_STRING([--disable-cpp],
                              [disable C++ support]),
-              , enable_cpp=yes)
+              , enable_cpp=unset)
 AC_SUBST(enable_cpp)


 # Handle --enable-jit (disabled by default)
@@ -145,19 +145,19 @@
 # Handle --enable-utf8 (disabled by default)
 AC_ARG_ENABLE(utf8,
               AS_HELP_STRING([--enable-utf8],
-                             [enable UTF-8 support (incompatible with --enable-ebcdic)]),
+                             [another name for --enable-utf. Kept only for compatibility reasons]),
               , enable_utf8=unset)


-# Handle --enable-utf16 (disabled by default)
-AC_ARG_ENABLE(utf16,
-              AS_HELP_STRING([--enable-utf16],
-                             [enable UTF-16 support (incompatible with --enable-ebcdic)]),
-              , enable_utf16=unset)
+# Handle --enable-utf (disabled by default)
+AC_ARG_ENABLE(utf,
+              AS_HELP_STRING([--enable-utf],
+                             [enable UTF-8/16 support (incompatible with --enable-ebcdic)]),
+              , enable_utf=unset)


 # Handle --enable-unicode-properties
 AC_ARG_ENABLE(unicode-properties,
               AS_HELP_STRING([--enable-unicode-properties],
-                             [enable Unicode properties support (implies --enable-utf8 and --enable-utf16)]),
+                             [enable Unicode properties support (implies --enable-utf)]),
               , enable_unicode_properties=no)


 # Handle --enable-newline=NL
@@ -199,7 +199,7 @@
 # Handle --enable-ebcdic
 AC_ARG_ENABLE(ebcdic,
               AS_HELP_STRING([--enable-ebcdic],
-                             [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
+                             [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
               , enable_ebcdic=no)


 # Handle --disable-stack-for-recursion
@@ -263,26 +263,16 @@
                            [default limit on internal recursion (default=MATCH_LIMIT)]),
             , with_match_limit_recursion=MATCH_LIMIT)


-# Make sure that if enable_utf8 was set, that enable_pcre8 support is enabled
-if test "x$enable_utf8" = "xyes"
+# Copy enable_utf8 value to enable_utf for compatibility reasons
+if test "x$enable_utf8" != "xunset"
 then
-  if test "x$enable_pcre8" = "xno"
+  if test "x$enable_utf" != "xunset"
   then
-    AC_MSG_ERROR([support for UTF-8 requires pcre library with 8 bit characters])
+    AC_MSG_ERROR([--enable/disable-utf8 is kept only for compatibility reasons and its value is copied to --enable/disable-utf. Newer code must use --enable/disable-utf alone.])
   fi
-  enable_pcre8=yes
+  enable_utf=$enable_utf8
 fi


-# Make sure that if enable_utf16 was set, that enable_pcre16 support is enabled
-if test "x$enable_utf16" = "xyes"
-then
-  if test "x$enable_pcre16" = "xno"
-  then
-    AC_MSG_ERROR([support for UTF-16 requires pcre library with 16 bit characters])
-  fi
-  enable_pcre16=yes
-fi
-
 # Set the default value for pcre8
 if test "x$enable_pcre8" = "xunset"
 then
@@ -301,39 +291,26 @@
   AC_MSG_ERROR([Either 8 or 16 bit (or both) pcre library must be enabled])
 fi


-# Make sure that if enable_unicode_properties was set, that UTF-8 or UTF-16
-# support enabled.
-#
+# Make sure that if enable_unicode_properties was set, that UTF support is enabled.
 if test "x$enable_unicode_properties" = "xyes"
 then
-  if test "x$enable_utf8" = "xno"
+  if test "x$enable_utf" = "xno"
   then
-    AC_MSG_ERROR([support for Unicode properties requires UTF-8 support])
+    AC_MSG_ERROR([support for Unicode properties requires UTF-8/16 support])
   fi
-  if test "x$enable_utf16" = "xno"
-  then
-    AC_MSG_ERROR([support for Unicode properties requires UTF-16 support])
-  fi
-  if test "x$enable_pcre8" = "xyes"
-  then
-    enable_utf8=yes
-  fi
-  if test "x$enable_pcre16" = "xyes"
-  then
-    enable_utf16=yes
-  fi
+  enable_utf=yes
 fi


-# enable_utf8 is disabled by default.
-if test "x$enable_utf8" = "xunset"
+# enable_utf is disabled by default.
+if test "x$enable_utf" = "xunset"
 then
-  enable_utf8=no
+  enable_utf=no
 fi

-# enable_utf16 is disabled by default.
-if test "x$enable_utf16" = "xunset"
+# enable_cpp copies the value of enable_pcre8 by default
+if test "x$enable_cpp" = "xunset"
 then
-  enable_utf16=no
+  enable_cpp=$enable_pcre8
 fi

 # Make sure that if enable_cpp was set, that enable_pcre8 support is enabled
@@ -346,21 +323,17 @@
fi

 # Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
-# Also check that UTF-8 or UTF-16 support is not requested, because PCRE cannot
-# handle EBCDIC and UTF in the same build. To do so it would need to use different
+# Also check that UTF support is not requested, because PCRE cannot handle
+# EBCDIC and UTF in the same build. To do so it would need to use different
 # character constants depending on the mode.
 #
 if test "x$enable_ebcdic" = "xyes"
 then
   enable_rebuild_chartables=yes
-  if test "x$enable_utf8" = "xyes"
+  if test "x$enable_utf" = "xyes"
   then
-    AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time])
+    AC_MSG_ERROR([support for EBCDIC and UTF-8/16 cannot be enabled at the same time])
   fi
-  if test "x$enable_utf16" = "xyes"
-  then
-    AC_MSG_ERROR([support for EBCDIC and UTF-16 cannot be enabled at the same time])
-  fi
 fi


# Convert the newline identifier into the appropriate integer value.
@@ -502,8 +475,7 @@
 AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
 AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
 AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
-AM_CONDITIONAL(WITH_UTF8, test "x$enable_utf8" = "xyes")
-AM_CONDITIONAL(WITH_UTF16, test "x$enable_utf16" = "xyes")
+AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")

 # Checks for typedefs, structures, and compiler characteristics.

@@ -594,22 +566,14 @@
     Define to enable JIT support in pcregrep.])
 fi


-if test "$enable_utf8" = "yes"; then
-  AC_DEFINE([SUPPORT_UTF8], [], [
-    Define to enable support for the UTF-8 Unicode encoding. This will
-    work even in an EBCDIC environment, but it is incompatible with
-    the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
-    *or* ASCII/UTF-8, but not both at once.])
+if test "$enable_utf" = "yes"; then
+  AC_DEFINE([SUPPORT_UTF], [], [
+    Define to enable support for the UTF-8/16 Unicode encoding. This
+    will work even in an EBCDIC environment, but it is incompatible
+    with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
+    code *or* ASCII/UTF-8/16, but not both at once.])
 fi


-if test "$enable_utf16" = "yes"; then
-  AC_DEFINE([SUPPORT_UTF16], [], [
-    Define to enable support for the UTF-16 Unicode encoding. This will
-    work even in an EBCDIC environment, but it is incompatible with
-    the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
-    *or* ASCII/UTF-16, but not both at once.])
-fi
-
 if test "$enable_unicode_properties" = "yes"; then
   AC_DEFINE([SUPPORT_UCP], [], [
     Define to enable support for Unicode properties.])
@@ -742,9 +706,9 @@
     character codes, define this macro as 1. On systems that can use
     "configure", this can be done via --enable-ebcdic. PCRE will then
     assume that all input strings are in EBCDIC. If you do not define
-    this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode.
-    It is not possible to build a version of PCRE that supports both
-    EBCDIC and UTF-8.])
+    this macro, PCRE will assume input strings are ASCII or UTF-8/16
+    Unicode. It is not possible to build a version of PCRE that
+    supports both EBCDIC and UTF-8/16.])
 fi


 # Platform specific issues
@@ -869,8 +833,7 @@
     Build 16 bit pcre library ....... : ${enable_pcre16}
     Build C++ library ............... : ${enable_cpp}
     Enable JIT compiling support .... : ${enable_jit}
-    Enable UTF-8 support ............ : ${enable_utf8}
-    Enable UTF-16 support ........... : ${enable_utf16}
+    Enable UTF-8/16 support ......... : ${enable_utf}
     Unicode properties .............. : ${enable_unicode_properties}
     Newline char/sequence ........... : ${enable_newline}
     \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}


Modified: code/branches/pcre16/pcre16_ord2utf16.c
===================================================================
--- code/branches/pcre16/pcre16_ord2utf16.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre16_ord2utf16.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -67,7 +67,7 @@
 int
 PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
 {
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF


/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
Should never happen in practice. */
@@ -85,11 +85,11 @@
*buffer = 0xdc00 | (cvalue & 0x3ff);
return 2;

-#else
+#else /* SUPPORT_UTF */
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
return 0;
-#endif
+#endif /* SUPPORT_UTF */
}

/* End of pcre16_ord2utf16.c */

Modified: code/branches/pcre16/pcre16_utf16_utils.c
===================================================================
--- code/branches/pcre16/pcre16_utf16_utils.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre16_utf16_utils.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -77,7 +77,7 @@
 int
 pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms)
 {
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
 /* This function converts any UTF-16 string to host byte order and optionally removes
 any Byte Order Marks (BOMS). Returns with the remainig length. */
 BOOL same_bo = TRUE;
@@ -108,11 +108,11 @@
     *optr++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
   }


-#else
+#else /* SUPPORT_UTF */
(void)(output); /* Keep picky compilers happy */
(void)(input);
(void)(keep_boms);
-#endif
+#endif /* SUPPORT_UTF */
return length;
}


Modified: code/branches/pcre16/pcre16_valid_utf16.c
===================================================================
--- code/branches/pcre16/pcre16_valid_utf16.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre16_valid_utf16.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -83,7 +83,7 @@
 int
 PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
 {
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
 register PCRE_PUCHAR p;
 register pcre_uchar c;


@@ -135,10 +135,10 @@
     }
   }


-#else /* SUPPORT_UTF16 */
+#else /* SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
-#endif
+#endif /* SUPPORT_UTF */

return PCRE_UTF16_ERR0; /* This indicates success */
}

Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_compile.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -4607,12 +4607,7 @@
       it's a length rather than a small character. */


 #ifdef SUPPORT_UTF
-#ifdef COMPILE_PCRE8
-      if (utf && (code[-1] & 0x80) != 0)
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
-      if (utf && (code[-1] & 0xfc00) == 0xdc00)
-#endif /* COMPILE_PCRE8 */
+      if (utf && NOT_FIRSTCHAR(code[-1]))
         {
         pcre_uchar *lastchar = code - 1;
         BACKCHAR(lastchar);
@@ -4625,7 +4620,6 @@


       /* Handle the case of a single charater - either with no UTF support, or
       with UTF disabled, or for a single character UTF character. */
-
         {
         c = code[-1];
         if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
@@ -7438,8 +7432,14 @@
   int newnl = 0;
   int newbsr = 0;


+#ifdef COMPILE_PCRE8
   if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
     { skipatstart += 7; options |= PCRE_UTF8; continue; }
+#endif
+#ifdef COMPILE_PCRE16
+  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
+    { skipatstart += 8; options |= PCRE_UTF16; continue; }
+#endif
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
     { skipatstart += 6; options |= PCRE_UCP; continue; }
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)


Modified: code/branches/pcre16/pcre_config.c
===================================================================
--- code/branches/pcre16/pcre_config.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_config.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -73,7 +73,7 @@
 switch (what)
   {
   case PCRE_CONFIG_UTF8:
-#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
   *((int *)where) = 1;
 #else
   *((int *)where) = 0;
@@ -81,7 +81,7 @@
   break;


   case PCRE_CONFIG_UTF16:
-#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
   *((int *)where) = 1;
 #else
   *((int *)where) = 0;

Modified: code/branches/pcre16/pcre_dfa_exec.c
===================================================================
--- code/branches/pcre16/pcre_dfa_exec.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_dfa_exec.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -2683,7 +2683,7 @@
             const pcre_uchar *p = start_subject + local_offsets[rc];
             const pcre_uchar *pp = start_subject + local_offsets[rc+1];
             int charcount = local_offsets[rc+1] - local_offsets[rc];
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
             if (charcount > 0)
               {
               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
@@ -2780,7 +2780,7 @@
             const pcre_uchar *p = ptr;
             const pcre_uchar *pp = local_ptr;
             charcount = pp - p;
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
             }
           }
@@ -2862,7 +2862,7 @@
             {
             const pcre_uchar *p = start_subject + local_offsets[0];
             const pcre_uchar *pp = start_subject + local_offsets[1];
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
             if (repeat_state_offset >= 0)
               { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
@@ -3144,7 +3144,7 @@
 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
 back the character offset. */


-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
   {
   int erroroffset;
@@ -3159,17 +3159,9 @@
     return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
     }
-#ifdef COMPILE_PCRE8
   if (start_offset > 0 && start_offset < length &&
-        (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
+        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
     return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
-  if (start_offset > 0 && start_offset < length &&
-        (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
-    return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
   }
 #endif



Modified: code/branches/pcre16/pcre_exec.c
===================================================================
--- code/branches/pcre16/pcre_exec.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_exec.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -6038,17 +6038,9 @@
     }


   /* Check that a start_offset points to the start of a UTF character. */
-#ifdef COMPILE_PCRE8
   if (start_offset > 0 && start_offset < length &&
-      (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
+      NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
     return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
-  if (start_offset > 0 && start_offset < length &&
-      (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
-    return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
   }
 #endif



Modified: code/branches/pcre16/pcre_internal.h
===================================================================
--- code/branches/pcre16/pcre_internal.h    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_internal.h    2011-12-08 07:36:41 UTC (rev 794)
@@ -57,33 +57,32 @@
 #define COMPILE_PCRE8
 #endif


-/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
-script prevents both being selected, but not everybody uses "configure". */
+/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The
+"configure" script ensures this, but not everybody uses "configure". */

-#if defined EBCDIC && (defined SUPPORT_UTF8 || defined SUPPORT_UTF16)
-#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
+#if defined SUPPORT_UCP && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
#endif

-/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility
+reasons with existing code. */

-#if defined SUPPORT_UCP && !defined SUPPORT_UTF8
-#define SUPPORT_UTF8 1
+#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
#endif

-/* If SUPPORT_UCP is defined, SUPPORT_UTF16 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* Fixme: SUPPORT_UTF8 should be eventually disappear from the code.
+Until then we define it if SUPPORT_UTF is defined. */

-#if defined SUPPORT_UCP && defined COMPILE_PCRE16 && !defined SUPPORT_UTF16
-#define SUPPORT_UTF16 1
+#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8)
+#define SUPPORT_UTF8 1
#endif

-/* This macro is defined if either UTF-8 or UTF-16 support or both are
-enabled. */
+/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
+script prevents both being selected, but not everybody uses "configure". */

-#if defined SUPPORT_UTF8 || defined SUPPORT_UTF16
-/* Unicode Transformation Format is enabled. */
-#define SUPPORT_UTF 1
+#if defined EBCDIC && defined SUPPORT_UTF
+#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
#endif

/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
@@ -524,16 +523,18 @@

#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE

-/* When UTF-8 encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
-not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
-never be called in byte mode. To make sure they can never even appear when
-UTF-8 support is omitted, we don't even define them. */
+/* When UTF encoding is being used, a character is no longer just a single
+character. The macros for character handling generate simple sequences when
+used in character-mode, and more complicated ones for UTF characters.
+GETCHARLENTEST and other macros are not used when UTF is not supported,
+so they are not defined. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */

+#ifndef SUPPORT_UTF
+
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
-#ifndef SUPPORT_UTF
+/* #define NOT_FIRSTCHAR(c) */
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
@@ -562,6 +563,11 @@

#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])

+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
+
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer. */

@@ -724,6 +730,11 @@

#define GET_EXTRALEN(c) 1

+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
+
/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. */


Modified: code/branches/pcre16/pcre_jit_compile.c
===================================================================
--- code/branches/pcre16/pcre_jit_compile.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_jit_compile.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -1240,7 +1240,7 @@
 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
 /* Copy the integer value to the output buffer */
 #ifdef COMPILE_PCRE16
-OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
+OP2(SLJIT_ASHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
 #endif
 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
@@ -1353,7 +1353,7 @@


#ifdef COMPILE_PCRE8

-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf && c > 127)
   {
   n = GET_EXTRALEN(*cc);
@@ -1364,13 +1364,13 @@
     }
   return (n << 8) | bit;
   }
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
 return (0 << 8) | bit;


#else /* COMPILE_PCRE8 */

 #ifdef COMPILE_PCRE16
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
 if (common->utf && c > 65535)
   {
   if (bit >= (1 << 10))
@@ -1378,7 +1378,7 @@
   else
     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
   }
-#endif /* SUPPORT_UTF16 */
+#endif /* SUPPORT_UTF */
 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
 #endif /* COMPILE_PCRE16 */



Modified: code/branches/pcre16/pcre_jit_test.c
===================================================================
--- code/branches/pcre16/pcre_jit_test.c    2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_jit_test.c    2011-12-08 07:36:41 UTC (rev 794)
@@ -616,7 +616,7 @@
     { 0, 0, NULL, NULL }
 };


-pcre_jit_stack* callback(void *arg)
+static pcre_jit_stack* callback(void *arg)
 {
     return (pcre_jit_stack *)arg;
 }