Revision: 794
http://vcs.pcre.org/viewvc?view=rev&revision=794
Author: zherczeg
Date: 2011-12-08 07:36:41 +0000 (Thu, 08 Dec 2011)
Log Message:
-----------
Add the --enable-utf option in place of --enable-utf16; --enable-utf8 is kept for compatibility reasons. Also fix other minor issues.
Modified Paths:
--------------
code/branches/pcre16/configure.ac
code/branches/pcre16/pcre16_ord2utf16.c
code/branches/pcre16/pcre16_utf16_utils.c
code/branches/pcre16/pcre16_valid_utf16.c
code/branches/pcre16/pcre_compile.c
code/branches/pcre16/pcre_config.c
code/branches/pcre16/pcre_dfa_exec.c
code/branches/pcre16/pcre_exec.c
code/branches/pcre16/pcre_internal.h
code/branches/pcre16/pcre_jit_compile.c
code/branches/pcre16/pcre_jit_test.c
Modified: code/branches/pcre16/configure.ac
===================================================================
--- code/branches/pcre16/configure.ac 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/configure.ac 2011-12-08 07:36:41 UTC (rev 794)
@@ -121,7 +121,7 @@
AC_ARG_ENABLE(cpp,
AS_HELP_STRING([--disable-cpp],
[disable C++ support]),
- , enable_cpp=yes)
+ , enable_cpp=unset)
AC_SUBST(enable_cpp)
# Handle --enable-jit (disabled by default)
@@ -145,19 +145,19 @@
# Handle --enable-utf8 (disabled by default)
AC_ARG_ENABLE(utf8,
AS_HELP_STRING([--enable-utf8],
- [enable UTF-8 support (incompatible with --enable-ebcdic)]),
+ [another name for --enable-utf. Kept only for compatibility reasons]),
, enable_utf8=unset)
-# Handle --enable-utf16 (disabled by default)
-AC_ARG_ENABLE(utf16,
- AS_HELP_STRING([--enable-utf16],
- [enable UTF-16 support (incompatible with --enable-ebcdic)]),
- , enable_utf16=unset)
+# Handle --enable-utf (disabled by default)
+AC_ARG_ENABLE(utf,
+ AS_HELP_STRING([--enable-utf],
+ [enable UTF-8/16 support (incompatible with --enable-ebcdic)]),
+ , enable_utf=unset)
# Handle --enable-unicode-properties
AC_ARG_ENABLE(unicode-properties,
AS_HELP_STRING([--enable-unicode-properties],
- [enable Unicode properties support (implies --enable-utf8 and --enable-utf16)]),
+ [enable Unicode properties support (implies --enable-utf)]),
, enable_unicode_properties=no)
# Handle --enable-newline=NL
@@ -199,7 +199,7 @@
# Handle --enable-ebcdic
AC_ARG_ENABLE(ebcdic,
AS_HELP_STRING([--enable-ebcdic],
- [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
+ [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
, enable_ebcdic=no)
# Handle --disable-stack-for-recursion
@@ -263,26 +263,16 @@
[default limit on internal recursion (default=MATCH_LIMIT)]),
, with_match_limit_recursion=MATCH_LIMIT)
-# Make sure that if enable_utf8 was set, that enable_pcre8 support is enabled
-if test "x$enable_utf8" = "xyes"
+# Copy enable_utf8 value to enable_utf for compatibility reasons
+if test "x$enable_utf8" != "xunset"
then
- if test "x$enable_pcre8" = "xno"
+ if test "x$enable_utf" != "xunset"
then
- AC_MSG_ERROR([support for UTF-8 requires pcre library with 8 bit characters])
+ AC_MSG_ERROR([--enable/disable-utf8 is kept only for compatibility reasons and its value is copied to --enable/disable-utf. Newer code must use --enable/disable-utf alone.])
fi
- enable_pcre8=yes
+ enable_utf=$enable_utf8
fi
-# Make sure that if enable_utf16 was set, that enable_pcre16 support is enabled
-if test "x$enable_utf16" = "xyes"
-then
- if test "x$enable_pcre16" = "xno"
- then
- AC_MSG_ERROR([support for UTF-16 requires pcre library with 16 bit characters])
- fi
- enable_pcre16=yes
-fi
-
# Set the default value for pcre8
if test "x$enable_pcre8" = "xunset"
then
@@ -301,39 +291,26 @@
AC_MSG_ERROR([Either 8 or 16 bit (or both) pcre library must be enabled])
fi
-# Make sure that if enable_unicode_properties was set, that UTF-8 or UTF-16
-# support enabled.
-#
+# Make sure that if enable_unicode_properties was set, that UTF support is enabled.
if test "x$enable_unicode_properties" = "xyes"
then
- if test "x$enable_utf8" = "xno"
+ if test "x$enable_utf" = "xno"
then
- AC_MSG_ERROR([support for Unicode properties requires UTF-8 support])
+ AC_MSG_ERROR([support for Unicode properties requires UTF-8/16 support])
fi
- if test "x$enable_utf16" = "xno"
- then
- AC_MSG_ERROR([support for Unicode properties requires UTF-16 support])
- fi
- if test "x$enable_pcre8" = "xyes"
- then
- enable_utf8=yes
- fi
- if test "x$enable_pcre16" = "xyes"
- then
- enable_utf16=yes
- fi
+ enable_utf=yes
fi
-# enable_utf8 is disabled by default.
-if test "x$enable_utf8" = "xunset"
+# enable_utf is disabled by default.
+if test "x$enable_utf" = "xunset"
then
- enable_utf8=no
+ enable_utf=no
fi
-# enable_utf16 is disabled by default.
-if test "x$enable_utf16" = "xunset"
+# enable_cpp copies the value of enable_pcre8 by default
+if test "x$enable_cpp" = "xunset"
then
- enable_utf16=no
+ enable_cpp=$enable_pcre8
fi
# Make sure that if enable_cpp was set, that enable_pcre8 support is enabled
@@ -346,21 +323,17 @@
fi
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
-# Also check that UTF-8 or UTF-16 support is not requested, because PCRE cannot
-# handle EBCDIC and UTF in the same build. To do so it would need to use different
+# Also check that UTF support is not requested, because PCRE cannot handle
+# EBCDIC and UTF in the same build. To do so it would need to use different
# character constants depending on the mode.
#
if test "x$enable_ebcdic" = "xyes"
then
enable_rebuild_chartables=yes
- if test "x$enable_utf8" = "xyes"
+ if test "x$enable_utf" = "xyes"
then
- AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time])
+ AC_MSG_ERROR([support for EBCDIC and UTF-8/16 cannot be enabled at the same time])
fi
- if test "x$enable_utf16" = "xyes"
- then
- AC_MSG_ERROR([support for EBCDIC and UTF-16 cannot be enabled at the same time])
- fi
fi
# Convert the newline identifier into the appropriate integer value.
@@ -502,8 +475,7 @@
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
-AM_CONDITIONAL(WITH_UTF8, test "x$enable_utf8" = "xyes")
-AM_CONDITIONAL(WITH_UTF16, test "x$enable_utf16" = "xyes")
+AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
# Checks for typedefs, structures, and compiler characteristics.
@@ -594,22 +566,14 @@
Define to enable JIT support in pcregrep.])
fi
-if test "$enable_utf8" = "yes"; then
- AC_DEFINE([SUPPORT_UTF8], [], [
- Define to enable support for the UTF-8 Unicode encoding. This will
- work even in an EBCDIC environment, but it is incompatible with
- the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
- *or* ASCII/UTF-8, but not both at once.])
+if test "$enable_utf" = "yes"; then
+ AC_DEFINE([SUPPORT_UTF], [], [
+ Define to enable support for the UTF-8/16 Unicode encoding. This
+ will work even in an EBCDIC environment, but it is incompatible
+ with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
+ code *or* ASCII/UTF-8/16, but not both at once.])
fi
-if test "$enable_utf16" = "yes"; then
- AC_DEFINE([SUPPORT_UTF16], [], [
- Define to enable support for the UTF-16 Unicode encoding. This will
- work even in an EBCDIC environment, but it is incompatible with
- the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
- *or* ASCII/UTF-16, but not both at once.])
-fi
-
if test "$enable_unicode_properties" = "yes"; then
AC_DEFINE([SUPPORT_UCP], [], [
Define to enable support for Unicode properties.])
@@ -742,9 +706,9 @@
character codes, define this macro as 1. On systems that can use
"configure", this can be done via --enable-ebcdic. PCRE will then
assume that all input strings are in EBCDIC. If you do not define
- this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode.
- It is not possible to build a version of PCRE that supports both
- EBCDIC and UTF-8.])
+ this macro, PCRE will assume input strings are ASCII or UTF-8/16
+ Unicode. It is not possible to build a version of PCRE that
+ supports both EBCDIC and UTF-8/16.])
fi
# Platform specific issues
@@ -869,8 +833,7 @@
Build 16 bit pcre library ....... : ${enable_pcre16}
Build C++ library ............... : ${enable_cpp}
Enable JIT compiling support .... : ${enable_jit}
- Enable UTF-8 support ............ : ${enable_utf8}
- Enable UTF-16 support ........... : ${enable_utf16}
+ Enable UTF-8/16 support ......... : ${enable_utf}
Unicode properties .............. : ${enable_unicode_properties}
Newline char/sequence ........... : ${enable_newline}
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
Modified: code/branches/pcre16/pcre16_ord2utf16.c
===================================================================
--- code/branches/pcre16/pcre16_ord2utf16.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre16_ord2utf16.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -67,7 +67,7 @@
int
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
{
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
Should never happen in practice. */
@@ -85,11 +85,11 @@
*buffer = 0xdc00 | (cvalue & 0x3ff);
return 2;
-#else
+#else /* SUPPORT_UTF */
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
return 0;
-#endif
+#endif /* SUPPORT_UTF */
}
/* End of pcre16_ord2utf16.c */
Modified: code/branches/pcre16/pcre16_utf16_utils.c
===================================================================
--- code/branches/pcre16/pcre16_utf16_utils.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre16_utf16_utils.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -77,7 +77,7 @@
int
pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms)
{
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
/* This function converts any UTF-16 string to host byte order and optionally removes
any Byte Order Marks (BOMS). Returns with the remainig length. */
BOOL same_bo = TRUE;
@@ -108,11 +108,11 @@
*optr++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
}
-#else
+#else /* SUPPORT_UTF */
(void)(output); /* Keep picky compilers happy */
(void)(input);
(void)(keep_boms);
-#endif
+#endif /* SUPPORT_UTF */
return length;
}
Modified: code/branches/pcre16/pcre16_valid_utf16.c
===================================================================
--- code/branches/pcre16/pcre16_valid_utf16.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre16_valid_utf16.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -83,7 +83,7 @@
int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
{
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
register pcre_uchar c;
@@ -135,10 +135,10 @@
}
}
-#else /* SUPPORT_UTF16 */
+#else /* SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
-#endif
+#endif /* SUPPORT_UTF */
return PCRE_UTF16_ERR0; /* This indicates success */
}
Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_compile.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -4607,12 +4607,7 @@
it's a length rather than a small character. */
#ifdef SUPPORT_UTF
-#ifdef COMPILE_PCRE8
- if (utf && (code[-1] & 0x80) != 0)
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
- if (utf && (code[-1] & 0xfc00) == 0xdc00)
-#endif /* COMPILE_PCRE8 */
+ if (utf && NOT_FIRSTCHAR(code[-1]))
{
pcre_uchar *lastchar = code - 1;
BACKCHAR(lastchar);
@@ -4625,7 +4620,6 @@
/* Handle the case of a single charater - either with no UTF support, or
with UTF disabled, or for a single character UTF character. */
-
{
c = code[-1];
if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
@@ -7438,8 +7432,14 @@
int newnl = 0;
int newbsr = 0;
+#ifdef COMPILE_PCRE8
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
{ skipatstart += 7; options |= PCRE_UTF8; continue; }
+#endif
+#ifdef COMPILE_PCRE16
+ if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
+ { skipatstart += 8; options |= PCRE_UTF16; continue; }
+#endif
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
{ skipatstart += 6; options |= PCRE_UCP; continue; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
Modified: code/branches/pcre16/pcre_config.c
===================================================================
--- code/branches/pcre16/pcre_config.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_config.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -73,7 +73,7 @@
switch (what)
{
case PCRE_CONFIG_UTF8:
-#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
*((int *)where) = 1;
#else
*((int *)where) = 0;
@@ -81,7 +81,7 @@
break;
case PCRE_CONFIG_UTF16:
-#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
*((int *)where) = 1;
#else
*((int *)where) = 0;
Modified: code/branches/pcre16/pcre_dfa_exec.c
===================================================================
--- code/branches/pcre16/pcre_dfa_exec.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_dfa_exec.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -2683,7 +2683,7 @@
const pcre_uchar *p = start_subject + local_offsets[rc];
const pcre_uchar *pp = start_subject + local_offsets[rc+1];
int charcount = local_offsets[rc+1] - local_offsets[rc];
- while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
if (charcount > 0)
{
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
@@ -2780,7 +2780,7 @@
const pcre_uchar *p = ptr;
const pcre_uchar *pp = local_ptr;
charcount = pp - p;
- while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
}
}
@@ -2862,7 +2862,7 @@
{
const pcre_uchar *p = start_subject + local_offsets[0];
const pcre_uchar *pp = start_subject + local_offsets[1];
- while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0)
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
@@ -3144,7 +3144,7 @@
/* Check a UTF-8 string if required. Unfortunately there's no way of passing
back the character offset. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
{
int erroroffset;
@@ -3159,17 +3159,9 @@
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
}
-#ifdef COMPILE_PCRE8
if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
+ NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
- if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
- return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
}
#endif
Modified: code/branches/pcre16/pcre_exec.c
===================================================================
--- code/branches/pcre16/pcre_exec.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_exec.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -6038,17 +6038,9 @@
}
/* Check that a start_offset points to the start of a UTF character. */
-#ifdef COMPILE_PCRE8
if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
+ NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
- if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
- return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
}
#endif
Modified: code/branches/pcre16/pcre_internal.h
===================================================================
--- code/branches/pcre16/pcre_internal.h 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_internal.h 2011-12-08 07:36:41 UTC (rev 794)
@@ -57,33 +57,32 @@
#define COMPILE_PCRE8
#endif
-/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
-script prevents both being selected, but not everybody uses "configure". */
+/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The
+"configure" script ensures this, but not everybody uses "configure". */
-#if defined EBCDIC && (defined SUPPORT_UTF8 || defined SUPPORT_UTF16)
-#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
+#if defined SUPPORT_UCP && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
#endif
-/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility
+reasons with existing code. */
-#if defined SUPPORT_UCP && !defined SUPPORT_UTF8
-#define SUPPORT_UTF8 1
+#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
#endif
-/* If SUPPORT_UCP is defined, SUPPORT_UTF16 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* Fixme: SUPPORT_UTF8 should eventually disappear from the code.
+Until then we define it if SUPPORT_UTF is defined. */
-#if defined SUPPORT_UCP && defined COMPILE_PCRE16 && !defined SUPPORT_UTF16
-#define SUPPORT_UTF16 1
+#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8)
+#define SUPPORT_UTF8 1
#endif
-/* This macro is defined if either UTF-8 or UTF-16 support or both are
-enabled. */
+/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
+script prevents both being selected, but not everybody uses "configure". */
-#if defined SUPPORT_UTF8 || defined SUPPORT_UTF16
-/* Unicode Transformation Format is enabled. */
-#define SUPPORT_UTF 1
+#if defined EBCDIC && defined SUPPORT_UTF
+#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
#endif
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
@@ -524,16 +523,18 @@
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
-/* When UTF-8 encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
-not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
-never be called in byte mode. To make sure they can never even appear when
-UTF-8 support is omitted, we don't even define them. */
+/* When UTF encoding is being used, a character is no longer just a single
+code unit. The macros for character handling generate simple sequences when
+used in character-mode, and more complicated ones for UTF characters.
+GETCHARLENTEST and other macros are not used when UTF is not supported,
+so they are not defined. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */
+#ifndef SUPPORT_UTF
+
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
-#ifndef SUPPORT_UTF
+/* #define NOT_FIRSTCHAR(c) */
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
@@ -562,6 +563,11 @@
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
+
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer. */
@@ -724,6 +730,11 @@
#define GET_EXTRALEN(c) 1
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
+
/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. */
Modified: code/branches/pcre16/pcre_jit_compile.c
===================================================================
--- code/branches/pcre16/pcre_jit_compile.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_jit_compile.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -1240,7 +1240,7 @@
OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
/* Copy the integer value to the output buffer */
#ifdef COMPILE_PCRE16
-OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
+OP2(SLJIT_ASHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
@@ -1353,7 +1353,7 @@
#ifdef COMPILE_PCRE8
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf && c > 127)
{
n = GET_EXTRALEN(*cc);
@@ -1364,13 +1364,13 @@
}
return (n << 8) | bit;
}
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
return (0 << 8) | bit;
#else /* COMPILE_PCRE8 */
#ifdef COMPILE_PCRE16
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
{
if (bit >= (1 << 10))
@@ -1378,7 +1378,7 @@
else
return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
}
-#endif /* SUPPORT_UTF16 */
+#endif /* SUPPORT_UTF */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
#endif /* COMPILE_PCRE16 */
Modified: code/branches/pcre16/pcre_jit_test.c
===================================================================
--- code/branches/pcre16/pcre_jit_test.c 2011-12-07 16:52:34 UTC (rev 793)
+++ code/branches/pcre16/pcre_jit_test.c 2011-12-08 07:36:41 UTC (rev 794)
@@ -616,7 +616,7 @@
{ 0, 0, NULL, NULL }
};
-pcre_jit_stack* callback(void *arg)
+static pcre_jit_stack* callback(void *arg)
{
return (pcre_jit_stack *)arg;
}