[Pcre-svn] [786] code/branches/pcre16: Updating pcre_jit

Autor: Subversion repository
Datum:
To: pcre-svn
Betreff: [Pcre-svn] [786] code/branches/pcre16: Updating pcre_jit_test.

Revision: 786

          http://vcs.pcre.org/viewvc?view=rev&revision=786
Author:   zherczeg
Date:     2011-12-06 11:33:41 +0000 (Tue, 06 Dec 2011)

Log Message:
-----------
Updating pcre_jit_test. Most of the JIT tests are working now in 16 bit mode.

Modified Paths:
--------------
    code/branches/pcre16/Makefile.am
    code/branches/pcre16/pcre.h.in
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcre_config.c
    code/branches/pcre16/pcre_dfa_exec.c
    code/branches/pcre16/pcre_exec.c
    code/branches/pcre16/pcre_internal.h
    code/branches/pcre16/pcre_jit_compile.c
    code/branches/pcre16/pcre_jit_test.c

Added Paths:
-----------
    code/branches/pcre16/pcre16_config.c

Modified: code/branches/pcre16/Makefile.am
===================================================================
--- code/branches/pcre16/Makefile.am    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/Makefile.am    2011-12-06 11:33:41 UTC (rev 786)
@@ -211,6 +211,7 @@
 libpcre16_la_SOURCES = \
   pcre16_chartables.c \
   pcre16_compile.c \
+  pcre16_config.c \
   pcre16_exec.c \
   pcre16_fullinfo.c \
   pcre16_info.c \

Modified: code/branches/pcre16/pcre.h.in
===================================================================
--- code/branches/pcre16/pcre.h.in    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre.h.in    2011-12-06 11:33:41 UTC (rev 786)
@@ -234,6 +234,7 @@
 #define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
 #define PCRE_CONFIG_BSR                     8
 #define PCRE_CONFIG_JIT                     9
+#define PCRE_CONFIG_UTF16                  10

 /* Request types for pcre_study(). Do not re-arrange, in order to remain
 compatible. */
@@ -353,6 +354,7 @@
 PCRE_EXP_DECL pcre *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
                   int *, const unsigned char *);
 PCRE_EXP_DECL int  pcre_config(int, void *);
+PCRE_EXP_DECL int  pcre16_config(int, void *);
 PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
                   int *, int, const char *, char *, int);
 PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,

Added: code/branches/pcre16/pcre16_config.c
===================================================================
--- code/branches/pcre16/pcre16_config.c                            (rev 0)
+++ code/branches/pcre16/pcre16_config.c    2011-12-06 11:33:41 UTC (rev 786)
@@ -0,0 +1,45 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_config.c"
+
+/* End of pcre16_config.c */

Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre_compile.c    2011-12-06 11:33:41 UTC (rev 786)
@@ -3738,8 +3738,8 @@
       {
       const pcre_uchar *oldptr;

-#ifdef SUPPORT_UTF8
-      if (utf && c > 127)
+#ifdef SUPPORT_UTF
+      if (utf && HAS_EXTRALEN(c))
         {                           /* Braces are required because the */
         GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
         }
@@ -4317,11 +4317,10 @@

 #ifdef SUPPORT_UTF
       if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
-#endif
-#ifndef COMPILE_PCRE8
+#elif !(defined COMPILE_PCRE8)
       if (c > 255)
 #endif
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
         {
         xclass = TRUE;
         *class_uchardata++ = XCL_SINGLE;
@@ -4345,8 +4344,7 @@

         }
       else
-#endif  /* SUPPORT_UTF8 */
-
+#endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */
       /* Handle a single-byte character */
         {
         classbits[c/8] |= (1 << (c&7));
@@ -4358,6 +4356,7 @@
         class_charcount++;
         class_lastchar = c;
         }
+
       }

     /* Loop until ']' reached. This "while" is the end of the "do" far above.
@@ -5849,7 +5848,7 @@

             for (i = 0; i < cd->names_found; i++)
               {
-              int crc = memcmp(name, slot+2, namelen);
+              int crc = memcmp(name, slot+2, IN_UCHARS(namelen));
               if (crc == 0)
                 {
                 if (slot[2+namelen] == 0)
@@ -7440,7 +7439,7 @@
   int newnl = 0;
   int newbsr = 0;

-  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
+  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
     { skipatstart += 7; options |= PCRE_UTF8; continue; }
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
     { skipatstart += 6; options |= PCRE_UCP; continue; }
@@ -7805,8 +7804,7 @@
             if (cd->fcc[re->first_char] != re->first_char)
               re->flags |= PCRE_FCH_CASELESS;
             }
-          else if ((options & PCRE_UCP) != 0
-              && UCD_OTHERCASE(re->first_char) != re->first_char)
+          else if (UCD_OTHERCASE(re->first_char) != re->first_char)
             re->flags |= PCRE_FCH_CASELESS;
           }
         else
@@ -7843,13 +7841,12 @@
     /* We ignore non-ASCII first chars in 8 bit mode. */
     if (utf)
       {
-      if (re->first_char < 128)
+      if (re->req_char < 128)
         {
-        if (cd->fcc[re->first_char] != re->first_char)
+        if (cd->fcc[re->req_char] != re->req_char)
           re->flags |= PCRE_RCH_CASELESS;
         }
-      else if ((options & PCRE_UCP) != 0
-          && UCD_OTHERCASE(re->first_char) != re->first_char)
+      else if (UCD_OTHERCASE(re->req_char) != re->req_char)
         re->flags |= PCRE_RCH_CASELESS;
       }
     else

Modified: code/branches/pcre16/pcre_config.c
===================================================================
--- code/branches/pcre16/pcre_config.c    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre_config.c    2011-12-06 11:33:41 UTC (rev 786)
@@ -62,19 +62,32 @@
 Returns:           0 if data returned, negative on error
 */

+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_config(int what, void *where)
+#endif
{
switch (what)
{
case PCRE_CONFIG_UTF8:
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;

+ case PCRE_CONFIG_UTF16:
+#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16
+ *((int *)where) = 1;
+#else
+ *((int *)where) = 0;
+#endif
+ break;
+
case PCRE_CONFIG_UNICODE_PROPERTIES:
#ifdef SUPPORT_UCP
*((int *)where) = 1;

Modified: code/branches/pcre16/pcre_dfa_exec.c
===================================================================
--- code/branches/pcre16/pcre_dfa_exec.c    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre_dfa_exec.c    2011-12-06 11:33:41 UTC (rev 786)
@@ -3202,7 +3202,7 @@
       {
       first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-      if (first_char > 127 && utf && md->use_ucp)
+      if (utf && first_char > 127)
         first_char2 = UCD_OTHERCASE(first_char);
 #endif
       }
@@ -3226,7 +3226,7 @@
     {
     req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-    if (req_char > 127 && utf && md->use_ucp)
+    if (utf && req_char > 127)
       req_char2 = UCD_OTHERCASE(req_char);
 #endif
     }

Modified: code/branches/pcre16/pcre_exec.c
===================================================================
--- code/branches/pcre16/pcre_exec.c    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre_exec.c    2011-12-06 11:33:41 UTC (rev 786)
@@ -6267,7 +6267,7 @@
       {
       first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-      if (first_char > 127 && utf && md->use_ucp)
+      if (utf && first_char > 127)
         first_char2 = UCD_OTHERCASE(first_char);
 #endif
       }
@@ -6289,7 +6289,7 @@
     {
     req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-    if (req_char > 127 && utf && md->use_ucp)
+    if (utf && req_char > 127)
       req_char2 = UCD_OTHERCASE(req_char);
 #endif
     }

Modified: code/branches/pcre16/pcre_internal.h
===================================================================
--- code/branches/pcre16/pcre_internal.h    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre_internal.h    2011-12-06 11:33:41 UTC (rev 786)
@@ -1166,7 +1166,12 @@
 #define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"
 #define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"
 #define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"
-#define STRING_UTF8_RIGHTPAR           "UTF8)"
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR            "UTF8)"
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR            "UTF16)"
+#endif
 #define STRING_UCP_RIGHTPAR            "UCP)"
 #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"

@@ -1421,7 +1426,12 @@
 #define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 #define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 #define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
-#define STRING_UTF8_RIGHTPAR           STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
+#endif
 #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS

Modified: code/branches/pcre16/pcre_jit_compile.c
===================================================================
--- code/branches/pcre16/pcre_jit_compile.c    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre_jit_compile.c    2011-12-06 11:33:41 UTC (rev 786)
@@ -297,7 +297,7 @@
   jump_list *casefulcmp;
   jump_list *caselesscmp;
   BOOL jscript_compat;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   BOOL utf;
 #ifdef SUPPORT_UCP
   BOOL use_ucp;
@@ -306,7 +306,7 @@
 #ifdef COMPILE_PCRE8
   jump_list *utfreadtype8;
 #endif
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
 #ifdef SUPPORT_UCP
   jump_list *getucd;
 #endif
@@ -500,7 +500,7 @@
   return cc + 1;

case OP_ANYBYTE:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf) return NULL;
#endif
return cc + 1;
@@ -576,6 +576,8 @@

case OP_NOTPROP:
case OP_PROP:
+ return cc + 1 + 2;
+
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
@@ -1267,7 +1269,7 @@
/* Detects if the character has an othercase. */
unsigned int c;

-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   GETCHAR(c, cc);
@@ -1279,6 +1281,9 @@
     return FALSE;
 #endif
     }
+#ifndef COMPILE_PCRE8
+  return common->fcc[c] != c;
+#endif
   }
 else
 #endif
@@ -1769,6 +1774,9 @@
   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
   COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
+#ifdef COMPILE_PCRE16
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+#endif
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
   nl = JUMP(SLJIT_JUMP);
   }
@@ -1776,7 +1784,7 @@
 mainloop = LABEL();

 /* Increasing the STR_PTR here requires one less jump in the most common case. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf) readuchar = TRUE;
 #endif
 if (newlinecheck) readuchar = TRUE;
@@ -1843,7 +1851,7 @@
   {
   oc = TABLE_GET(first_char, common->fcc, first_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-  if (first_char > 127 && common->utf && common->use_ucp)
+  if (first_char > 127 && common->utf)
     oc = UCD_OTHERCASE(first_char);
 #endif
   }
@@ -2077,7 +2085,7 @@
   {
   oc = TABLE_GET(req_char, common->fcc, req_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-  if (req_char > 127 && common->utf && common->use_ucp)
+  if (req_char > 127 && common->utf)
     oc = UCD_OTHERCASE(req_char);
 #endif
   }
@@ -2265,7 +2273,7 @@
 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
@@ -2289,7 +2297,7 @@
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
@@ -2323,7 +2331,7 @@
 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
@@ -2415,8 +2423,7 @@
 #undef CHAR1
 #undef CHAR2

-#ifdef SUPPORT_UTF8
-#ifdef SUPPORT_UCP
+#if defined SUPPORT_UTF && defined SUPPORT_UCP

static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
@@ -2436,8 +2443,7 @@
return src2;
}

-#endif
-#endif
+#endif /* SUPPORT_UTF && SUPPORT_UCP */

 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
     compare_context* context, jump_list **fallbacks)
@@ -2445,7 +2451,7 @@
 DEFINE_COMPILER;
 unsigned int othercasebit = 0;
 pcre_uchar *othercasechar = NULL;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 int utflength;
 #endif

@@ -2588,7 +2594,7 @@
#endif

   cc++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   utflength--;
   }
 while (utflength > 0);
@@ -2646,7 +2652,7 @@
   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
 #ifndef COMPILE_PCRE8
   jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
-#elif defined SUPPORT_UTF8
+#elif defined SUPPORT_UTF
   if (common->utf)
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
 #endif
@@ -2660,7 +2666,7 @@

 #ifndef COMPILE_PCRE8
   JUMPHERE(jump);
-#elif defined SUPPORT_UTF8
+#elif defined SUPPORT_UTF
   if (common->utf)
     JUMPHERE(jump);
 #endif
@@ -2795,7 +2801,7 @@
   if (*cc == XCL_SINGLE)
     {
     cc ++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (common->utf)
       {
       GETCHARINC(c, cc);
@@ -2826,7 +2832,7 @@
   else if (*cc == XCL_RANGE)
     {
     cc ++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (common->utf)
       {
       GETCHARINC(c, cc);
@@ -2835,7 +2841,7 @@
 #endif
       c = *cc++;
     SET_CHAR_OFFSET(c);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (common->utf)
       {
       GETCHARINC(c, cc);
@@ -2963,7 +2969,7 @@
 unsigned int c, oc, bit;
 compare_context context;
 struct sljit_jump *jump[4];
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 struct sljit_label *label;
 #ifdef SUPPORT_UCP
 pcre_uchar propdata[5];
@@ -3063,7 +3069,7 @@
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   return cc;

-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 #ifdef SUPPORT_UCP
   case OP_NOTPROP:
   case OP_PROP:
@@ -3279,7 +3285,7 @@
     }
   check_input_end(common, fallbacks);
   read_char(common);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (common->utf)
     {
     GETCHAR(c, cc);
@@ -3296,16 +3302,14 @@

   case OP_NOT:
   case OP_NOTI:
+  check_input_end(common, fallbacks);
+  length = 1;
 #ifdef SUPPORT_UTF
   if (common->utf)
     {
-    length = 1;
-    if (HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
-
-    check_input_end(common, fallbacks);
-    GETCHAR(c, cc);
-
-    if (c <= 127)
+#ifdef COMPILE_PCRE8
+    c = *cc;
+    if (c < 128)
       {
       OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
       if (type == OP_NOT || !char_has_othercase(common, cc))
@@ -3317,24 +3321,24 @@
         add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
         }
       /* Skip the variable-length character. */
-      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
       jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
-#ifdef COMPILE_PCRE8
-      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
-#endif
+      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
       JUMPHERE(jump[0]);
-      return cc + length;
+      return cc + 1;
       }
     else
+#endif /* COMPILE_PCRE8 */
+      {
+      GETCHARLEN(c, cc, length);
       read_char(common);
+      }
     }
   else
-#endif
+#endif /* SUPPORT_UTF */
     {
-    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-    add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
-    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+    read_char(common);
     c = *cc;
     }

@@ -3363,10 +3367,11 @@
   read_char(common);
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   jump[0] = NULL;
-#ifdef SUPPORT_UTF8
-  /* This check can only be skipped in pure 8 bit mode. */
+#ifdef COMPILE_PCRE8
+  /* This check only affects 8 bit mode. In other modes, we
+  always need to compare the value with 255. */
   if (common->utf)
-#endif
+#endif /* COMPILE_PCRE8 */
     {
     jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
     if (type == OP_CLASS)
@@ -3375,7 +3380,7 @@
       jump[0] = NULL;
       }
     }
-#endif
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
@@ -3385,7 +3390,7 @@
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   if (jump[0] != NULL)
     JUMPHERE(jump[0]);
-#endif
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
   return cc + 32 / sizeof(pcre_uchar);

 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
@@ -3399,7 +3404,7 @@
   SLJIT_ASSERT(length > 0);
   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (common->utf)
     {
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
@@ -3411,7 +3416,7 @@
     return cc + LINK_SIZE;
     }
 #endif
-  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
   add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
   return cc + LINK_SIZE;
   }
@@ -3548,8 +3553,7 @@
 if (withchecks && !common->jscript_compat)
   add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));

-#ifdef SUPPORT_UTF8
-#ifdef SUPPORT_UCP
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && *cc == OP_REFI)
{
SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
@@ -3567,8 +3571,7 @@
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
}
else
-#endif
-#endif
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
{
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
if (withchecks)
@@ -6422,7 +6425,7 @@
common->casefulcmp = NULL;
common->caselesscmp = NULL;
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
common->utf = (re->options & PCRE_UTF8) != 0;
#ifdef SUPPORT_UCP
@@ -6432,7 +6435,7 @@
#ifdef COMPILE_PCRE8
common->utfreadtype8 = NULL;
#endif
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
common->getucd = NULL;
#endif

Modified: code/branches/pcre16/pcre_jit_test.c
===================================================================
--- code/branches/pcre16/pcre_jit_test.c    2011-12-05 20:12:24 UTC (rev 785)
+++ code/branches/pcre16/pcre_jit_test.c    2011-12-06 11:33:41 UTC (rev 786)
@@ -51,18 +51,35 @@
 #define PCRE_BUG 0x80000000

 /*
- Hungarian utf8 characters
- \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
- \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
- \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
- \xc2\x85 = 0x85 (NExt Line = NEL)
- \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
- \xe2\x80\xa8 = 0x2028 (Line Separator)
- \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
- \xcc\x8d = 781 (Something with Mark property)
+ Letter characters:
+   \xe6\x92\xad = 0x64ad = 25773 (kanji)
+ Non-letter characters:
+   \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
+   \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
+ Newlines:
+   \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
+   \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
+ Othercase pairs:
+   \xc3\xa9 = 0xe9 = 233 (e')
+      \xc3\x89 = 0xc9 = 201 (E')
+   \xc3\xa1 = 0xe1 = 225 (a')
+      \xc3\x81 = 0xc1 = 193 (A')
+   \xc8\xba = 0x23a = 570
+      \xe2\xb1\xa5 = 0x2c65 = 11365
+   \xe1\xbd\xb8 = 0x1f78 = 8056
+      \xe1\xbf\xb8 = 0x1ff8 = 8184
+   \xf0\x90\x90\x80 = 0x10400 = 66560
+      \xf0\x90\x90\xa8 = 0x10428 = 66600
+ Mark property:
+   \xcc\x8d = 0x30d = 781
+ Special:
+   \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
+   \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
+   \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
+   \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
+   \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
 */

-static void setstack(pcre_extra *extra);
static int regression_tests(void);

 int main(void)
@@ -76,21 +93,12 @@
     return regression_tests();
 }

-static pcre_jit_stack* callback(void *arg)
-{
-    return (pcre_jit_stack *)arg;
-}
+/* --------------------------------------------------------------------------------------- */

-static void setstack(pcre_extra *extra)
-{
-    static pcre_jit_stack *stack;
-    if (stack) pcre_jit_stack_free(stack);
-    stack = pcre_jit_stack_alloc(1, 1024 * 1024);
-    pcre_assign_jit_stack(extra, callback, stack);
-}
+#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
+#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
+#endif

-/* --------------------------------------------------------------------------------------- */
-
 #define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
 #define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
 #define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
@@ -139,6 +147,10 @@
     { CMA, 0, "\\Ca", "CDA" },
     { MA, 0, "\\Cx", "cda" },
     { CMA, 0, "\\Cx", "CDA" },
+    { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+    { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+    { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+    { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },

     /* Assertions. */
     { MUA, 0, "\\b[^A]", "A_B#" },
@@ -151,6 +163,7 @@
     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
     { MUA, 0, "\\b.", "\xcd\xbe" },
+    { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
     { MA, 0, "\\R^", "\n" },
     { MA, 1, "^", "\n" },
     { 0, 0, "^ab", "ab" },
@@ -267,6 +280,7 @@
     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
+    { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },

     /* Basic character sets. */
     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
@@ -449,6 +463,7 @@
     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
     { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
+    { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },

     /* Assertions. */
     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
@@ -601,111 +616,328 @@
     { 0, 0, NULL, NULL }
 };

+/* JIT stack callback passed to pcre_assign_jit_stack() by setstack(): the
+   opaque argument already is the stack to use, so it is returned as is.
+   Declared static like the other helpers of this test program. */
+static pcre_jit_stack *callback(void *arg)
+{
+    return (pcre_jit_stack *)arg;
+}
+
+/* Assigns the JIT stack kept by this function to "extra".
+
+   The stack lives in a function-local static. When "realloc" is non-zero
+   the stack held so far (if any) is released and a new one is obtained
+   with pcre_jit_stack_alloc(1, 1024 * 1024) before the assignment. */
+static void setstack(pcre_extra *extra, int realloc)
+{
+    static pcre_jit_stack *jit_stack;
+
+    if (realloc != 0) {
+        if (jit_stack != NULL)
+            pcre_jit_stack_free(jit_stack);
+        jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
+    }
+
+    /* A NULL "extra" is passed through unchanged. */
+    pcre_assign_jit_stack(extra, callback, jit_stack);
+}
+
+#ifdef SUPPORT_PCRE16
+
+/* Converts the NUL-terminated UTF-8 string "input" to UTF-16 in "output".
+
+   output     - receives at most max_length 16 bit units, including the
+                terminating zero.
+   offsetmap  - optional (may be NULL). For every character written, the
+                entry at the index of its first 16 bit unit receives the byte
+                offset of that character in "input"; the entry at the index
+                of the terminating zero receives the byte offset where the
+                conversion stopped. The entry that belongs to the second half
+                of a surrogate pair is skipped and left untouched.
+   max_length - capacity of "output" in 16 bit units; nothing is written
+                when it is zero or negative.
+
+   Returns the number of 16 bit units written, not counting the terminator.
+
+   Bytes that cannot start a sequence (0x80-0xbf, 0xf8-0xff) and lead bytes
+   whose continuation bytes are cut off by the terminating NUL are copied
+   as single units, so the conversion always advances and never reads past
+   the end of the input. */
+static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
+{
+    const unsigned char *start = (const unsigned char *)input;
+    const unsigned char *ptr = start;
+    PCRE_SCHAR16 *optr = output;
+    unsigned int c;
+    int extra, k;
+
+    if (max_length <= 0)
+        return 0;
+
+    while (*ptr && max_length > 1) {
+        if (offsetmap)
+            *offsetmap++ = (int)(ptr - start);
+
+        /* Number of continuation bytes announced by the lead byte. */
+        if (*ptr < 0xc0 || *ptr >= 0xf8)
+            extra = 0;
+        else if (*ptr < 0xe0)
+            extra = 1;
+        else if (*ptr < 0xf0)
+            extra = 2;
+        else
+            extra = 3;
+
+        /* A sequence truncated by the terminating NUL is not decoded. */
+        for (k = 1; k <= extra; k++)
+            if (ptr[k] == 0) {
+                extra = 0;
+                break;
+            }
+
+        switch (extra) {
+        case 1:
+            c = ((ptr[0] & 0x1f) << 6) | (ptr[1] & 0x3f);
+            break;
+        case 2:
+            c = ((ptr[0] & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f);
+            break;
+        case 3:
+            c = ((ptr[0] & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f);
+            break;
+        default:
+            c = ptr[0];
+            break;
+        }
+        ptr += extra + 1;
+
+        if (c < 65536) {
+            *optr++ = (PCRE_SCHAR16)c;
+            max_length--;
+        } else if (max_length <= 2) {
+            /* No room left for a surrogate pair plus the terminator. */
+            *optr = '\0';
+            return (int)(optr - output);
+        } else {
+            c -= 0x10000;
+            *optr++ = (PCRE_SCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
+            *optr++ = (PCRE_SCHAR16)(0xdc00 | (c & 0x3ff));
+            max_length -= 2;
+            /* Skip the map entry of the low surrogate. */
+            if (offsetmap)
+                offsetmap++;
+        }
+    }
+    if (offsetmap)
+        *offsetmap = (int)(ptr - start);
+    *optr = '\0';
+    return (int)(optr - output);
+}
+
+/* Copies the NUL-terminated 8 bit string "input" to "output", one 16 bit
+   unit per byte, without any decoding.
+
+   output     - receives at most max_length units, including the
+                terminating zero.
+   max_length - capacity of "output" in 16 bit units; nothing is written
+                when it is zero or negative.
+
+   Returns the number of units written, not counting the terminator.
+
+   The input is read as unsigned char: where plain char is signed, bytes
+   >= 0x80 would otherwise be sign-extended before being stored. */
+static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
+{
+    const unsigned char *iptr = (const unsigned char *)input;
+    PCRE_SCHAR16 *optr = output;
+
+    if (max_length <= 0)
+        return 0;
+
+    while (*iptr && max_length > 1) {
+        *optr++ = *iptr++;
+        max_length--;
+    }
+    *optr = '\0';
+    return (int)(optr - output);
+}
+
+#define REGTEST_MAX_LENGTH 4096
+static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
+static int regtest_offsetmap[REGTEST_MAX_LENGTH];
+
+#endif /* SUPPORT_PCRE16 */
+
 static int regression_tests(void)
 {
-    pcre *re;
     struct regression_test_case *current = regression_test_cases;
     const char *error;
-    pcre_extra *extra;
-    int utf8 = 0, ucp = 0;
-    int ovector1[32];
-    int ovector2[32];
-    int return_value1, return_value2;
-    int i, err_offs;
-    int total = 0, succesful = 0;
+    int i, err_offs, is_succesful;
+    int total = 0;
+    int succesful = 0;
     int counter = 0;
-    int disabled_flags = PCRE_BUG;
+#ifdef SUPPORT_PCRE8
+    pcre *re8;
+    pcre_extra *extra8;
+    int ovector8_1[32];
+    int ovector8_2[32];
+    int return_value8_1, return_value8_2;
+    int utf8 = 0, ucp8 = 0;
+    int disabled_flags8 = PCRE_BUG;
+#endif
+#ifdef SUPPORT_PCRE16
+    pcre *re16;
+    pcre_extra *extra16;
+    int ovector16_1[32];
+    int ovector16_2[32];
+    int return_value16_1, return_value16_2;
+    int utf16 = 0, ucp16 = 0;
+    int disabled_flags16 = PCRE_BUG;
+    int length16;
+#endif

     /* This test compares the behaviour of interpreter and JIT. Although disabling
-    utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
+    utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
     still considered successful from pcre_jit_test point of view. */

+    printf("Running JIT regression\n");
+
+#ifdef SUPPORT_PCRE8
     pcre_config(PCRE_CONFIG_UTF8, &utf8);
-    pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
+    pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
     if (!utf8)
-        disabled_flags |= PCRE_UTF8;
-    if (!ucp)
-        disabled_flags |= PCRE_UCP;
+        disabled_flags8 |= PCRE_UTF8;
+    if (!ucp8)
+        disabled_flags8 |= PCRE_UCP;
+    printf(" in  8 bit mode with utf8  %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
+#endif
+#ifdef SUPPORT_PCRE16
+    pcre16_config(PCRE_CONFIG_UTF16, &utf16);
+    pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
+    if (!utf16)
+        disabled_flags16 |= PCRE_UTF8;
+    if (!ucp16)
+        disabled_flags16 |= PCRE_UCP;
+    printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
+#endif

-    printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
     while (current->pattern) {
         /* printf("\nPattern: %s :\n", current->pattern); */
         total++;

         error = NULL;
-        re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
+#ifdef SUPPORT_PCRE8
+        re8 = pcre_compile(current->pattern,
+            current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
+            &error, &err_offs, NULL);

-        if (!re) {
-            if (utf8 && ucp)
-                printf("\nCannot compile pattern: %s\n", current->pattern);
-            else {
-                /* Some patterns cannot be compiled when either of utf8
-                or ucp is disabled. We just skip them. */
-                printf(".");
-                succesful++;
+        if (re8) {
+            error = NULL;
+            extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
+            if (!extra8) {
+                printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
+                pcre_free(re8);
+                re8 = NULL;
             }
-            current++;
-            continue;
-        }
+            else if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { /* only reached when pcre_study() succeeded */
+                printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
+                pcre_free_study(extra8);
+                pcre_free(re8);
+                re8 = NULL;
+            }
+        } else if (utf8 && ucp8)
+            printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
+#endif
+#ifdef SUPPORT_PCRE16
+        convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
+        re16 = pcre16_compile(regtest_buf,
+            current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
+            &error, &err_offs, NULL);
+        if (re16) {
+            error = NULL;
+            extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
+            if (!extra16) {
+                printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
+                pcre_free(re16);
+                re16 = NULL;
+            }
+            else if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { /* only reached when pcre16_study() succeeded */
+                printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
+                pcre_free_study(extra16);
+                pcre_free(re16);
+                re16 = NULL;
+            }
+        } else if (utf16 && ucp16)
+            printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
+#endif

-        error = NULL;
-        extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
-        if (!extra) {
-            printf("\nCannot study pattern: %s\n", current->pattern);
-            current++;
-            continue;
-        }
+        counter++;
+        if ((counter & 0x3) != 0)
+            setstack(NULL, 1);

-        if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
-            printf("\nJIT compiler does not support: %s\n", current->pattern);
-            current++;
-            continue;
+#ifdef SUPPORT_PCRE8
+        if (re8) {
+            setstack(extra8, 0);
+            for (i = 0; i < 32; ++i)
+                ovector8_1[i] = -2;
+            return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
+                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
+
+            for (i = 0; i < 32; ++i)
+                ovector8_2[i] = -2;
+            return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
+                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
         }
+#endif

-        counter++;
-        if ((counter & 0x3) != 0)
-            setstack(extra);
+#ifdef SUPPORT_PCRE16
+        if (re16) {
+            setstack(extra16, 0);
+            if (current->flags & PCRE_UTF8)
+                length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
+            else
+                length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);

-        for (i = 0; i < 32; ++i)
-            ovector1[i] = -2;
-        return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
+            for (i = 0; i < 32; ++i)
+                ovector16_1[i] = -2;
+            return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
+                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);

-        for (i = 0; i < 32; ++i)
-            ovector2[i] = -2;
-        return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
+            for (i = 0; i < 32; ++i)
+                ovector16_2[i] = -2;
+            return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
+                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
+        }
+#endif

         /* If PCRE_BUG is set, just run the test, but do not compare the results.
         Segfaults can still be captured. */
+
+        is_succesful = 1;
         if (!(current->flags & PCRE_BUG)) {
-            if (return_value1 != return_value2) {
-                printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
-                current++;
-                continue;
-            }
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+            if ((current->flags & PCRE_UTF8) && utf8 && utf16) {
+                /* All results must be the same. */
+                if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
+                    printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
+                        return_value8_1, return_value8_2, return_value16_1, return_value16_2,
+                        total, current->pattern, current->input);
+                    is_succesful = 0;
+                } else if (return_value8_1 >= 0) {
+                    return_value8_1 *= 2;
+                    /* Transform back the results. */
+                    for (i = 0; i < return_value8_1; ++i) {
+                        if (ovector16_1[i] >= 0)
+                            ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
+                        if (ovector16_2[i] >= 0)
+                            ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
+                    }

-            if (return_value1 >= 0) {
-                return_value1 *= 2;
-                err_offs = 0;
-                for (i = 0; i < return_value1; ++i)
-                    if (ovector1[i] != ovector2[i]) {
-                        printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
-                        err_offs = 1;
-                    }
-                if (err_offs) {
-                    current++;
-                    continue;
+                    for (i = 0; i < return_value8_1; ++i)
+                        if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
+                            printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
+                                i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
+                                total, current->pattern, current->input);
+                            is_succesful = 0;
+                        }
                 }
+            } else {
+#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
+                /* Each mode is only checked against itself: the JIT result must equal the interpreter result. */
+#ifdef SUPPORT_PCRE8
+                if (return_value8_1 != return_value8_2) {
+                    printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+                        return_value8_1, return_value8_2, total, current->pattern, current->input);
+                    is_succesful = 0;
+                } else if (return_value8_1 >= 0) {
+                    return_value8_1 *= 2;
+                    for (i = 0; i < return_value8_1; ++i)
+                        if (ovector8_1[i] != ovector8_2[i]) {
+                            printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
+                                i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
+                            is_succesful = 0;
+                        }
+                }
+#endif
+
+#ifdef SUPPORT_PCRE16
+                if (return_value16_1 != return_value16_2) {
+                    printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+                        return_value16_1, return_value16_2, total, current->pattern, current->input);
+                    is_succesful = 0;
+                } else if (return_value16_1 >= 0) {
+                    return_value16_1 *= 2;
+                    for (i = 0; i < return_value16_1; ++i)
+                        if (ovector16_1[i] != ovector16_2[i]) {
+                            printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
+                                i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
+                            is_succesful = 0;
+                        }
+                }
+#endif
+
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
             }
+#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
         }

-        pcre_free_study(extra);
-        pcre_free(re);
+        if (is_succesful)
+            succesful++;

+#ifdef SUPPORT_PCRE8
+        if (re8) {
+            pcre_free_study(extra8);
+            pcre_free(re8);
+        }
+#endif
+#ifdef SUPPORT_PCRE16
+        if (re16) {
+            pcre16_free_study(extra16);
+            pcre_free(re16);
+        }
+#endif
+
         /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
         printf(".");
         fflush(stdout);
         current++;
-        succesful++;
     }

     if (total == succesful) {
@@ -717,4 +949,5 @@
     }
 }

+
/* End of pcre_jit_test.c */

Diese Nachricht ist Teil des folgenden Threads:
	Der komplette Thread sortiert nach Datum

[Pcre-svn] [786] code/branches/pcre16: Updating pcre_jit_tes…