[Pcre-svn] [774] code/branches/pcre16: better digit parsing,…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [774] code/branches/pcre16: better digit parsing, first_byte, req_byte are renamed to first_char req_char respectively
Revision: 774
          http://vcs.pcre.org/viewvc?view=rev&revision=774
Author:   zherczeg
Date:     2011-12-01 06:08:45 +0000 (Thu, 01 Dec 2011)


Log Message:
-----------
better digit parsing, first_byte, req_byte are renamed to first_char req_char respectively

Modified Paths:
--------------
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcre_dfa_exec.c
    code/branches/pcre16/pcre_exec.c
    code/branches/pcre16/pcre_fullinfo.c
    code/branches/pcre16/pcre_info.c
    code/branches/pcre16/pcre_internal.h
    code/branches/pcre16/pcre_jit_compile.c
    code/branches/pcre16/pcre_try_flipped.c
    code/branches/pcre16/pcretest.c


Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_compile.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -97,7 +97,11 @@


#define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)

+/* Private flags added to firstchar and reqchar. */

+#define REQ_CASELESS   0x10000000l      /* Indicates caselessness */
+#define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */
+
 /* Table for handling escaped characters in the range '0'-'z'. Positive returns
 are simple data values; negative values are for special things like \d and so
 on. Zero means further processing is needed (for things like \x), or the escape
@@ -484,12 +488,18 @@


Then we can use ctype_digit and ctype_xdigit in the code. */

+/* Using a simple comparison for decimal numbers rather than a memory read
+is much faster, and the resulting code is simpler (the compiler turns it
+into a subtraction and unsigned comparison). */
+
+#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
+
#ifndef EBCDIC

/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
UTF-8 mode. */

-static const unsigned char digitab[] =
+static const pcre_uint8 digitab[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
@@ -528,7 +538,7 @@

/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */

-static const unsigned char digitab[] =
+static const pcre_unit8 digitab[] =
   {
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */
@@ -563,7 +573,7 @@
   0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */
   0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */


-static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
+static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */
0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */
@@ -651,15 +661,17 @@
static BOOL
is_counted_repeat(const pcre_uchar *p)
{
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
+if (!IS_DIGIT(*p)) return FALSE;
+p++;
+while (IS_DIGIT(*p)) p++;
if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;

if (*p++ != CHAR_COMMA) return FALSE;
if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;

-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
+if (!IS_DIGIT(*p)) return FALSE;
+p++;
+while (IS_DIGIT(*p)) p++;

return (*p == CHAR_RIGHT_CURLY_BRACKET);
}
@@ -710,11 +722,13 @@
Otherwise further processing may be required. */

 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
-else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
+/* Not alphanumeric */
+else if (c < CHAR_0 || c > CHAR_z) {}
 else if ((i = escapes[c - CHAR_0]) != 0) c = i;


 #else           /* EBCDIC coding */
-else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
+/* Not alphanumeric */
+else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
 else if ((i = escapes[c - 0x48]) != 0)  c = i;
 #endif


@@ -740,8 +754,10 @@
       {
       /* In JavaScript, \u must be followed by four hexadecimal numbers.
       Otherwise it is a lowercase u letter. */
-      if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
-           && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
+      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
+        && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
+        && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
         {
         c = 0;
         for (i = 0; i < 4; ++i)
@@ -797,7 +813,7 @@
       {
       const pcre_uchar *p;
       for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
-        if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
+        if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
       if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
         {
         c = -ESC_k;
@@ -815,12 +831,21 @@
       }
     else negated = FALSE;


+    /* The integer range is limited by the machine's int representation. */
     c = 0;
-    while ((digitab[ptr[1]] & ctype_digit) != 0)
+    while (IS_DIGIT(ptr[1]))
+      {
+      if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
+        {
+        c = -1;
+        break;
+        }
       c = c * 10 + *(++ptr) - CHAR_0;
-
-    if (c < 0)   /* Integer overflow */
+      }
+    if (((unsigned int)c) > INT_MAX) /* Integer overflow */
       {
+      while (IS_DIGIT(ptr[1]))
+        ptr++;
       *errorcodeptr = ERR61;
       break;
       }
@@ -868,11 +893,21 @@
     if (!isclass)
       {
       oldptr = ptr;
+      /* The integer range is limited by the machine's int representation. */
       c -= CHAR_0;
-      while ((digitab[ptr[1]] & ctype_digit) != 0)
+      while (IS_DIGIT(ptr[1]))
+        {
+        if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
+          {
+          c = -1;
+          break;
+          }
         c = c * 10 + *(++ptr) - CHAR_0;
-      if (c < 0)    /* Integer overflow */
+        }
+      if (((unsigned int)c) > INT_MAX) /* Integer overflow */
         {
+        while (IS_DIGIT(ptr[1]))
+          ptr++;
         *errorcodeptr = ERR61;
         break;
         }
@@ -905,7 +940,7 @@
     c -= CHAR_0;
     while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
         c = c * 8 + *(++ptr) - CHAR_0;
-    if (!utf8 && c > 255) *errorcodeptr = ERR51;
+    if (!utf8 && c > 0xff) *errorcodeptr = ERR51;
     break;


     /* \x is complicated. \x{ddd} is a character number which can be greater
@@ -917,7 +952,8 @@
       {
       /* In JavaScript, \x must be followed by two hexadecimal numbers.
       Otherwise it is a lowercase x letter. */
-      if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
+      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
         {
         c = 0;
         for (i = 0; i < 2; ++i)
@@ -941,7 +977,7 @@
       int count = 0;


       c = 0;
-      while ((digitab[*pt] & ctype_xdigit) != 0)
+      while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
         {
         register int cc = *pt++;
         if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
@@ -958,7 +994,13 @@


       if (*pt == CHAR_RIGHT_CURLY_BRACKET)
         {
-        if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
+#ifdef COMPILE_PCRE8
+        if (c < 0 || count > (utf8? 8:2)) *errorcodeptr = ERR34;
+#else
+#ifdef COMPILE_PCRE16
+        if (c < 0 || count > (utf8? 8:4)) *errorcodeptr = ERR34;
+#endif
+#endif
         ptr = pt;
         break;
         }
@@ -970,7 +1012,7 @@
     /* Read just a single-byte hex-defined char */


     c = 0;
-    while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
+    while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
       {
       int cc;                                  /* Some compilers don't like */
       cc = *(++ptr);                           /* ++ in initializers */
@@ -1169,7 +1211,7 @@
 /* Read the minimum value and do a paranoid check: a negative value indicates
 an integer overflow. */


-while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
+while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0;
 if (min < 0 || min > 65535)
   {
   *errorcodeptr = ERR5;
@@ -1184,7 +1226,7 @@
   if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
     {
     max = 0;
-    while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
+    while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0;
     if (max < 0 || max > 65535)
       {
       *errorcodeptr = ERR5;
@@ -3258,8 +3300,8 @@
   codeptr        points to the pointer to the current code point
   ptrptr         points to the current pattern pointer
   errorcodeptr   points to error code variable
-  firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
-  reqbyteptr     set to the last literal character required, else < 0
+  firstcharptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
+  reqcharptr     set to the last literal character required, else < 0
   bcptr          points to current branch chain
   cond_depth     conditional nesting depth
   cd             contains pointers to tables etc.
@@ -3272,17 +3314,17 @@


 static BOOL
 compile_branch(int *optionsptr, pcre_uchar **codeptr,
-  const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
-  int *reqbyteptr, branch_chain *bcptr, int cond_depth, compile_data *cd,
-  int *lengthptr)
+  const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr,
+  pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth,
+  compile_data *cd, int *lengthptr)
 {
 int repeat_type, op_type;
 int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
 int bravalue = 0;
 int greedy_default, greedy_non_default;
-int firstbyte, reqbyte;
-int zeroreqbyte, zerofirstbyte;
-int req_caseopt, reqvary, tempreqvary;
+pcre_int32 firstchar, reqchar;
+pcre_int32 zeroreqchar, zerofirstchar;
+pcre_int32 req_caseopt, reqvary, tempreqvary;
 int options = *optionsptr;               /* May change dynamically */
 int after_manual_callout = 0;
 int length_prevgroup = 0;
@@ -3292,7 +3334,7 @@
 pcre_uchar *orig_code = code;
 pcre_uchar *tempcode;
 BOOL inescq = FALSE;
-BOOL groupsetfirstbyte = FALSE;
+BOOL groupsetfirstchar = FALSE;
 const pcre_uchar *ptr = *ptrptr;
 const pcre_uchar *tempptr;
 const pcre_uchar *nestptr = NULL;
@@ -3331,22 +3373,23 @@


/* Initialize no first byte, no required byte. REQ_UNSET means "no char
matching encountered yet". It gets changed to REQ_NONE if we hit something that
-matches a non-fixed char first char; reqbyte just remains unset if we never
+matches a non-fixed char first char; reqchar just remains unset if we never
find one.

When we hit a repeat whose minimum is zero, we may have to adjust these values
to take the zero repeat into account. This is implemented by setting them to
-zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual
+zerofirstbyte and zeroreqchar when such a repeat is encountered. The individual
item types that can be repeated set these backoff variables appropriately. */

-firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;
+firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET;

-/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
-according to the current setting of the caseless flag. REQ_CASELESS is a bit
-value > 255. It is added into the firstbyte or reqbyte variables to record the
-case status of the value. This is used only for ASCII characters. */
+/* The variable req_caseopt contains either the REQ_CASELESS value
+or zero, according to the current setting of the caseless flag. The
+REQ_CASELESS leaves the lower 28 bit empty. It is added into the
+firstchar or reqchar variables to record the case status of the
+value. This is used only for ASCII characters. */

-req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0;

/* Switch on next character until the end of the branch */

@@ -3364,8 +3407,8 @@
   int recno;
   int refsign;
   int skipbytes;
-  int subreqbyte;
-  int subfirstbyte;
+  int subreqchar;
+  int subfirstchar;
   int terminator;
   int mclength;
   int tempbracount;
@@ -3528,8 +3571,8 @@
     case 0:                        /* The branch terminates at string end */
     case CHAR_VERTICAL_LINE:       /* or | or ) */
     case CHAR_RIGHT_PARENTHESIS:
-    *firstbyteptr = firstbyte;
-    *reqbyteptr = reqbyte;
+    *firstcharptr = firstchar;
+    *reqcharptr = reqchar;
     *codeptr = code;
     *ptrptr = ptr;
     if (lengthptr != NULL)
@@ -3553,7 +3596,7 @@
     previous = NULL;
     if ((options & PCRE_MULTILINE) != 0)
       {
-      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
       *code++ = OP_CIRCM;
       }
     else *code++ = OP_CIRC;
@@ -3565,12 +3608,12 @@
     break;


     /* There can never be a first char if '.' is first, whatever happens about
-    repeats. The value of reqbyte doesn't change either. */
+    repeats. The value of reqchar doesn't change either. */


     case CHAR_DOT:
-    if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-    zerofirstbyte = firstbyte;
-    zeroreqbyte = reqbyte;
+    if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+    zerofirstchar = firstchar;
+    zeroreqchar = reqchar;
     previous = code;
     *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
     break;
@@ -3644,8 +3687,8 @@
         (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
       {
       *code++ = negate_class? OP_ALLANY : OP_FAIL;
-      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-      zerofirstbyte = firstbyte;
+      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+      zerofirstchar = firstchar;
       break;
       }


@@ -4335,9 +4378,9 @@
     The optimization throws away the bit map. We turn the item into a
     1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
     Note that OP_NOT[I] does not support multibyte characters. In the positive
-    case, it can cause firstbyte to be set. Otherwise, there can be no first
+    case, it can cause firstchar to be set. Otherwise, there can be no first
     char if this item is first, whatever repeat count may follow. In the case
-    of reqbyte, save the previous value for reinstating. */
+    of reqchar, save the previous value for reinstating. */


 #ifdef SUPPORT_UTF
     if (class_charcount == 1 && !xclass &&
@@ -4348,14 +4391,14 @@
     if (class_charcount == 1 && !xclass)
 #endif
       {
-      zeroreqbyte = reqbyte;
+      zeroreqchar = reqchar;


       /* The OP_NOT[I] opcodes work on one-byte characters only. */


       if (negate_class)
         {
-        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-        zerofirstbyte = firstbyte;
+        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+        zerofirstchar = firstchar;
         *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
         *code++ = class_lastchar;
         break;
@@ -4378,12 +4421,12 @@


     /* The general case - not the one-char optimization. If this is the first
     thing in the branch, there can be no first char setting, whatever the
-    repeat count. Any reqbyte setting must remain unchanged after any kind of
+    repeat count. Any reqchar setting must remain unchanged after any kind of
     repeat. */


-    if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-    zerofirstbyte = firstbyte;
-    zeroreqbyte = reqbyte;
+    if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+    zerofirstchar = firstchar;
+    zeroreqchar = reqchar;


     /* If there are characters with values > 255, we have to compile an
     extended class, with its own opcode, unless there was a negated special
@@ -4476,8 +4519,8 @@


     if (repeat_min == 0)
       {
-      firstbyte = zerofirstbyte;    /* Adjust for zero repeat */
-      reqbyte = zeroreqbyte;        /* Ditto */
+      firstchar = zerofirstchar;    /* Adjust for zero repeat */
+      reqchar = zeroreqchar;        /* Ditto */
       }


     /* Remember whether this is a variable length repeat */
@@ -4542,8 +4585,8 @@


     /* If previous was a character match, abolish the item and generate a
     repeat item instead. If a char item has a minumum of more than one, ensure
-    that it is set in reqbyte - it might not be if a sequence such as x{3} is
-    the first thing in a branch because the x will have gone into firstbyte
+    that it is set in reqchar - it might not be if a sequence such as x{3} is
+    the first thing in a branch because the x will have gone into firstchar
     instead.  */


     if (*previous == OP_CHAR || *previous == OP_CHARI)
@@ -4572,7 +4615,7 @@


         {
         c = code[-1];
-        if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;
+        if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
         }


       /* If the repetition is unlimited, it pays to see if the next thing on
@@ -4971,7 +5014,7 @@


           else
             {
-            if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
+            if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;
             for (i = 1; i < repeat_min; i++)
               {
               pcre_uchar *hc;
@@ -5274,7 +5317,7 @@
       }


     /* In all case we no longer have a previous item. We also set the
-    "follows varying string" flag for subsequently encountered reqbytes if
+    "follows varying string" flag for subsequently encountered reqchars if
     it isn't already set and we have just passed a varying length item. */


     END_REPEAT:
@@ -5352,8 +5395,8 @@
               }
             *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;


-            /* Do not set firstbyte after *ACCEPT */
-            if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+            /* Do not set firstchar after *ACCEPT */
+            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
             }


           /* Handle other cases with/without an argument */
@@ -5506,8 +5549,7 @@
         while ((cd->ctypes[*ptr] & ctype_word) != 0)
           {
           if (recno >= 0)
-            recno = ((digitab[*ptr] & ctype_digit) != 0)?
-              recno * 10 + *ptr - CHAR_0 : -1;
+            recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1;
           ptr++;
           }
         namelen = (int)(ptr - name);
@@ -5597,7 +5639,7 @@
           recno = 0;
           for (i = 1; i < namelen; i++)
             {
-            if ((digitab[name[i]] & ctype_digit) == 0)
+            if (!IS_DIGIT(name[i]))
               {
               *errorcodeptr = ERR15;
               goto FAILED;
@@ -5697,8 +5739,9 @@
         *code++ = OP_CALLOUT;
           {
           int n = 0;
-          while ((digitab[*(++ptr)] & ctype_digit) != 0)
-            n = n * 10 + *ptr - CHAR_0;
+          ptr++;
+          while(IS_DIGIT(*ptr))
+            n = n * 10 + *ptr++ - CHAR_0;
           if (*ptr != CHAR_RIGHT_PARENTHESIS)
             {
             *errorcodeptr = ERR39;
@@ -5981,7 +6024,7 @@
           if ((refsign = *ptr) == CHAR_PLUS)
             {
             ptr++;
-            if ((digitab[*ptr] & ctype_digit) == 0)
+            if (!IS_DIGIT(*ptr))
               {
               *errorcodeptr = ERR63;
               goto FAILED;
@@ -5989,13 +6032,13 @@
             }
           else if (refsign == CHAR_MINUS)
             {
-            if ((digitab[ptr[1]] & ctype_digit) == 0)
+            if (!IS_DIGIT(ptr[1]))
               goto OTHER_CHAR_AFTER_QUERY;
             ptr++;
             }


           recno = 0;
-          while((digitab[*ptr] & ctype_digit) != 0)
+          while(IS_DIGIT(*ptr))
             recno = recno * 10 + *ptr++ - CHAR_0;


           if (*ptr != terminator)
@@ -6093,7 +6136,7 @@


         /* Can't determine a first byte now */


-        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
         continue;



@@ -6150,7 +6193,7 @@
         both phases.


         If we are not at the pattern start, reset the greedy defaults and the
-        case value for firstbyte and reqbyte. */
+        case value for firstchar and reqchar. */


         if (*ptr == CHAR_RIGHT_PARENTHESIS)
           {
@@ -6163,7 +6206,7 @@
             {
             greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
             greedy_non_default = greedy_default ^ 1;
-            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
             }


           /* Change options at this level, and pass them back for use
@@ -6226,8 +6269,8 @@
          skipbytes,                       /* Skip over bracket number */
          cond_depth +
            ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
-         &subfirstbyte,                   /* For possible first char */
-         &subreqbyte,                     /* For possible last char */
+         &subfirstchar,                   /* For possible first char */
+         &subreqchar,                     /* For possible last char */
          bcptr,                           /* Current branch chain */
          cd,                              /* Tables block */
          (lengthptr == NULL)? NULL :      /* Actual compile phase */
@@ -6278,7 +6321,7 @@
         }


       /* A "normal" conditional group. If there is just one branch, we must not
-      make use of its firstbyte or reqbyte, because this is equivalent to an
+      make use of its firstchar or reqchar, because this is equivalent to an
       empty second branch. */


       else
@@ -6288,7 +6331,7 @@
           *errorcodeptr = ERR27;
           goto FAILED;
           }
-        if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;
+        if (condcount == 1) subfirstchar = subreqchar = REQ_NONE;
         }
       }


@@ -6332,55 +6375,55 @@
     /* Handle updating of the required and first characters for other types of
     group. Update for normal brackets of all kinds, and conditions with two
     branches (see code above). If the bracket is followed by a quantifier with
-    zero repeat, we have to back off. Hence the definition of zeroreqbyte and
-    zerofirstbyte outside the main loop so that they can be accessed for the
+    zero repeat, we have to back off. Hence the definition of zeroreqchar and
+    zerofirstchar outside the main loop so that they can be accessed for the
     back off. */


-    zeroreqbyte = reqbyte;
-    zerofirstbyte = firstbyte;
-    groupsetfirstbyte = FALSE;
+    zeroreqchar = reqchar;
+    zerofirstchar = firstchar;
+    groupsetfirstchar = FALSE;


     if (bravalue >= OP_ONCE)
       {
-      /* If we have not yet set a firstbyte in this branch, take it from the
+      /* If we have not yet set a firstchar in this branch, take it from the
       subpattern, remembering that it was set here so that a repeat of more
-      than one can replicate it as reqbyte if necessary. If the subpattern has
-      no firstbyte, set "none" for the whole branch. In both cases, a zero
-      repeat forces firstbyte to "none". */
+      than one can replicate it as reqchar if necessary. If the subpattern has
+      no firstchar, set "none" for the whole branch. In both cases, a zero
+      repeat forces firstchar to "none". */


-      if (firstbyte == REQ_UNSET)
+      if (firstchar == REQ_UNSET)
         {
-        if (subfirstbyte >= 0)
+        if (subfirstchar >= 0)
           {
-          firstbyte = subfirstbyte;
-          groupsetfirstbyte = TRUE;
+          firstchar = subfirstchar;
+          groupsetfirstchar = TRUE;
           }
-        else firstbyte = REQ_NONE;
-        zerofirstbyte = REQ_NONE;
+        else firstchar = REQ_NONE;
+        zerofirstchar = REQ_NONE;
         }


-      /* If firstbyte was previously set, convert the subpattern's firstbyte
-      into reqbyte if there wasn't one, using the vary flag that was in
+      /* If firstchar was previously set, convert the subpattern's firstchar
+      into reqchar if there wasn't one, using the vary flag that was in
       existence beforehand. */


-      else if (subfirstbyte >= 0 && subreqbyte < 0)
-        subreqbyte = subfirstbyte | tempreqvary;
+      else if (subfirstchar >= 0 && subreqchar < 0)
+        subreqchar = subfirstchar | tempreqvary;


       /* If the subpattern set a required byte (or set a first byte that isn't
       really the first byte - see above), set it. */


-      if (subreqbyte >= 0) reqbyte = subreqbyte;
+      if (subreqchar >= 0) reqchar = subreqchar;
       }


-    /* For a forward assertion, we take the reqbyte, if set. This can be
+    /* For a forward assertion, we take the reqchar, if set. This can be
     helpful if the pattern that follows the assertion doesn't set a different
-    char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte
+    char. For example, it's useful for /(?=abcde).+/. We can't set firstchar
     for an assertion, however because it leads to incorrect effect for patterns
-    such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead
-    of a firstbyte. This is overcome by a scan at the end if there's no
-    firstbyte, looking for an asserted first char. */
+    such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead
+    of a firstchar. This is overcome by a scan at the end if there's no
+    firstchar, looking for an asserted first char. */


-    else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;
+    else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar;
     break;     /* End of processing '(' */



@@ -6413,13 +6456,13 @@
       /* For metasequences that actually match a character, we disable the
       setting of a first character if it hasn't already been set. */


-      if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
-        firstbyte = REQ_NONE;
+      if (firstchar == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
+        firstchar = REQ_NONE;


       /* Set values to reset to if this is followed by a zero repeat. */


-      zerofirstbyte = firstbyte;
-      zeroreqbyte = reqbyte;
+      zerofirstchar = firstchar;
+      zeroreqchar = reqchar;


       /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
       is a subroutine call by number (Oniguruma syntax). In fact, the value
@@ -6470,7 +6513,7 @@
         /* Test a signed number in angle brackets or quotes. */


         p = ptr + 2;
-        while ((digitab[*p] & ctype_digit) != 0) p++;
+        while (IS_DIGIT(*p)) p++;
         if (*p != terminator)
           {
           *errorcodeptr = ERR57;
@@ -6498,7 +6541,7 @@
         goto NAMED_REF_OR_RECURSE;
         }


-      /* Back references are handled specially; must disable firstbyte if
+      /* Back references are handled specially; must disable firstchar if
       not set to cope with cases like (?=(\w+))\1: which would otherwise set
       ':' later. */


@@ -6508,7 +6551,7 @@
         recno = -c - ESC_REF;


         HANDLE_REFERENCE:    /* Come here from named backref handling */
-        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
         previous = code;
         *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
         PUT2INC(code, 0, recno);
@@ -6631,34 +6674,34 @@


     /* Set the first and required bytes appropriately. If no previous first
     byte, set it from this character, but revert to none on a zero repeat.
-    Otherwise, leave the firstbyte value alone, and don't change it on a zero
+    Otherwise, leave the firstchar value alone, and don't change it on a zero
     repeat. */


-    if (firstbyte == REQ_UNSET)
+    if (firstchar == REQ_UNSET)
       {
-      zerofirstbyte = REQ_NONE;
-      zeroreqbyte = reqbyte;
+      zerofirstchar = REQ_NONE;
+      zeroreqchar = reqchar;


-      /* If the character is more than one byte long, we can set firstbyte
+      /* If the character is more than one byte long, we can set firstchar
       only if it is not to be matched caselessly. */


       if (mclength == 1 || req_caseopt == 0)
         {
-        firstbyte = mcbuffer[0] | req_caseopt;
-        if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;
+        firstchar = mcbuffer[0] | req_caseopt;
+        if (mclength != 1) reqchar = code[-1] | cd->req_varyopt;
         }
-      else firstbyte = reqbyte = REQ_NONE;
+      else firstchar = reqchar = REQ_NONE;
       }


-    /* firstbyte was previously set; we can set reqbyte only if the length is
+    /* firstchar was previously set; we can set reqchar only if the length is
     1 or the matching is caseful. */


     else
       {
-      zerofirstbyte = firstbyte;
-      zeroreqbyte = reqbyte;
+      zerofirstchar = firstchar;
+      zeroreqchar = reqchar;
       if (mclength == 1 || req_caseopt == 0)
-        reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
+        reqchar = code[-1] | req_caseopt | cd->req_varyopt;
       }


     break;            /* End of literal character handling */
@@ -6698,8 +6741,8 @@
   reset_bracount TRUE to reset the count for each branch
   skipbytes      skip this many bytes at start (for brackets and OP_COND)
   cond_depth     depth of nesting for conditional subpatterns
-  firstbyteptr   place to put the first required character, or a negative number
-  reqbyteptr     place to put the last required character, or a negative number
+  firstcharptr   place to put the first required character, or a negative number
+  reqcharptr     place to put the last required character, or a negative number
   bcptr          pointer to the chain of currently open branches
   cd             points to the data block with tables pointers etc.
   lengthptr      NULL during the real compile phase
@@ -6711,8 +6754,8 @@
 static BOOL
 compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
   int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
-  int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
-  compile_data *cd, int *lengthptr)
+  int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr,
+  branch_chain *bcptr, compile_data *cd, int *lengthptr)
 {
 const pcre_uchar *ptr = *ptrptr;
 pcre_uchar *code = *codeptr;
@@ -6721,8 +6764,8 @@
 pcre_uchar *reverse_count = NULL;
 open_capitem capitem;
 int capnumber = 0;
-int firstbyte, reqbyte;
-int branchfirstbyte, branchreqbyte;
+pcre_int32 firstchar, reqchar;
+pcre_int32 branchfirstchar, branchreqchar;
 int length;
 int orig_bracount;
 int max_bracount;
@@ -6731,7 +6774,7 @@
 bc.outer = bcptr;
 bc.current_branch = code;


-firstbyte = reqbyte = REQ_UNSET;
+firstchar = reqchar = REQ_UNSET;

/* Accumulate the length for use in the pre-compile phase. Start with the
length of the BRA and KET and any extra bytes that are required at the
@@ -6790,8 +6833,8 @@
/* Now compile the branch; in the pre-compile phase its length gets added
into the length. */

-  if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
-        &branchreqbyte, &bc, cond_depth, cd,
+  if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
+        &branchreqchar, &bc, cond_depth, cd,
         (lengthptr == NULL)? NULL : &length))
     {
     *ptrptr = ptr;
@@ -6807,43 +6850,43 @@


   if (lengthptr == NULL)
     {
-    /* If this is the first branch, the firstbyte and reqbyte values for the
+    /* If this is the first branch, the firstchar and reqchar values for the
     branch become the values for the regex. */


     if (*last_branch != OP_ALT)
       {
-      firstbyte = branchfirstbyte;
-      reqbyte = branchreqbyte;
+      firstchar = branchfirstchar;
+      reqchar = branchreqchar;
       }


-    /* If this is not the first branch, the first char and reqbyte have to
+    /* If this is not the first branch, the first char and reqchar have to
     match the values from all the previous branches, except that if the
-    previous value for reqbyte didn't have REQ_VARY set, it can still match,
+    previous value for reqchar didn't have REQ_VARY set, it can still match,
     and we set REQ_VARY for the regex. */


     else
       {
-      /* If we previously had a firstbyte, but it doesn't match the new branch,
-      we have to abandon the firstbyte for the regex, but if there was
-      previously no reqbyte, it takes on the value of the old firstbyte. */
+      /* If we previously had a firstchar, but it doesn't match the new branch,
+      we have to abandon the firstchar for the regex, but if there was
+      previously no reqchar, it takes on the value of the old firstchar. */


-      if (firstbyte >= 0 && firstbyte != branchfirstbyte)
+      if (firstchar >= 0 && firstchar != branchfirstchar)
         {
-        if (reqbyte < 0) reqbyte = firstbyte;
-        firstbyte = REQ_NONE;
+        if (reqchar < 0) reqchar = firstchar;
+        firstchar = REQ_NONE;
         }


-      /* If we (now or from before) have no firstbyte, a firstbyte from the
-      branch becomes a reqbyte if there isn't a branch reqbyte. */
+      /* If we (now or from before) have no firstchar, a firstchar from the
+      branch becomes a reqchar if there isn't a branch reqchar. */


-      if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
-          branchreqbyte = branchfirstbyte;
+      if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0)
+          branchreqchar = branchfirstchar;


-      /* Now ensure that the reqbytes match */
+      /* Now ensure that the reqchars match */


-      if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
-        reqbyte = REQ_NONE;
-      else reqbyte |= branchreqbyte;   /* To "or" REQ_VARY */
+      if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY))
+        reqchar = REQ_NONE;
+      else reqchar |= branchreqchar;   /* To "or" REQ_VARY */
       }


     /* If lookbehind, check that this branch matches a fixed-length string, and
@@ -6933,8 +6976,8 @@


     *codeptr = code;
     *ptrptr = ptr;
-    *firstbyteptr = firstbyte;
-    *reqbyteptr = reqbyte;
+    *firstcharptr = firstchar;
+    *reqcharptr = reqchar;
     if (lengthptr != NULL)
       {
       if (OFLOW_MAX - *lengthptr < length)
@@ -7313,7 +7356,8 @@
 {
 real_pcre *re;
 int length = 1;  /* For final END opcode */
-int firstbyte, reqbyte, newline;
+pcre_int32 firstchar, reqchar;
+int newline;
 int errorcode = 0;
 int skipatstart = 0;
 BOOL utf8;
@@ -7541,7 +7585,7 @@
 code = cworkspace;
 *code = OP_BRA;
 (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
-  FALSE, 0, 0, &firstbyte, &reqbyte, NULL, cd, &length);
+  FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length);
 if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;


DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
@@ -7578,8 +7622,8 @@
re->options = cd->external_options;
re->flags = cd->external_flags;
re->dummy1 = 0;
-re->first_byte = 0;
-re->req_byte = 0;
+re->first_char = 0;
+re->req_char = 0;
re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar);
re->name_entry_size = cd->name_entry_size;
re->name_count = cd->names_found;
@@ -7615,12 +7659,12 @@
code = (pcre_uchar *)codestart;
*code = OP_BRA;
(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
- &firstbyte, &reqbyte, NULL, cd, NULL);
+ &firstchar, &reqchar, NULL, cd, NULL);
re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref;
re->flags = cd->external_flags;

-if (cd->had_accept) reqbyte = REQ_NONE; /* Must disable after (*ACCEPT) */
+if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */

/* If not reached end of pattern on success, there's an excess bracket. */

@@ -7726,13 +7770,21 @@
     re->options |= PCRE_ANCHORED;
   else
     {
-    if (firstbyte < 0)
-      firstbyte = find_firstassertedchar(codestart, FALSE);
-    if (firstbyte >= 0)   /* Remove caseless flag for non-caseable chars */
+    if (firstchar < 0)
+      firstchar = find_firstassertedchar(codestart, FALSE);
+    if (firstchar >= 0)   /* Remove caseless flag for non-caseable chars */
       {
-      int ch = firstbyte & 255;
-      re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
-         cd->fcc[ch] == ch)? ch : firstbyte;
+#ifdef COMPILE_PCRE8
+      re->first_char = firstchar & 0xff;
+#else
+#ifdef COMPILE_PCRE16
+      re->first_char = firstchar & 0xffff;
+#endif
+#endif
+      if ((firstchar & REQ_CASELESS) != 0 && MAX_255(re->first_char)
+        && cd->fcc[re->first_char] != re->first_char)
+        re->flags |= PCRE_FCH_CASELESS;
+
       re->flags |= PCRE_FIRSTSET;
       }
     else if (is_startline(codestart, 0, cd->backref_map))
@@ -7744,12 +7796,20 @@
 variable length item in the regex. Remove the caseless flag for non-caseable
 bytes. */


-if (reqbyte >= 0 &&
-     ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))
+if (reqchar >= 0 &&
+     ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))
   {
-  int ch = reqbyte & 255;
-  re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
-    cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
+#ifdef COMPILE_PCRE8
+  re->req_char = reqchar & 0xff;
+#else
+#ifdef COMPILE_PCRE16
+  re->req_char = reqchar & 0xffff;
+#endif
+#endif
+  if ((reqchar & REQ_CASELESS) != 0 && MAX_255(re->req_char)
+    && cd->fcc[re->req_char] != re->req_char)
+    re->flags |= PCRE_RCH_CASELESS;
+
   re->flags |= PCRE_REQCHSET;
   }


@@ -7764,19 +7824,19 @@

 if ((re->flags & PCRE_FIRSTSET) != 0)
   {
-  int ch = re->first_byte & 255;
-  const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
-    "" : " (caseless)";
-  if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);
+  pcre_uchar ch = re->first_char;
+  const char *caseless =
+    ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)";
+  if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless);
     else printf("First char = \\x%02x%s\n", ch, caseless);
   }


 if ((re->flags & PCRE_REQCHSET) != 0)
   {
-  int ch = re->req_byte & 255;
-  const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
-    "" : " (caseless)";
-  if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);
+  pcre_uchar ch = re->req_char;
+  const char *caseless =
+    ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)";
+  if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless);
     else printf("Req char = \\x%02x%s\n", ch, caseless);
   }



Modified: code/branches/pcre16/pcre_dfa_exec.c
===================================================================
--- code/branches/pcre16/pcre_dfa_exec.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_dfa_exec.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -3013,13 +3013,14 @@
 const pcre_study_data *study = NULL;
 real_pcre internal_re;


-const pcre_uint8 *req_byte_ptr;
+const pcre_uchar *req_char_ptr;
const pcre_uint8 *start_bits = NULL;
-BOOL first_byte_caseless = FALSE;
-BOOL req_byte_caseless = FALSE;
-int first_byte = -1;
-int req_byte = -1;
-int req_byte2 = -1;
+BOOL has_first_char = FALSE;
+BOOL has_req_char = FALSE;
+pcre_uchar first_char = 0;
+pcre_uchar first_char2 = 0;
+pcre_uchar req_char = 0;
+pcre_uchar req_char2 = 0;
int newline;

/* Plausibility checks */
@@ -3069,7 +3070,7 @@

current_subject = (const unsigned char *)subject + start_offset;
end_subject = (const unsigned char *)subject + length;
-req_byte_ptr = current_subject - 1;
+req_char_ptr = current_subject - 1;

 #ifdef SUPPORT_UTF8
 utf8 = (re->options & PCRE_UTF8) != 0;
@@ -3189,9 +3190,10 @@
   {
   if ((re->flags & PCRE_FIRSTSET) != 0)
     {
-    first_byte = re->first_byte & 255;
-    if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
-      first_byte = lcc[first_byte];
+    has_first_char = TRUE;
+    first_char = first_char2 = re->first_char;
+    if ((re->flags & PCRE_FCH_CASELESS) != 0)
+      first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
     }
   else
     {
@@ -3206,9 +3208,10 @@


 if ((re->flags & PCRE_REQCHSET) != 0)
   {
-  req_byte = re->req_byte & 255;
-  req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
-  req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
+  has_req_char = TRUE;
+  req_char = req_char2 = re->req_char;
+  if ((re->flags & PCRE_RCH_CASELESS) != 0)
+    req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
   }


/* Call the main matching function, looping for a non-anchored regex after a
@@ -3254,17 +3257,17 @@

     if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
       {
-      /* Advance to a known first byte. */
+      /* Advance to a known first char. */


-      if (first_byte >= 0)
+      if (has_first_char)
         {
-        if (first_byte_caseless)
+        if (first_char != first_char2)
           while (current_subject < end_subject &&
-                 lcc[*current_subject] != first_byte)
+              *current_subject != first_char && *current_subject != first_char2)
             current_subject++;
         else
           while (current_subject < end_subject &&
-                 *current_subject != first_byte)
+                 *current_subject != first_char)
             current_subject++;
         }


@@ -3344,8 +3347,8 @@
           (pcre_uint32)(end_subject - current_subject) < study->minlength)
         return PCRE_ERROR_NOMATCH;


-      /* If req_byte is set, we know that that character must appear in the
-      subject for the match to succeed. If the first character is set, req_byte
+      /* If req_char is set, we know that that character must appear in the
+      subject for the match to succeed. If the first character is set, req_char
       must be later in the subject; otherwise the test starts at the match
       point. This optimization can save a huge amount of work in patterns with
       nested unlimited repeats that aren't going to match. Writing separate
@@ -3357,28 +3360,28 @@
       patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
       string... so we don't do this when the string is sufficiently long. */


-      if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
+      if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
         {
-        register const pcre_uchar *p = current_subject + ((first_byte >= 0)? 1 : 0);
+        register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);


         /* We don't need to repeat the search if we haven't yet reached the
         place we found it at last time. */


-        if (p > req_byte_ptr)
+        if (p > req_char_ptr)
           {
-          if (req_byte_caseless)
+          if (req_char != req_char2)
             {
             while (p < end_subject)
               {
               register int pp = *p++;
-              if (pp == req_byte || pp == req_byte2) { p--; break; }
+              if (pp == req_char || pp == req_char2) { p--; break; }
               }
             }
           else
             {
             while (p < end_subject)
               {
-              if (*p++ == req_byte) { p--; break; }
+              if (*p++ == req_char) { p--; break; }
               }
             }


@@ -3391,7 +3394,7 @@
           found it, so that we don't search again next time round the loop if
           the start hasn't passed this character yet. */


-          req_byte_ptr = p;
+          req_char_ptr = p;
           }
         }
       }


Modified: code/branches/pcre16/pcre_exec.c
===================================================================
--- code/branches/pcre16/pcre_exec.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_exec.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -5964,17 +5964,18 @@
 #endif
 {
 int rc, ocount, arg_offset_max;
-int first_byte = -1;
-int req_byte = -1;
-int req_byte2 = -1;
 int newline;
 BOOL using_temporary_offsets = FALSE;
 BOOL anchored;
 BOOL startline;
 BOOL firstline;
-BOOL first_byte_caseless = FALSE;
-BOOL req_byte_caseless = FALSE;
 BOOL utf8;
+BOOL has_first_char = FALSE;
+BOOL has_req_char = FALSE;
+pcre_uchar first_char = 0;
+pcre_uchar first_char2 = 0;
+pcre_uchar req_char = 0;
+pcre_uchar req_char2 = 0;
 match_data match_block;
 match_data *md = &match_block;
 const pcre_uint8 *tables;
@@ -5982,7 +5983,7 @@
 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
 PCRE_PUCHAR end_subject;
 PCRE_PUCHAR start_partial = NULL;
-PCRE_PUCHAR req_byte_ptr = start_match - 1;
+PCRE_PUCHAR req_char_ptr = start_match - 1;


pcre_study_data internal_study;
const pcre_study_data *study;
@@ -6252,7 +6253,7 @@
md->offset_vector[0] = md->offset_vector[1] = -1;
}

-/* Set up the first character to match, if available. The first_byte value is
+/* Set up the first character to match, if available. The first_char value is
 never set for an anchored regular expression, but the anchoring may be forced
 at run time, so we have to test for anchoring. The first char may be unset for
 an unanchored pattern, of course. If there's no first char and the pattern was
@@ -6262,9 +6263,10 @@
   {
   if ((re->flags & PCRE_FIRSTSET) != 0)
     {
-    first_byte = re->first_byte & 255;
-    if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
-      first_byte = md->lcc[first_byte];
+    has_first_char = TRUE;
+    first_char = first_char2 = re->first_char;
+    if ((re->flags & PCRE_FCH_CASELESS) != 0)
+      first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);
     }
   else
     if (!startline && study != NULL &&
@@ -6277,14 +6279,13 @@


 if ((re->flags & PCRE_REQCHSET) != 0)
   {
-  req_byte = re->req_byte & 255;
-  req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
-  req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
+  has_req_char = TRUE;
+  req_char = req_char2 = re->req_char;
+  if ((re->flags & PCRE_RCH_CASELESS) != 0)
+    req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);
   }



-
-
/* ==========================================================================*/

/* Loop for handling unanchored repeated matching attempts; for anchored regexs
@@ -6327,15 +6328,16 @@

   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
     {
-    /* Advance to a unique first byte if there is one. */
+    /* Advance to a unique first char if there is one. */


-    if (first_byte >= 0)
+    if (has_first_char)
       {
-      if (first_byte_caseless)
-        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+      if (first_char != first_char2)
+        while (start_match < end_subject &&
+            *start_match != first_char && *start_match != first_char2)
           start_match++;
       else
-        while (start_match < end_subject && *start_match != first_byte)
+        while (start_match < end_subject && *start_match != first_char)
           start_match++;
       }


@@ -6418,8 +6420,8 @@
       break;
       }


-    /* If req_byte is set, we know that that character must appear in the
-    subject for the match to succeed. If the first character is set, req_byte
+    /* If req_char is set, we know that that character must appear in the
+    subject for the match to succeed. If the first character is set, req_char
     must be later in the subject; otherwise the test starts at the match point.
     This optimization can save a huge amount of backtracking in patterns with
     nested unlimited repeats that aren't going to match. Writing separate code
@@ -6432,28 +6434,28 @@
     32-megabyte string... so we don't do this when the string is sufficiently
     long. */


-    if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
+    if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
       {
-      register PCRE_PUCHAR p = start_match + ((first_byte >= 0)? 1 : 0);
+      register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);


       /* We don't need to repeat the search if we haven't yet reached the
       place we found it at last time. */


-      if (p > req_byte_ptr)
+      if (p > req_char_ptr)
         {
-        if (req_byte_caseless)
+        if (req_char != req_char2)
           {
           while (p < end_subject)
             {
             register int pp = *p++;
-            if (pp == req_byte || pp == req_byte2) { p--; break; }
+            if (pp == req_char || pp == req_char2) { p--; break; }
             }
           }
         else
           {
           while (p < end_subject)
             {
-            if (*p++ == req_byte) { p--; break; }
+            if (*p++ == req_char) { p--; break; }
             }
           }


@@ -6470,7 +6472,7 @@
         found it, so that we don't search again next time round the loop if
         the start hasn't passed this character yet. */


-        req_byte_ptr = p;
+        req_char_ptr = p;
         }
       }
     }


Modified: code/branches/pcre16/pcre_fullinfo.c
===================================================================
--- code/branches/pcre16/pcre_fullinfo.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_fullinfo.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -110,7 +110,7 @@


   case PCRE_INFO_FIRSTBYTE:
   *((int *)where) =
-    ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
+    ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
     ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
   break;


@@ -137,7 +137,7 @@

   case PCRE_INFO_LASTLITERAL:
   *((int *)where) =
-    ((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
+    ((re->flags & PCRE_REQCHSET) != 0)? re->req_char : -1;
   break;


case PCRE_INFO_NAMEENTRYSIZE:

Modified: code/branches/pcre16/pcre_info.c
===================================================================
--- code/branches/pcre16/pcre_info.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_info.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -85,7 +85,7 @@
   }
 if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
 if (first_byte != NULL)
-  *first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
+  *first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
      ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
 return re->top_bracket;
 }


Modified: code/branches/pcre16/pcre_internal.h
===================================================================
--- code/branches/pcre16/pcre_internal.h    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_internal.h    2011-12-01 06:08:45 UTC (rev 774)
@@ -230,15 +230,26 @@
 /* All character handling must be done as unsigned characters. Otherwise there
 are problems with top-bit-set characters and functions such as isspace().
 However, we leave the interface to the outside world as char * or short *,
-because that should make things easier for callers. We define a short type
-for the current character representation (either 8 or 16 bit) to save lots
-of typing. I tried "uchar", but it causes problems on Digital Unix, where
-it is defined in sys/types, so use "uschar" instead. */
+because that should make things easier for callers. This character type is
+called pcre_uchar.


+The IN_UCHARS macro multiply its argument with the byte size of the current
+pcre_uchar type. Useful for memcpy and such operations, whose require the
+byte size of their input/output buffers.
+
+The MAX_255 macro checks whether its pcre_uchar input is less than 256.
+
+The TABLE_GET macro is designed for accessing elements of tables whose contain
+exactly 256 items. When the character is able to contain more than 256
+items, some check is needed before accessing these tables.
+*/
+
#ifdef COMPILE_PCRE8

typedef unsigned char pcre_uchar;
#define IN_UCHARS(x) (x)
+#define MAX_255(c) 1
+#define TABLE_GET(c, table, default) ((table)[c])

#else

@@ -248,8 +259,11 @@
pcre.h(.in) and disable (comment out) this message. */
#error Warning: PCRE_SCHAR16 is not a 16 bit data type.
#endif
+
typedef pcre_uint16 pcre_uchar;
#define IN_UCHARS(x) ((x) << 1)
+#define MAX_255(c) ((c) <= 255u)
+#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))

#else
#error Unsupported compiling mode
@@ -693,12 +707,14 @@
compatibility. */

 #define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
-#define PCRE_FIRSTSET      0x0002  /* first_byte is set */
+#define PCRE_FIRSTSET      0x0002  /* first_char is set */
 #define PCRE_REQCHSET      0x0004  /* req_byte is set */
 #define PCRE_STARTLINE     0x0008  /* start after \n for multiline */
 #define PCRE_JCHANGED      0x0010  /* j option used in regex */
 #define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern */
 #define PCRE_HASTHEN       0x0040  /* pattern contains (*THEN) */
+#define PCRE_FCH_CASELESS  0x0080  /* caseless first char */
+#define PCRE_RCH_CASELESS  0x0100  /* caseless requested char */


/* Flags for the "extra" block produced by pcre_study(). */

@@ -747,12 +763,6 @@

#define REQ_BYTE_MAX 1000

-/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
-variable-length repeat, or a anything other than literal characters. */
-
-#define REQ_CASELESS 0x0100    /* indicates caselessness */
-#define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
-
 /* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
 environments where these macros are defined elsewhere. Unfortunately, there
 is no way to do the same for the typedef. */
@@ -1801,8 +1811,8 @@
   pcre_uint16 dummy1;             /* For future use */
   pcre_uint16 top_bracket;
   pcre_uint16 top_backref;
-  pcre_uint16 first_byte;
-  pcre_uint16 req_byte;
+  pcre_uint16 first_char;         /* Starting character */
+  pcre_uint16 req_char;           /* This character must be seen */
   pcre_uint16 name_table_offset;  /* Offset to name table that follows */
   pcre_uint16 name_entry_size;    /* Size of any name items */
   pcre_uint16 name_count;         /* Number of name items */


Modified: code/branches/pcre16/pcre_jit_compile.c
===================================================================
--- code/branches/pcre16/pcre_jit_compile.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_jit_compile.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -375,7 +375,7 @@
 /* Max limit of recursions. */
 #define CALL_LIMIT       (5 * sizeof(sljit_w))
 /* Last known position of the requested byte. */
-#define REQ_BYTE_PTR     (6 * sizeof(sljit_w))
+#define REQ_CHAR_PTR     (6 * sizeof(sljit_w))
 /* End pointer of the first line. */
 #define FIRSTLINE_END    (7 * sizeof(sljit_w))
 /* The output vector is stored on the stack, and contains pointers
@@ -1279,7 +1279,7 @@
 else
 #endif
   c = *cc;
-return common->fcc[c] != c;
+return MAX_255(c) ? common->fcc[c] != c : FALSE;
 }


static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
@@ -1295,7 +1295,7 @@
#endif
}
#endif
-return common->fcc[c];
+return TABLE_GET(c, common->fcc, c);
}

static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
@@ -1728,13 +1728,13 @@
return mainloop;
}

-static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)
+static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar firstchar, BOOL caseless, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *leave;
struct sljit_jump *found;
-pcre_uint16 oc, bit;
+pcre_uchar oc, bit;

if (firstline)
{
@@ -1744,23 +1744,24 @@

start = LABEL();
leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

-if ((firstbyte & REQ_CASELESS) == 0)
-  found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);
+oc = firstchar;
+if (caseless)
+  oc = TABLE_GET(firstchar, common->fcc, firstchar);
+if (firstchar == oc)
+  found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstchar);
 else
   {
-  firstbyte &= 0xff;
-  oc = common->fcc[firstbyte];
-  bit = firstbyte ^ oc;
+  bit = firstchar ^ oc;
   if (ispowerof2(bit))
     {
     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
-    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);
+    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstchar | bit);
     }
   else
     {
-    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);
+    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstchar);
     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
@@ -1915,7 +1916,7 @@
   OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
 }


-static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)
+static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar reqchar, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
@@ -1924,14 +1925,14 @@
struct sljit_jump *found;
struct sljit_jump *foundoc = NULL;
struct sljit_jump *notfound;
-pcre_uint16 oc, bit;
+pcre_uchar oc, bit;

-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);

-if (has_firstbyte)
+if (has_firstchar)
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);
else
OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
@@ -1940,21 +1941,22 @@
notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);

 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);
-if ((reqbyte & REQ_CASELESS) == 0)
-  found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);
+oc = reqchar;
+if (caseless)
+  oc = TABLE_GET(reqchar, common->fcc, reqchar);
+if (reqchar == oc)
+  found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
 else
   {
-  reqbyte &= 0xff;
-  oc = common->fcc[reqbyte];
-  bit = reqbyte ^ oc;
+  bit = reqchar ^ oc;
   if (ispowerof2(bit))
     {
     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
-    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);
+    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar | bit);
     }
   else
     {
-    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);
+    found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
     foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
     }
   }
@@ -1964,7 +1966,7 @@
 JUMPHERE(found);
 if (foundoc)
   JUMPHERE(foundoc);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
 JUMPHERE(alreadyfound);
 JUMPHERE(toolong);
 return notfound;
@@ -3092,16 +3094,16 @@


   case OP_CHAR:
   case OP_CHARI:
-  length = IN_UCHARS(1);
+  length = 1;
 #ifdef SUPPORT_UTF8
   if (common->utf8 && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
 #endif
   if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
     {
-    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));


-    context.length = length;
+    context.length = IN_UCHARS(length);
     context.sourcereg = -1;
 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
     context.ucharptr = 0;
@@ -3307,7 +3309,6 @@
   context.ucharptr = 0;
 #endif
   do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
-sljit_emit_op0(compiler, SLJIT_NOP);
   return cc;
   }


@@ -6291,7 +6292,7 @@
/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if ((re->flags & PCRE_REQCHSET) != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, SLJIT_TEMPORARY_REG1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0);

 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0);
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0);
@@ -6309,14 +6310,14 @@
   mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
   /* Forward search if possible. */
   if ((re->flags & PCRE_FIRSTSET) != 0)
-    fast_forward_first_byte(common, re->first_byte, (re->options & PCRE_FIRSTLINE) != 0);
+    fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
   else if ((re->flags & PCRE_STARTLINE) != 0)
     fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
   else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
     fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
   }
 if ((re->flags & PCRE_REQCHSET) != 0)
-  reqbyte_notfound = search_requested_char(common, re->req_byte, (re->flags & PCRE_FIRSTSET) != 0);
+  reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);


/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);

Modified: code/branches/pcre16/pcre_try_flipped.c
===================================================================
--- code/branches/pcre16/pcre_try_flipped.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcre_try_flipped.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -113,10 +113,10 @@
   (pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
 internal_re->top_backref =
   (pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
-internal_re->first_byte =
-  (pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
-internal_re->req_byte =
-  (pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
+internal_re->first_char =
+  (pcre_uint16)byteflip(re->first_char, sizeof(re->first_char));
+internal_re->req_char =
+  (pcre_uint16)byteflip(re->req_char, sizeof(re->req_char));
 internal_re->name_table_offset =
   (pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
 internal_re->name_entry_size =


Modified: code/branches/pcre16/pcretest.c
===================================================================
--- code/branches/pcre16/pcretest.c    2011-11-30 18:10:27 UTC (rev 773)
+++ code/branches/pcre16/pcretest.c    2011-12-01 06:08:45 UTC (rev 774)
@@ -1920,10 +1920,10 @@
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
       rre->top_backref =
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
-      rre->first_byte =
-        (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
-      rre->req_byte =
-        (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
+      rre->first_char =
+        (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
+      rre->req_char =
+        (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
         sizeof(rre->name_table_offset));
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
@@ -2079,13 +2079,14 @@
         }
       else
         {
-        int ch = first_char & 255;
-        const char *caseless = ((first_char & REQ_CASELESS) == 0)?
+        const char *caseless =
+          ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
           "" : " (caseless)";
-        if (PRINTHEX(ch))
-          fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
+
+        if (PRINTHEX(first_char))
+          fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
         else
-          fprintf(outfile, "First char = %d%s\n", ch, caseless);
+          fprintf(outfile, "First char = %d%s\n", first_char, caseless);
         }


       if (need_char < 0)
@@ -2094,13 +2095,14 @@
         }
       else
         {
-        int ch = need_char & 255;
-        const char *caseless = ((need_char & REQ_CASELESS) == 0)?
+        const char *caseless =
+          ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
           "" : " (caseless)";
-        if (PRINTHEX(ch))
-          fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
+
+        if (PRINTHEX(need_char))
+          fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
         else
-          fprintf(outfile, "Need char = %d%s\n", ch, caseless);
+          fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
         }


       /* Don't output study size; at present it is in any case a fixed