[Pcre-svn] [1078] code/trunk: pcre32: compile: Separate firs…

Kezdőlap
Üzenet törlése
Szerző: Subversion repository
Dátum:  
Címzett: pcre-svn
Tárgy: [Pcre-svn] [1078] code/trunk: pcre32: compile: Separate first/ req char flags from the character itself
Revision: 1078
          http://vcs.pcre.org/viewvc?view=rev&revision=1078
Author:   chpe
Date:     2012-10-16 16:55:00 +0100 (Tue, 16 Oct 2012)


Log Message:
-----------
pcre32: compile: Separate first/req char flags from the character itself

This is necessary so that 32-bit characters in non-UTF-32 mode can
be from the whole 32-bit range.

Modified Paths:
--------------
    code/trunk/pcre_compile.c
    code/trunk/pcre_internal.h


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2012-10-16 15:54:57 UTC (rev 1077)
+++ code/trunk/pcre_compile.c    2012-10-16 15:55:00 UTC (rev 1078)
@@ -84,8 +84,9 @@
     const pcre_uint32 *, unsigned int);


 static BOOL
-  compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, 
-    int, int, int *, int *, branch_chain *, compile_data *, int *);
+  compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
+    pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *, 
+    compile_data *, int *);




@@ -121,9 +122,11 @@

/* Private flags added to firstchar and reqchar. */

-#define REQ_CASELESS   0x10000000l      /* Indicates caselessness */
-#define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */
-#define REQ_MASK       (REQ_CASELESS | REQ_VARY)
+#define REQ_CASELESS    (1 << 0)        /* Indicates caselessness */
+#define REQ_VARY        (1 << 1)        /* Reqchar followed non-literal item */
+/* Negative values for the firstchar and reqchar flags */
+#define REQ_UNSET       (-2)
+#define REQ_NONE        (-1)


/* Repeated character flags. */

@@ -645,7 +648,6 @@



-
 /*************************************************
 *            Find an error text                  *
 *************************************************/
@@ -3666,8 +3668,10 @@
   codeptr        points to the pointer to the current code point
   ptrptr         points to the current pattern pointer
   errorcodeptr   points to error code variable
-  firstcharptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
-  reqcharptr     set to the last literal character required, else < 0
+  firstcharptr    place to put the first required character
+  firstcharflagsptr place to put the first character flags, or a negative number
+  reqcharptr     place to put the last required character
+  reqcharflagsptr place to put the last required character flags, or a negative number
   bcptr          points to current branch chain
   cond_depth     conditional nesting depth
   cd             contains pointers to tables etc.
@@ -3680,16 +3684,20 @@


 static BOOL
 compile_branch(int *optionsptr, pcre_uchar **codeptr,
-  const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr,
-  pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth,
+  const pcre_uchar **ptrptr, int *errorcodeptr,
+  pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
+  pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
+  branch_chain *bcptr, int cond_depth,
   compile_data *cd, int *lengthptr)
 {
 int repeat_type, op_type;
 int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
 int bravalue = 0;
 int greedy_default, greedy_non_default;
-pcre_int32 firstchar, reqchar;
-pcre_int32 zeroreqchar, zerofirstchar;
+pcre_uint32 firstchar, reqchar;
+pcre_int32 firstcharflags, reqcharflags;
+pcre_uint32 zeroreqchar, zerofirstchar;
+pcre_int32 zeroreqcharflags, zerofirstcharflags;
 pcre_int32 req_caseopt, reqvary, tempreqvary;
 int options = *optionsptr;               /* May change dynamically */
 int after_manual_callout = 0;
@@ -3752,7 +3760,8 @@
 zerofirstbyte and zeroreqchar when such a repeat is encountered. The individual
 item types that can be repeated set these backoff variables appropriately. */


-firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET;
+firstchar = reqchar = zerofirstchar = zeroreqchar = 0;
+firstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET;

 /* The variable req_caseopt contains either the REQ_CASELESS value
 or zero, according to the current setting of the caseless flag. The
@@ -3778,8 +3787,8 @@
   int recno;
   int refsign;
   int skipbytes;
-  int subreqchar;
-  int subfirstchar;
+  pcre_uint32 subreqchar, subfirstchar;
+  pcre_int32 subreqcharflags, subfirstcharflags;
   int terminator;
   int mclength;
   int tempbracount;
@@ -3946,7 +3955,9 @@
     case CHAR_VERTICAL_LINE:       /* or | or ) */
     case CHAR_RIGHT_PARENTHESIS:
     *firstcharptr = firstchar;
+    *firstcharflagsptr = firstcharflags;
     *reqcharptr = reqchar;
+    *reqcharflagsptr = reqcharflags;
     *codeptr = code;
     *ptrptr = ptr;
     if (lengthptr != NULL)
@@ -3970,7 +3981,7 @@
     previous = NULL;
     if ((options & PCRE_MULTILINE) != 0)
       {
-      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+      if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
       *code++ = OP_CIRCM;
       }
     else *code++ = OP_CIRC;
@@ -3985,9 +3996,11 @@
     repeats. The value of reqchar doesn't change either. */


     case CHAR_DOT:
-    if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+    if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
     zerofirstchar = firstchar;
+    zerofirstcharflags = firstcharflags;
     zeroreqchar = reqchar;
+    zeroreqcharflags = reqcharflags;
     previous = code;
     *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
     break;
@@ -4061,8 +4074,9 @@
         (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
       {
       *code++ = negate_class? OP_ALLANY : OP_FAIL;
-      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+      if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
       zerofirstchar = firstchar;
+      zerofirstcharflags = firstcharflags;
       break;
       }


@@ -4522,15 +4536,16 @@
         {
         ptr++;
         zeroreqchar = reqchar;
+        zeroreqcharflags = reqcharflags;


         if (negate_class)
           {
 #ifdef SUPPORT_UCP           
-            // FIXMEchpe pcreuint32?
           int d;
 #endif           
-          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
           zerofirstchar = firstchar;
+          zerofirstcharflags = firstcharflags;


           /* For caseless UTF-8 mode when UCP support is available, check
           whether this character has more than one other case. If so, generate
@@ -4618,9 +4633,11 @@
     setting, whatever the repeat count. Any reqchar setting must remain
     unchanged after any kind of repeat. */


-    if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+    if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
     zerofirstchar = firstchar;
+    zerofirstcharflags = firstcharflags;
     zeroreqchar = reqchar;
+    zeroreqcharflags = reqcharflags;


     /* If there are characters with values > 255, we have to compile an
     extended class, with its own opcode, unless there was a negated special
@@ -4715,7 +4732,9 @@
     if (repeat_min == 0)
       {
       firstchar = zerofirstchar;    /* Adjust for zero repeat */
+      firstcharflags = zerofirstcharflags;
       reqchar = zeroreqchar;        /* Ditto */
+      reqcharflags = zeroreqcharflags;
       }


     /* Remember whether this is a variable length repeat */
@@ -4818,7 +4837,10 @@
         {
         c = code[-1];
         if (*previous <= OP_CHARI && repeat_min > 1)
-          reqchar = c | req_caseopt | cd->req_varyopt;
+          {
+          reqchar = c;
+          reqcharflags = req_caseopt | cd->req_varyopt;
+          }
         }


       /* If the repetition is unlimited, it pays to see if the next thing on
@@ -5200,7 +5222,11 @@


           else
             {
-            if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;
+            if (groupsetfirstchar && reqcharflags < 0)
+              {
+              reqchar = firstchar;
+              reqcharflags = firstcharflags;
+              }


             for (i = 1; i < repeat_min; i++)
               {
@@ -5620,7 +5646,7 @@
               (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;


             /* Do not set firstchar after *ACCEPT */
-            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+            if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
             }


           /* Handle other cases with/without an argument */
@@ -6383,7 +6409,7 @@


         /* Can't determine a first byte now */


-        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+        if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
         continue;



@@ -6517,7 +6543,9 @@
          cond_depth +
            ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
          &subfirstchar,                   /* For possible first char */
+         &subfirstcharflags,
          &subreqchar,                     /* For possible last char */
+         &subreqcharflags,
          bcptr,                           /* Current branch chain */
          cd,                              /* Tables block */
          (lengthptr == NULL)? NULL :      /* Actual compile phase */
@@ -6578,7 +6606,7 @@
           *errorcodeptr = ERR27;
           goto FAILED;
           }
-        if (condcount == 1) subfirstchar = subreqchar = REQ_NONE;
+        if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE;
         }
       }


@@ -6627,7 +6655,9 @@
     back off. */


     zeroreqchar = reqchar;
+    zeroreqcharflags = reqcharflags;
     zerofirstchar = firstchar;
+    zerofirstcharflags = firstcharflags;
     groupsetfirstchar = FALSE;


     if (bravalue >= OP_ONCE)
@@ -6638,28 +6668,36 @@
       no firstchar, set "none" for the whole branch. In both cases, a zero
       repeat forces firstchar to "none". */


-      if (firstchar == REQ_UNSET)
+      if (firstcharflags == REQ_UNSET)
         {
-        if (subfirstchar >= 0)
+        if (subfirstcharflags >= 0)
           {
           firstchar = subfirstchar;
+          firstcharflags = subfirstcharflags;
           groupsetfirstchar = TRUE;
           }
-        else firstchar = REQ_NONE;
-        zerofirstchar = REQ_NONE;
+        else firstcharflags = REQ_NONE;
+        zerofirstcharflags = REQ_NONE;
         }


       /* If firstchar was previously set, convert the subpattern's firstchar
       into reqchar if there wasn't one, using the vary flag that was in
       existence beforehand. */


-      else if (subfirstchar >= 0 && subreqchar < 0)
-        subreqchar = subfirstchar | tempreqvary;
+      else if (subfirstcharflags >= 0 && subreqcharflags < 0)
+        {
+        subreqchar = subfirstchar;
+        subreqcharflags = subfirstcharflags | tempreqvary;
+        }


       /* If the subpattern set a required byte (or set a first byte that isn't
       really the first byte - see above), set it. */


-      if (subreqchar >= 0) reqchar = subreqchar;
+      if (subreqcharflags >= 0) 
+        {
+        reqchar = subreqchar;
+        reqcharflags = subreqcharflags;
+        }
       }


     /* For a forward assertion, we take the reqchar, if set. This can be
@@ -6670,7 +6708,11 @@
     of a firstchar. This is overcome by a scan at the end if there's no
     firstchar, looking for an asserted first char. */


-    else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar;
+    else if (bravalue == OP_ASSERT && subreqcharflags >= 0)
+      {
+      reqchar = subreqchar;
+      reqcharflags = subreqcharflags;
+      }
     break;     /* End of processing '(' */



@@ -6706,13 +6748,15 @@
       /* For metasequences that actually match a character, we disable the
       setting of a first character if it hasn't already been set. */


-      if (firstchar == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
-        firstchar = REQ_NONE;
+      if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
+        firstcharflags = REQ_NONE;


       /* Set values to reset to if this is followed by a zero repeat. */


       zerofirstchar = firstchar;
+      zerofirstcharflags = firstcharflags;
       zeroreqchar = reqchar;
+      zeroreqcharflags = reqcharflags;


       /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
       is a subroutine call by number (Oniguruma syntax). In fact, the value
@@ -6802,7 +6846,7 @@
         recno = -escape;


         HANDLE_REFERENCE:    /* Come here from named backref handling */
-        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+        if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
         previous = code;
         *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
         PUT2INC(code, 0, recno);
@@ -6929,7 +6973,7 @@
         *code++ = OP_PROP;
         *code++ = PT_CLIST;
         *code++ = c;
-        if (firstchar == REQ_UNSET) firstchar = zerofirstchar = REQ_NONE; 
+        if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE; 
         break;   
         }   
       }  
@@ -6950,10 +6994,11 @@
     Otherwise, leave the firstchar value alone, and don't change it on a zero
     repeat. */


-    if (firstchar == REQ_UNSET)
+    if (firstcharflags == REQ_UNSET)
       {
-      zerofirstchar = REQ_NONE;
+      zerofirstcharflags = REQ_NONE;
       zeroreqchar = reqchar;
+      zeroreqcharflags = reqcharflags;


       /* If the character is more than one byte long, we can set firstchar
       only if it is not to be matched caselessly. */
@@ -6961,9 +7006,16 @@
       if (mclength == 1 || req_caseopt == 0)
         {
         firstchar = mcbuffer[0] | req_caseopt;
-        if (mclength != 1) reqchar = code[-1] | cd->req_varyopt;
+        firstchar = mcbuffer[0];
+        firstcharflags = req_caseopt;
+
+        if (mclength != 1)
+          {
+          reqchar = code[-1];
+          reqcharflags = cd->req_varyopt;
+          }
         }
-      else firstchar = reqchar = REQ_NONE;
+      else firstcharflags = reqcharflags = REQ_NONE;
       }


     /* firstchar was previously set; we can set reqchar only if the length is
@@ -6972,9 +7024,14 @@
     else
       {
       zerofirstchar = firstchar;
+      zerofirstcharflags = firstcharflags;
       zeroreqchar = reqchar;
+      zeroreqcharflags = reqcharflags;
       if (mclength == 1 || req_caseopt == 0)
-        reqchar = code[-1] | req_caseopt | cd->req_varyopt;
+        {
+        reqchar = code[-1];
+        reqcharflags = req_caseopt | cd->req_varyopt;
+        }
       }


     break;            /* End of literal character handling */
@@ -6993,7 +7050,6 @@




-
 /*************************************************
 *     Compile sequence of alternatives           *
 *************************************************/
@@ -7014,8 +7070,10 @@
   reset_bracount TRUE to reset the count for each branch
   skipbytes      skip this many bytes at start (for brackets and OP_COND)
   cond_depth     depth of nesting for conditional subpatterns
-  firstcharptr   place to put the first required character, or a negative number
-  reqcharptr     place to put the last required character, or a negative number
+  firstcharptr    place to put the first required character
+  firstcharflagsptr place to put the first character flags, or a negative number
+  reqcharptr     place to put the last required character
+  reqcharflagsptr place to put the last required character flags, or a negative number
   bcptr          pointer to the chain of currently open branches
   cd             points to the data block with tables pointers etc.
   lengthptr      NULL during the real compile phase
@@ -7027,7 +7085,9 @@
 static BOOL
 compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
   int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
-  int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr,
+  int cond_depth,
+  pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr,
+  pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr,
   branch_chain *bcptr, compile_data *cd, int *lengthptr)
 {
 const pcre_uchar *ptr = *ptrptr;
@@ -7037,8 +7097,10 @@
 pcre_uchar *reverse_count = NULL;
 open_capitem capitem;
 int capnumber = 0;
-pcre_int32 firstchar, reqchar;
-pcre_int32 branchfirstchar, branchreqchar;
+pcre_uint32 firstchar, reqchar;
+pcre_int32 firstcharflags, reqcharflags;
+pcre_uint32 branchfirstchar, branchreqchar;
+pcre_int32 branchfirstcharflags, branchreqcharflags;
 int length;
 int orig_bracount;
 int max_bracount;
@@ -7047,7 +7109,8 @@
 bc.outer = bcptr;
 bc.current_branch = code;


-firstchar = reqchar = REQ_UNSET;
+firstchar = reqchar = 0;
+firstcharflags = reqcharflags = REQ_UNSET;

/* Accumulate the length for use in the pre-compile phase. Start with the
length of the BRA and KET and any extra bytes that are required at the
@@ -7107,8 +7170,8 @@
into the length. */

   if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
-        &branchreqchar, &bc, cond_depth, cd,
-        (lengthptr == NULL)? NULL : &length))
+        &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc, 
+        cond_depth, cd, (lengthptr == NULL)? NULL : &length))
     {
     *ptrptr = ptr;
     return FALSE;
@@ -7129,7 +7192,9 @@
     if (*last_branch != OP_ALT)
       {
       firstchar = branchfirstchar;
+      firstcharflags = branchfirstcharflags;
       reqchar = branchreqchar;
+      reqcharflags = branchreqcharflags;
       }


     /* If this is not the first branch, the first char and reqchar have to
@@ -7143,23 +7208,36 @@
       we have to abandon the firstchar for the regex, but if there was
       previously no reqchar, it takes on the value of the old firstchar. */


-      if (firstchar >= 0 && firstchar != branchfirstchar)
+      if (firstcharflags >= 0 && 
+          (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar))
         {
-        if (reqchar < 0) reqchar = firstchar;
-        firstchar = REQ_NONE;
+        if (reqcharflags < 0) 
+          {
+          reqchar = firstchar;
+          reqcharflags = firstcharflags;
+          }
+        firstcharflags = REQ_NONE;
         }


       /* If we (now or from before) have no firstchar, a firstchar from the
       branch becomes a reqchar if there isn't a branch reqchar. */


-      if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0)
-          branchreqchar = branchfirstchar;
+      if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0)
+        {
+        branchreqchar = branchfirstchar;
+        branchreqcharflags = branchfirstcharflags;
+        }


       /* Now ensure that the reqchars match */


-      if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY))
-        reqchar = REQ_NONE;
-      else reqchar |= branchreqchar;   /* To "or" REQ_VARY */
+      if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) ||
+          reqchar != branchreqchar)
+        reqcharflags = REQ_NONE;
+      else
+        {
+        reqchar = branchreqchar;
+        reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */
+        }
       }


     /* If lookbehind, check that this branch matches a fixed-length string, and
@@ -7255,7 +7333,9 @@
     *codeptr = code;
     *ptrptr = ptr;
     *firstcharptr = firstchar;
+    *firstcharflagsptr = firstcharflags;
     *reqcharptr = reqchar;
+    *reqcharflagsptr = reqcharflags;
     if (lengthptr != NULL)
       {
       if (OFLOW_MAX - *lengthptr < length)
@@ -7542,17 +7622,23 @@


 Arguments:
   code       points to start of expression (the bracket)
+  flags       points to the first char flags, or to REQ_NONE
   inassert   TRUE if in an assertion


-Returns:     -1 or the fixed first char
+Returns:     the fixed first char, or 0 with REQ_NONE in flags
 */


-static int
-find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
+static pcre_uint32
+find_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags,
+  BOOL inassert)
 {
-register int c = -1;
+register pcre_uint32 c = 0;
+int cflags = REQ_NONE;
+
+*flags = REQ_NONE;
 do {
-   int d;
+   pcre_uint32 d;
+   int dflags;
    int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
              *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
    const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
@@ -7562,7 +7648,7 @@
    switch(op)
      {
      default:
-     return -1;
+     return 0;


      case OP_BRA:
      case OP_BRAPOS:
@@ -7574,9 +7660,10 @@
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_COND:
-     if ((d = find_firstassertedchar(scode, op == OP_ASSERT)) < 0)
-       return -1;
-     if (c < 0) c = d; else if (c != d) return -1;
+     d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
+     if (dflags < 0)
+       return 0;
+     if (cflags < 0) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0;
      break;


      case OP_EXACT:
@@ -7587,9 +7674,9 @@
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSPLUS:
-     if (!inassert) return -1;
-     if (c < 0) c = scode[1];
-       else if (c != scode[1]) return -1;
+     if (!inassert) return 0;
+     if (cflags < 0) { c = scode[1]; cflags = 0; }
+       else if (c != scode[1]) return 0;
      break;


      case OP_EXACTI:
@@ -7600,15 +7687,17 @@
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_POSPLUSI:
-     if (!inassert) return -1;
-     if (c < 0) c = scode[1] | REQ_CASELESS;
-       else if (c != scode[1]) return -1;
+     if (!inassert) return 0;
+     if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
+       else if (c != scode[1]) return 0;
      break;
      }


    code += GET(code, 1);
    }
 while (*code == OP_ALT);
+
+*flags = cflags;
 return c;
 }


@@ -7676,7 +7765,8 @@
{
REAL_PCRE *re;
int length = 1; /* For final END opcode */
-pcre_int32 firstchar, reqchar;
+pcre_uint32 firstchar, reqchar;
+pcre_int32 firstcharflags, reqcharflags;
int newline;
int errorcode = 0;
int skipatstart = 0;
@@ -7926,7 +8016,8 @@
code = cworkspace;
*code = OP_BRA;
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
- FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length);
+ FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL,
+ cd, &length);
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;

DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
@@ -8004,13 +8095,17 @@
code = (pcre_uchar *)codestart;
*code = OP_BRA;
(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
- &firstchar, &reqchar, NULL, cd, NULL);
+ &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL);
re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref;
re->max_lookbehind = cd->max_lookbehind;
re->flags = cd->external_flags | PCRE_MODE;

-if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
+if (cd->had_accept)
+  {
+  reqchar = 0;              /* Must disable after (*ACCEPT) */
+  reqcharflags = REQ_NONE;
+  }


/* If not reached end of pattern on success, there's an excess bracket. */

@@ -8060,7 +8155,7 @@

 /* If there were any lookbehind assertions that contained OP_RECURSE
 (recursions or subroutine calls), a flag is set for them to be checked here,
-because they may contain forward references. Actual recursions can't be fixed
+because they may contain forward references. Actual recursions cannot be fixed
 length, but subroutine calls can. It is done like this so that those without
 OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The
 exceptional ones forgo this. We scan the pattern to check that they are fixed
@@ -8131,18 +8226,18 @@
   if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED;
   else
     {
-    if (firstchar < 0)
-      firstchar = find_firstassertedchar(codestart, FALSE);
-    if (firstchar >= 0)   /* Remove caseless flag for non-caseable chars */
+    if (firstcharflags < 0)
+      firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE);
+    if (firstcharflags >= 0)   /* Remove caseless flag for non-caseable chars */
       {
 #if defined COMPILE_PCRE8
       re->first_char = firstchar & 0xff;
 #elif defined COMPILE_PCRE16
       re->first_char = firstchar & 0xffff;
 #elif defined COMPILE_PCRE32
-      re->first_char = firstchar & ~REQ_MASK;
+      re->first_char = firstchar;
 #endif
-      if ((firstchar & REQ_CASELESS) != 0)
+      if ((firstcharflags & REQ_CASELESS) != 0)
         {
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
         /* We ignore non-ASCII first chars in 8 bit mode. */
@@ -8174,17 +8269,17 @@
 variable length item in the regex. Remove the caseless flag for non-caseable
 bytes. */


-if (reqchar >= 0 &&
-     ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))
+if (reqcharflags >= 0 &&
+     ((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0))
   {
 #if defined COMPILE_PCRE8
   re->req_char = reqchar & 0xff;
 #elif defined COMPILE_PCRE16
   re->req_char = reqchar & 0xffff;
 #elif defined COMPILE_PCRE32
-  re->req_char = reqchar & ~REQ_MASK;
+  re->req_char = reqchar;
 #endif
-  if ((reqchar & REQ_CASELESS) != 0)
+  if ((reqcharflags & REQ_CASELESS) != 0)
     {
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
     /* We ignore non-ASCII first chars in 8 bit mode. */


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2012-10-16 15:54:57 UTC (rev 1077)
+++ code/trunk/pcre_internal.h    2012-10-16 15:55:00 UTC (rev 1078)
@@ -1095,11 +1095,6 @@


#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */

-/* Negative values for the firstchar and reqchar variables */
-
-#define REQ_UNSET (-2)
-#define REQ_NONE (-1)
-
/* The maximum remaining length of subject we are prepared to search for a
req_byte match. */