[Pcre-svn] [924] code/trunk: OP_NOT now supports any UTF cha…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [924] code/trunk: OP_NOT now supports any UTF character
Revision: 924
          http://vcs.pcre.org/viewvc?view=rev&revision=924
Author:   zherczeg
Date:     2012-02-22 10:23:56 +0000 (Wed, 22 Feb 2012)


Log Message:
-----------
OP_NOT now supports any UTF character

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_jit_compile.c
    code/trunk/pcre_printint.c
    code/trunk/testdata/testinput17
    code/trunk/testdata/testinput5
    code/trunk/testdata/testoutput11-16
    code/trunk/testdata/testoutput11-8
    code/trunk/testdata/testoutput15
    code/trunk/testdata/testoutput16
    code/trunk/testdata/testoutput17
    code/trunk/testdata/testoutput18
    code/trunk/testdata/testoutput5


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/ChangeLog    2012-02-22 10:23:56 UTC (rev 924)
@@ -42,7 +42,9 @@
 7.  Individual JIT compile options can be set in pcretest by following -s+[+] 
     or /S+[+] with a digit between 1 and 7.


+8. OP_NOT now supports any UTF character, not just single-character ones.

+
Version 8.30 04-February-2012
-----------------------------


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_compile.c    2012-02-22 10:23:56 UTC (rev 924)
@@ -2225,32 +2225,60 @@
       {
       case OP_CHAR:
       case OP_CHARI:
+      case OP_NOT:
+      case OP_NOTI:
       case OP_EXACT:
       case OP_EXACTI:
+      case OP_NOTEXACT:
+      case OP_NOTEXACTI:
       case OP_UPTO:
       case OP_UPTOI:
+      case OP_NOTUPTO:
+      case OP_NOTUPTOI:
       case OP_MINUPTO:
       case OP_MINUPTOI:
+      case OP_NOTMINUPTO:
+      case OP_NOTMINUPTOI:
       case OP_POSUPTO:
       case OP_POSUPTOI:
+      case OP_NOTPOSUPTO:
+      case OP_NOTPOSUPTOI:
       case OP_STAR:
       case OP_STARI:
+      case OP_NOTSTAR:
+      case OP_NOTSTARI:
       case OP_MINSTAR:
       case OP_MINSTARI:
+      case OP_NOTMINSTAR:
+      case OP_NOTMINSTARI:
       case OP_POSSTAR:
       case OP_POSSTARI:
+      case OP_NOTPOSSTAR:
+      case OP_NOTPOSSTARI:
       case OP_PLUS:
       case OP_PLUSI:
+      case OP_NOTPLUS:
+      case OP_NOTPLUSI:
       case OP_MINPLUS:
       case OP_MINPLUSI:
+      case OP_NOTMINPLUS:
+      case OP_NOTMINPLUSI:
       case OP_POSPLUS:
       case OP_POSPLUSI:
+      case OP_NOTPOSPLUS:
+      case OP_NOTPOSPLUSI:
       case OP_QUERY:
       case OP_QUERYI:
+      case OP_NOTQUERY:
+      case OP_NOTQUERYI:
       case OP_MINQUERY:
       case OP_MINQUERYI:
+      case OP_NOTMINQUERY:
+      case OP_NOTMINQUERYI:
       case OP_POSQUERY:
       case OP_POSQUERYI:
+      case OP_NOTPOSQUERY:
+      case OP_NOTPOSQUERYI:
       if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
       break;
       }
@@ -3069,22 +3097,28 @@
 #endif  /* SUPPORT_UTF */
   return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */


- /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
- opcodes are not used for multi-byte characters, because they are coded using
- an XCLASS instead. */
-
case OP_NOT:
- return (c = *previous) == next;
+#ifdef SUPPORT_UTF
+ GETCHARTEST(c, previous);
+#else
+ c = *previous;
+#endif
+ return c == next;

   case OP_NOTI:
-  if ((c = *previous) == next) return TRUE;
 #ifdef SUPPORT_UTF
+  GETCHARTEST(c, previous);
+#else
+  c = *previous;
+#endif
+  if (c == next) return TRUE;
+#ifdef SUPPORT_UTF
   if (utf)
     {
     unsigned int othercase;
     if (next < 128) othercase = cd->fcc[next]; else
 #ifdef SUPPORT_UCP
-    othercase = UCD_OTHERCASE(next);
+    othercase = UCD_OTHERCASE((unsigned int)next);
 #else
     othercase = NOTACHAR;
 #endif
@@ -3092,7 +3126,7 @@
     }
   else
 #endif  /* SUPPORT_UTF */
-  return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next)));  /* Non-UTF-8 mode */
+  return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */


   /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
   When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
@@ -4485,27 +4519,16 @@
       if (class_single_char < 2) class_single_char++;


       /* If class_charcount is 1, we saw precisely one character. As long as
-      there were no negated characters >= 128 and there was no use of \p or \P,
-      in other words, no use of any XCLASS features, we can optimize.
+      there was no use of \p or \P, in other words, no use of any XCLASS features,
+      we can optimize.


-      In UTF-8 mode, we can optimize the negative case only if there were no
-      characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
-      operate on single-bytes characters only. This is an historical hangover.
-      Maybe one day we can tidy these opcodes to handle multi-byte characters.
-
       The optimization throws away the bit map. We turn the item into a
       1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
-      Note that OP_NOT[I] does not support multibyte characters. In the positive
-      case, it can cause firstchar to be set. Otherwise, there can be no first
-      char if this item is first, whatever repeat count may follow. In the case
-      of reqchar, save the previous value for reinstating. */
+      In the positive case, it can cause firstchar to be set. Otherwise, there
+      can be no first char if this item is first, whatever repeat count may
+      follow. In the case of reqchar, save the previous value for reinstating. */


-#ifdef SUPPORT_UTF
-      if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET
-        && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
-#else
       if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
-#endif
         {
         ptr++;
         zeroreqchar = reqchar;
@@ -4517,7 +4540,12 @@
           if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
           zerofirstchar = firstchar;
           *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
-          *code++ = c;
+#ifdef SUPPORT_UTF
+          if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
+            code += PRIV(ord2utf)(c, code);
+          else
+#endif
+            *code++ = c;
           goto NOT_CHAR;
           }


@@ -4775,15 +4803,22 @@

     /* Now handle repetition for the different types of item. */


-    /* If previous was a character match, abolish the item and generate a
-    repeat item instead. If a char item has a minumum of more than one, ensure
-    that it is set in reqchar - it might not be if a sequence such as x{3} is
-    the first thing in a branch because the x will have gone into firstchar
-    instead.  */
+    /* If previous was a character or negated character match, abolish the item
+    and generate a repeat item instead. If a char item has a minimum of more
+    than one, ensure that it is set in reqchar - it might not be if a sequence
+    such as x{3} is the first thing in a branch because the x will have gone
+    into firstchar instead.  */


-    if (*previous == OP_CHAR || *previous == OP_CHARI)
+    if (*previous == OP_CHAR || *previous == OP_CHARI
+        || *previous == OP_NOT || *previous == OP_NOTI)
       {
-      op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;
+      switch (*previous) {
+      default: /* Make compiler happy. */
+      case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
+      case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
+      case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
+      case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
+      }


       /* Deal with UTF characters that take up more than one character. It's
       easier to write this out separately than try to macrify it. Use c to
@@ -4806,7 +4841,8 @@
       with UTF disabled, or for a single character UTF character. */
         {
         c = code[-1];
-        if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
+        if (*previous <= OP_CHARI && repeat_min > 1)
+          reqchar = c | req_caseopt | cd->req_varyopt;
         }


       /* If the repetition is unlimited, it pays to see if the next thing on
@@ -4825,26 +4861,6 @@
       goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
       }


-    /* If previous was a single negated character ([^a] or similar), we use
-    one of the special opcodes, replacing it. The code is shared with single-
-    character repeats by setting opt_type to add a suitable offset into
-    repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI
-    are currently used only for single-byte chars. */
-
-    else if (*previous == OP_NOT || *previous == OP_NOTI)
-      {
-      op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;
-      c = previous[1];
-      if (!possessive_quantifier &&
-          repeat_max < 0 &&
-          check_auto_possessive(previous, utf, ptr + 1, options, cd))
-        {
-        repeat_type = 0;    /* Force greedy */
-        possessive_quantifier = TRUE;
-        }
-      goto OUTPUT_SINGLE_REPEAT;
-      }
-
     /* If previous was a character type match (\d or similar), abolish it and
     create a suitable repeat item. The code is shared with single-character
     repeats by setting op_type to add a suitable offset into repeat_type. Note


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_exec.c    2012-02-22 10:23:56 UTC (rev 924)
@@ -3565,33 +3565,41 @@
       SCHECK_PARTIAL();
       RRETURN(MATCH_NOMATCH);
       }
-    ecode++;
-    GETCHARINCTEST(c, eptr);
-    if (op == OP_NOTI)         /* The caseless case */
+#ifdef SUPPORT_UTF
+    if (utf)
       {
       register unsigned int ch, och;
-      ch = *ecode++;
-#ifdef COMPILE_PCRE8
-      /* ch must be < 128 if UTF is enabled. */
-      och = md->fcc[ch];
-#else
-#ifdef SUPPORT_UTF
+
+      ecode++;
+      GETCHARINC(ch, ecode);
+      GETCHARINC(c, eptr);
+
+      if (op == OP_NOT)
+        {
+        if (ch == c) RRETURN(MATCH_NOMATCH);
+        }
+      else
+        {
 #ifdef SUPPORT_UCP
-      if (utf && ch > 127)
-        och = UCD_OTHERCASE(ch);
+        if (ch > 127)
+          och = UCD_OTHERCASE(ch);
 #else
-      if (utf && ch > 127)
-        och = ch;
+        if (ch > 127)
+          och = ch;
 #endif /* SUPPORT_UCP */
-      else
-#endif /* SUPPORT_UTF */
-        och = TABLE_GET(ch, md->fcc, ch);
-#endif /* COMPILE_PCRE8 */
-      if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
+        else
+          och = TABLE_GET(ch, md->fcc, ch);
+        if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
+        }
       }
-    else    /* Caseful */
+    else
+#endif
       {
-      if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
+      register unsigned int ch = ecode[1];
+      c = *eptr++;
+      if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
+        RRETURN(MATCH_NOMATCH);
+      ecode += 2;
       }
     break;


@@ -3671,7 +3679,7 @@
     /* Common code for all repeated single-byte matches. */


     REPEATNOTCHAR:
-    fc = *ecode++;
+    GETCHARINCTEST(fc, ecode);


     /* The code is duplicated for the caseless and caseful cases, for speed,
     since matching characters is likely to be quite common. First, ensure the
@@ -3686,10 +3694,6 @@


     if (op >= OP_NOTSTARI)     /* Caseless */
       {
-#ifdef COMPILE_PCRE8
-      /* fc must be < 128 if UTF is enabled. */
-      foc = md->fcc[fc];
-#else
 #ifdef SUPPORT_UTF
 #ifdef SUPPORT_UCP
       if (utf && fc > 127)
@@ -3701,7 +3705,6 @@
       else
 #endif /* SUPPORT_UTF */
         foc = TABLE_GET(fc, md->fcc, fc);
-#endif /* COMPILE_PCRE8 */


 #ifdef SUPPORT_UTF
       if (utf)
@@ -3715,7 +3718,7 @@
             RRETURN(MATCH_NOMATCH);
             }
           GETCHARINC(d, eptr);
-          if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
+          if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
           }
         }
       else


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_jit_compile.c    2012-02-22 10:23:56 UTC (rev 924)
@@ -3602,7 +3602,7 @@
       add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
       }
     }
-  return cc + 1;
+  return cc + length;


case OP_CLASS:
case OP_NCLASS:
@@ -6652,9 +6652,9 @@
tables = PRIV(default_tables);

memset(&rootfallback, 0, sizeof(fallback_common));
+memset(common, 0, sizeof(compiler_common));
rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;

-common->compiler = NULL;
common->start = rootfallback.cc;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_w)(tables + lcc_offset);
@@ -6696,22 +6696,6 @@
common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
-common->partialmatchlabel = NULL;
-common->acceptlabel = NULL;
-common->stubs = NULL;
-common->entries = NULL;
-common->currententry = NULL;
-common->partialmatch = NULL;
-common->accept = NULL;
-common->calllimit = NULL;
-common->stackalloc = NULL;
-common->revertframes = NULL;
-common->wordboundary = NULL;
-common->anynewline = NULL;
-common->hspace = NULL;
-common->vspace = NULL;
-common->casefulcmp = NULL;
-common->caselesscmp = NULL;
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
@@ -6719,23 +6703,11 @@
#ifdef SUPPORT_UCP
common->use_ucp = (re->options & PCRE_UCP) != 0;
#endif
-common->utfreadchar = NULL;
-#ifdef COMPILE_PCRE8
-common->utfreadtype8 = NULL;
-#endif
#endif /* SUPPORT_UTF */
-#ifdef SUPPORT_UCP
-common->getucd = NULL;
-#endif
ccend = bracketend(rootfallback.cc);

/* Calculate the local space size on the stack. */
common->ovector_start = CALL_LIMIT + sizeof(sljit_w);
-common->req_char_ptr = 0;
-common->recursive_head = 0;
-common->start_used_ptr = 0;
-common->hit_start = 0;
-common->first_line_end = 0;

SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
common->localsize = get_localspace(common, rootfallback.cc, ccend);
@@ -6768,6 +6740,7 @@
if ((common->ovector_start & sizeof(sljit_w)) != 0)
common->ovector_start += sizeof(sljit_w);

+SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
@@ -7170,7 +7143,12 @@
int
PRIV(jit_get_size)(void *executable_funcs)
{
-return ((executable_functions *)executable_funcs)->executable_sizes[PCRE_STUDY_JIT_COMPILE];
+int i;
+sljit_uw size = 0;
+sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
+for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
+ size += executable_sizes[i];
+return (int)size;
}

const char*

Modified: code/trunk/pcre_printint.c
===================================================================
--- code/trunk/pcre_printint.c    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_printint.c    2012-02-22 10:23:56 UTC (rev 924)
@@ -477,12 +477,9 @@
     flag = "/i";
     /* Fall through */
     case OP_NOT:
-    c = code[1];
-    if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
-    else if (utf || c > 0xff)
-      fprintf(f, " %s [^\\x{%02x}]", flag, c);
-    else
-      fprintf(f, " %s [^\\x%02x]", flag, c);
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1, utf);
+    fprintf(f, "]");
     break;


     case OP_NOTSTARI:
@@ -506,10 +503,9 @@
     case OP_NOTQUERY:
     case OP_NOTMINQUERY:
     case OP_NOTPOSQUERY:
-    c = code[1];
-    if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
-      else fprintf(f, " %s [^\\x%02x]", flag, c);
-    fprintf(f, "%s", priv_OP_names[*code]);
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1, utf);
+    fprintf(f, "]%s", priv_OP_names[*code]);
     break;


     case OP_NOTEXACTI:
@@ -523,9 +519,9 @@
     case OP_NOTUPTO:
     case OP_NOTMINUPTO:
     case OP_NOTPOSUPTO:
-    c = code[1 + IMM2_SIZE];
-    if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c);
-      else fprintf(f, " %s [^\\x%02x]{", flag, c);
+    fprintf(f, " %s [^", flag);
+    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
+    fprintf(f, "]{");
     if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
     fprintf(f, "%d}", GET2(code,1));
     if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");


Modified: code/trunk/testdata/testinput17
===================================================================
--- code/trunk/testdata/testinput17    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testinput17    2012-02-22 10:23:56 UTC (rev 924)
@@ -272,4 +272,12 @@
 /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
     \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}


+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZ
+
+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZi
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZ
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
+
/-- End of testinput17 --/

Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testinput5    2012-02-22 10:23:56 UTC (rev 924)
@@ -785,4 +785,12 @@
     \r\r\r\P
     \r\r\r\P\P     


+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZ
+
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZi
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZ
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZi
+
/-- End of testinput5 --/

Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput11-16    2012-02-22 10:23:56 UTC (rev 924)
@@ -636,7 +636,7 @@
 Memory allocation (code space): 14
 ------------------------------------------------------------------
   0   4 Bra
-  2     [^\x{aa}]
+  2     [^\xaa]
   4   4 Ket
   6     End
 ------------------------------------------------------------------


Modified: code/trunk/testdata/testoutput11-8
===================================================================
--- code/trunk/testdata/testoutput11-8    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput11-8    2012-02-22 10:23:56 UTC (rev 924)
@@ -633,12 +633,12 @@
 ------------------------------------------------------------------


 /[^\xaa]/8BM
-Memory allocation (code space): 40
+Memory allocation (code space): 10
 ------------------------------------------------------------------
-  0  36 Bra
-  3     [\x00-\xa9\xab-\xff] (neg)
- 36  36 Ket
- 39     End
+  0   6 Bra
+  3     [^\x{aa}]
+  6   6 Ket
+  9     End
 ------------------------------------------------------------------


/[^\d]/8WB

Modified: code/trunk/testdata/testoutput15
===================================================================
--- code/trunk/testdata/testoutput15    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput15    2012-02-22 10:23:56 UTC (rev 924)
@@ -606,7 +606,7 @@
 /[^\xff]/8DZ
 ------------------------------------------------------------------
         Bra
-        [\x00-\xfe] (neg)
+        [^\x{ff}]
         Ket
         End
 ------------------------------------------------------------------
@@ -868,7 +868,7 @@
 /[^\x{c4}]/8DZ
 ------------------------------------------------------------------
         Bra
-        [\x00-\xc3\xc5-\xff] (neg)
+        [^\x{c4}]
         Ket
         End
 ------------------------------------------------------------------


Modified: code/trunk/testdata/testoutput16
===================================================================
--- code/trunk/testdata/testoutput16    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput16    2012-02-22 10:23:56 UTC (rev 924)
@@ -81,7 +81,7 @@
 /[^ⱥ]/8iBZ
 ------------------------------------------------------------------
         Bra
-        [^\x{2c65}\x{23a}]
+     /i [^\x{2c65}]
         Ket
         End
 ------------------------------------------------------------------


Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput17    2012-02-22 10:23:56 UTC (rev 924)
@@ -448,4 +448,62 @@
     \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
  0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}


+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZ
+------------------------------------------------------------------
+        Bra
+        [^\x80]
+        [^\xff]
+        [^\x{100}]
+        [^\x{1000}]
+        [^\x{ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZi
+------------------------------------------------------------------
+        Bra
+     /i [^\x80]
+     /i [^\xff]
+     /i [^\x{100}]
+     /i [^\x{1000}]
+     /i [^\x{ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZ
+------------------------------------------------------------------
+        Bra
+        [^\x{100}]*
+        [^\x{1000}]+
+        [^\x{ffff}]??
+        [^\x{8000}]{4}
+        [^\x{8000}]*
+        [^\x{7fff}]{2}
+        [^\x{7fff}]{0,7}?
+        [^\x{100}]{5}
+        [^\x{100}]?+
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
+------------------------------------------------------------------
+        Bra
+     /i [^\x{100}]*
+     /i [^\x{1000}]+
+     /i [^\x{ffff}]??
+     /i [^\x{8000}]{4}
+     /i [^\x{8000}]*
+     /i [^\x{7fff}]{2}
+     /i [^\x{7fff}]{0,7}?
+        Once
+     /i [^\x{100}]{5}
+     /i [^\x{100}]?
+        Ket
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput17 --/


Modified: code/trunk/testdata/testoutput18
===================================================================
--- code/trunk/testdata/testoutput18    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput18    2012-02-22 10:23:56 UTC (rev 924)
@@ -535,7 +535,7 @@
 /[^\xff]/8DZ
 ------------------------------------------------------------------
         Bra
-        [^\x{ff}]
+        [^\xff]
         Ket
         End
 ------------------------------------------------------------------
@@ -798,7 +798,7 @@
 /[^\x{c4}]/8DZ
 ------------------------------------------------------------------
         Bra
-        [^\x{c4}]
+        [^\xc4]
         Ket
         End
 ------------------------------------------------------------------


Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput5    2012-02-22 10:23:56 UTC (rev 924)
@@ -1815,4 +1815,62 @@
     \r\r\r\P\P     
  0: \x{0d}\x{0d}


+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZ
+------------------------------------------------------------------
+        Bra
+        [^\x{100}]
+        [^\x{1234}]
+        [^\x{ffff}]
+        [^\x{10000}]
+        [^\x{10ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZi
+------------------------------------------------------------------
+        Bra
+     /i [^\x{100}]
+     /i [^\x{1234}]
+     /i [^\x{ffff}]
+     /i [^\x{10000}]
+     /i [^\x{10ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZ
+------------------------------------------------------------------
+        Bra
+        [^\x{100}]*
+        [^\x{10000}]+
+        [^\x{10ffff}]??
+        [^\x{8000}]{4}
+        [^\x{8000}]*
+        [^\x{7fff}]{2}
+        [^\x{7fff}]{0,7}?
+        [^\x{fffff}]{5}
+        [^\x{fffff}]?+
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZi
+------------------------------------------------------------------
+        Bra
+     /i [^\x{100}]*
+     /i [^\x{10000}]+
+     /i [^\x{10ffff}]??
+     /i [^\x{8000}]{4}
+     /i [^\x{8000}]*
+     /i [^\x{7fff}]{2}
+     /i [^\x{7fff}]{0,7}?
+        Once
+     /i [^\x{fffff}]{5}
+     /i [^\x{fffff}]?
+        Ket
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput5 --/