Revision: 924
http://vcs.pcre.org/viewvc?view=rev&revision=924
Author: zherczeg
Date: 2012-02-22 10:23:56 +0000 (Wed, 22 Feb 2012)
Log Message:
-----------
OP_NOT now supports any UTF character
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/pcre_exec.c
code/trunk/pcre_jit_compile.c
code/trunk/pcre_printint.c
code/trunk/testdata/testinput17
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput11-16
code/trunk/testdata/testoutput11-8
code/trunk/testdata/testoutput15
code/trunk/testdata/testoutput16
code/trunk/testdata/testoutput17
code/trunk/testdata/testoutput18
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/ChangeLog 2012-02-22 10:23:56 UTC (rev 924)
@@ -42,7 +42,9 @@
7. Individual JIT compile options can be set in pcretest by following -s+[+]
or /S+[+] with a digit between 1 and 7.
+8. OP_NOT now supports any UTF character, not just single-character ones.
+
Version 8.30 04-February-2012
-----------------------------
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_compile.c 2012-02-22 10:23:56 UTC (rev 924)
@@ -2225,32 +2225,60 @@
{
case OP_CHAR:
case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
case OP_EXACT:
case OP_EXACTI:
+ case OP_NOTEXACT:
+ case OP_NOTEXACTI:
case OP_UPTO:
case OP_UPTOI:
+ case OP_NOTUPTO:
+ case OP_NOTUPTOI:
case OP_MINUPTO:
case OP_MINUPTOI:
+ case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
case OP_POSUPTO:
case OP_POSUPTOI:
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
case OP_STAR:
case OP_STARI:
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
case OP_MINSTAR:
case OP_MINSTARI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
case OP_POSSTAR:
case OP_POSSTARI:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
case OP_PLUS:
case OP_PLUSI:
+ case OP_NOTPLUS:
+ case OP_NOTPLUSI:
case OP_MINPLUS:
case OP_MINPLUSI:
+ case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
case OP_POSPLUS:
case OP_POSPLUSI:
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
case OP_QUERY:
case OP_QUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
case OP_MINQUERY:
case OP_MINQUERYI:
+ case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
case OP_POSQUERY:
case OP_POSQUERYI:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
break;
}
@@ -3069,22 +3097,28 @@
#endif /* SUPPORT_UTF */
return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */
- /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
- opcodes are not used for multi-byte characters, because they are coded using
- an XCLASS instead. */
-
case OP_NOT:
- return (c = *previous) == next;
+#ifdef SUPPORT_UTF
+ GETCHARTEST(c, previous);
+#else
+ c = *previous;
+#endif
+ return c == next;
case OP_NOTI:
- if ((c = *previous) == next) return TRUE;
#ifdef SUPPORT_UTF
+ GETCHARTEST(c, previous);
+#else
+ c = *previous;
+#endif
+ if (c == next) return TRUE;
+#ifdef SUPPORT_UTF
if (utf)
{
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
- othercase = UCD_OTHERCASE(next);
+ othercase = UCD_OTHERCASE((unsigned int)next);
#else
othercase = NOTACHAR;
#endif
@@ -3092,7 +3126,7 @@
}
else
#endif /* SUPPORT_UTF */
- return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next))); /* Non-UTF-8 mode */
+ return (c == TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */
/* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
@@ -4485,27 +4519,16 @@
if (class_single_char < 2) class_single_char++;
/* If class_charcount is 1, we saw precisely one character. As long as
- there were no negated characters >= 128 and there was no use of \p or \P,
- in other words, no use of any XCLASS features, we can optimize.
+ there was no use of \p or \P, in other words, no use of any XCLASS features,
+ we can optimize.
- In UTF-8 mode, we can optimize the negative case only if there were no
- characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
- operate on single-bytes characters only. This is an historical hangover.
- Maybe one day we can tidy these opcodes to handle multi-byte characters.
-
The optimization throws away the bit map. We turn the item into a
1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
- Note that OP_NOT[I] does not support multibyte characters. In the positive
- case, it can cause firstchar to be set. Otherwise, there can be no first
- char if this item is first, whatever repeat count may follow. In the case
- of reqchar, save the previous value for reinstating. */
+ In the positive case, it can cause firstchar to be set. Otherwise, there
+ can be no first char if this item is first, whatever repeat count may
+ follow. In the case of reqchar, save the previous value for reinstating. */
-#ifdef SUPPORT_UTF
- if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET
- && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
-#else
if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
-#endif
{
ptr++;
zeroreqchar = reqchar;
@@ -4517,7 +4540,12 @@
if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
zerofirstchar = firstchar;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
- *code++ = c;
+#ifdef SUPPORT_UTF
+ if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
+ code += PRIV(ord2utf)(c, code);
+ else
+#endif
+ *code++ = c;
goto NOT_CHAR;
}
@@ -4775,15 +4803,22 @@
/* Now handle repetition for the different types of item. */
- /* If previous was a character match, abolish the item and generate a
- repeat item instead. If a char item has a minumum of more than one, ensure
- that it is set in reqchar - it might not be if a sequence such as x{3} is
- the first thing in a branch because the x will have gone into firstchar
- instead. */
+ /* If previous was a character or negated character match, abolish the item
+ and generate a repeat item instead. If a char item has a minimum of more
+ than one, ensure that it is set in reqchar - it might not be if a sequence
+ such as x{3} is the first thing in a branch because the x will have gone
+ into firstchar instead. */
- if (*previous == OP_CHAR || *previous == OP_CHARI)
+ if (*previous == OP_CHAR || *previous == OP_CHARI
+ || *previous == OP_NOT || *previous == OP_NOTI)
{
- op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;
+ switch (*previous) {
+ default: /* Make compiler happy. */
+ case OP_CHAR: op_type = OP_STAR - OP_STAR; break;
+ case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
+ case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break;
+ case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break;
+ }
/* Deal with UTF characters that take up more than one character. It's
easier to write this out separately than try to macrify it. Use c to
@@ -4806,7 +4841,8 @@
with UTF disabled, or for a single character UTF character. */
{
c = code[-1];
- if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
+ if (*previous <= OP_CHARI && repeat_min > 1)
+ reqchar = c | req_caseopt | cd->req_varyopt;
}
/* If the repetition is unlimited, it pays to see if the next thing on
@@ -4825,26 +4861,6 @@
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
}
- /* If previous was a single negated character ([^a] or similar), we use
- one of the special opcodes, replacing it. The code is shared with single-
- character repeats by setting opt_type to add a suitable offset into
- repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI
- are currently used only for single-byte chars. */
-
- else if (*previous == OP_NOT || *previous == OP_NOTI)
- {
- op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR;
- c = previous[1];
- if (!possessive_quantifier &&
- repeat_max < 0 &&
- check_auto_possessive(previous, utf, ptr + 1, options, cd))
- {
- repeat_type = 0; /* Force greedy */
- possessive_quantifier = TRUE;
- }
- goto OUTPUT_SINGLE_REPEAT;
- }
-
/* If previous was a character type match (\d or similar), abolish it and
create a suitable repeat item. The code is shared with single-character
repeats by setting op_type to add a suitable offset into repeat_type. Note
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_exec.c 2012-02-22 10:23:56 UTC (rev 924)
@@ -3565,33 +3565,41 @@
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- ecode++;
- GETCHARINCTEST(c, eptr);
- if (op == OP_NOTI) /* The caseless case */
+#ifdef SUPPORT_UTF
+ if (utf)
{
register unsigned int ch, och;
- ch = *ecode++;
-#ifdef COMPILE_PCRE8
- /* ch must be < 128 if UTF is enabled. */
- och = md->fcc[ch];
-#else
-#ifdef SUPPORT_UTF
+
+ ecode++;
+ GETCHARINC(ch, ecode);
+ GETCHARINC(c, eptr);
+
+ if (op == OP_NOT)
+ {
+ if (ch == c) RRETURN(MATCH_NOMATCH);
+ }
+ else
+ {
#ifdef SUPPORT_UCP
- if (utf && ch > 127)
- och = UCD_OTHERCASE(ch);
+ if (ch > 127)
+ och = UCD_OTHERCASE(ch);
#else
- if (utf && ch > 127)
- och = ch;
+ if (ch > 127)
+ och = ch;
#endif /* SUPPORT_UCP */
- else
-#endif /* SUPPORT_UTF */
- och = TABLE_GET(ch, md->fcc, ch);
-#endif /* COMPILE_PCRE8 */
- if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
+ else
+ och = TABLE_GET(ch, md->fcc, ch);
+ if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
+ }
}
- else /* Caseful */
+ else
+#endif
{
- if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
+ register unsigned int ch = ecode[1];
+ c = *eptr++;
+ if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
+ RRETURN(MATCH_NOMATCH);
+ ecode += 2;
}
break;
@@ -3671,7 +3679,7 @@
/* Common code for all repeated single-byte matches. */
REPEATNOTCHAR:
- fc = *ecode++;
+ GETCHARINCTEST(fc, ecode);
/* The code is duplicated for the caseless and caseful cases, for speed,
since matching characters is likely to be quite common. First, ensure the
@@ -3686,10 +3694,6 @@
if (op >= OP_NOTSTARI) /* Caseless */
{
-#ifdef COMPILE_PCRE8
- /* fc must be < 128 if UTF is enabled. */
- foc = md->fcc[fc];
-#else
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
if (utf && fc > 127)
@@ -3701,7 +3705,6 @@
else
#endif /* SUPPORT_UTF */
foc = TABLE_GET(fc, md->fcc, fc);
-#endif /* COMPILE_PCRE8 */
#ifdef SUPPORT_UTF
if (utf)
@@ -3715,7 +3718,7 @@
RRETURN(MATCH_NOMATCH);
}
GETCHARINC(d, eptr);
- if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
+ if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
}
}
else
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_jit_compile.c 2012-02-22 10:23:56 UTC (rev 924)
@@ -3602,7 +3602,7 @@
add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
}
}
- return cc + 1;
+ return cc + length;
case OP_CLASS:
case OP_NCLASS:
@@ -6652,9 +6652,9 @@
tables = PRIV(default_tables);
memset(&rootfallback, 0, sizeof(fallback_common));
+memset(common, 0, sizeof(compiler_common));
rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
-common->compiler = NULL;
common->start = rootfallback.cc;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_w)(tables + lcc_offset);
@@ -6696,22 +6696,6 @@
common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
common->name_count = re->name_count;
common->name_entry_size = re->name_entry_size;
-common->partialmatchlabel = NULL;
-common->acceptlabel = NULL;
-common->stubs = NULL;
-common->entries = NULL;
-common->currententry = NULL;
-common->partialmatch = NULL;
-common->accept = NULL;
-common->calllimit = NULL;
-common->stackalloc = NULL;
-common->revertframes = NULL;
-common->wordboundary = NULL;
-common->anynewline = NULL;
-common->hspace = NULL;
-common->vspace = NULL;
-common->casefulcmp = NULL;
-common->caselesscmp = NULL;
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
@@ -6719,23 +6703,11 @@
#ifdef SUPPORT_UCP
common->use_ucp = (re->options & PCRE_UCP) != 0;
#endif
-common->utfreadchar = NULL;
-#ifdef COMPILE_PCRE8
-common->utfreadtype8 = NULL;
-#endif
#endif /* SUPPORT_UTF */
-#ifdef SUPPORT_UCP
-common->getucd = NULL;
-#endif
ccend = bracketend(rootfallback.cc);
/* Calculate the local space size on the stack. */
common->ovector_start = CALL_LIMIT + sizeof(sljit_w);
-common->req_char_ptr = 0;
-common->recursive_head = 0;
-common->start_used_ptr = 0;
-common->hit_start = 0;
-common->first_line_end = 0;
SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
common->localsize = get_localspace(common, rootfallback.cc, ccend);
@@ -6768,6 +6740,7 @@
if ((common->ovector_start & sizeof(sljit_w)) != 0)
common->ovector_start += sizeof(sljit_w);
+SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
@@ -7170,7 +7143,12 @@
int
PRIV(jit_get_size)(void *executable_funcs)
{
-return ((executable_functions *)executable_funcs)->executable_sizes[PCRE_STUDY_JIT_COMPILE];
+int i;
+sljit_uw size = 0;
+sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes;
+for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
+ size += executable_sizes[i];
+return (int)size;
}
const char*
Modified: code/trunk/pcre_printint.c
===================================================================
--- code/trunk/pcre_printint.c 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/pcre_printint.c 2012-02-22 10:23:56 UTC (rev 924)
@@ -477,12 +477,9 @@
flag = "/i";
/* Fall through */
case OP_NOT:
- c = code[1];
- if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
- else if (utf || c > 0xff)
- fprintf(f, " %s [^\\x{%02x}]", flag, c);
- else
- fprintf(f, " %s [^\\x%02x]", flag, c);
+ fprintf(f, " %s [^", flag);
+ extra = print_char(f, code + 1, utf);
+ fprintf(f, "]");
break;
case OP_NOTSTARI:
@@ -506,10 +503,9 @@
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTPOSQUERY:
- c = code[1];
- if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
- else fprintf(f, " %s [^\\x%02x]", flag, c);
- fprintf(f, "%s", priv_OP_names[*code]);
+ fprintf(f, " %s [^", flag);
+ extra = print_char(f, code + 1, utf);
+ fprintf(f, "]%s", priv_OP_names[*code]);
break;
case OP_NOTEXACTI:
@@ -523,9 +519,9 @@
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTPOSUPTO:
- c = code[1 + IMM2_SIZE];
- if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c);
- else fprintf(f, " %s [^\\x%02x]{", flag, c);
+ fprintf(f, " %s [^", flag);
+ extra = print_char(f, code + 1 + IMM2_SIZE, utf);
+ fprintf(f, "]{");
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
Modified: code/trunk/testdata/testinput17
===================================================================
--- code/trunk/testdata/testinput17 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testinput17 2012-02-22 10:23:56 UTC (rev 924)
@@ -272,4 +272,12 @@
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZ
+
+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZi
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZ
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
+
/-- End of testinput17 --/
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testinput5 2012-02-22 10:23:56 UTC (rev 924)
@@ -785,4 +785,12 @@
\r\r\r\P
\r\r\r\P\P
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZ
+
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZi
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZ
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZi
+
/-- End of testinput5 --/
Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput11-16 2012-02-22 10:23:56 UTC (rev 924)
@@ -636,7 +636,7 @@
Memory allocation (code space): 14
------------------------------------------------------------------
0 4 Bra
- 2 [^\x{aa}]
+ 2 [^\xaa]
4 4 Ket
6 End
------------------------------------------------------------------
Modified: code/trunk/testdata/testoutput11-8
===================================================================
--- code/trunk/testdata/testoutput11-8 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput11-8 2012-02-22 10:23:56 UTC (rev 924)
@@ -633,12 +633,12 @@
------------------------------------------------------------------
/[^\xaa]/8BM
-Memory allocation (code space): 40
+Memory allocation (code space): 10
------------------------------------------------------------------
- 0 36 Bra
- 3 [\x00-\xa9\xab-\xff] (neg)
- 36 36 Ket
- 39 End
+ 0 6 Bra
+ 3 [^\x{aa}]
+ 6 6 Ket
+ 9 End
------------------------------------------------------------------
/[^\d]/8WB
Modified: code/trunk/testdata/testoutput15
===================================================================
--- code/trunk/testdata/testoutput15 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput15 2012-02-22 10:23:56 UTC (rev 924)
@@ -606,7 +606,7 @@
/[^\xff]/8DZ
------------------------------------------------------------------
Bra
- [\x00-\xfe] (neg)
+ [^\x{ff}]
Ket
End
------------------------------------------------------------------
@@ -868,7 +868,7 @@
/[^\x{c4}]/8DZ
------------------------------------------------------------------
Bra
- [\x00-\xc3\xc5-\xff] (neg)
+ [^\x{c4}]
Ket
End
------------------------------------------------------------------
Modified: code/trunk/testdata/testoutput16
===================================================================
--- code/trunk/testdata/testoutput16 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput16 2012-02-22 10:23:56 UTC (rev 924)
@@ -81,7 +81,7 @@
/[^ⱥ]/8iBZ
------------------------------------------------------------------
Bra
- [^\x{2c65}\x{23a}]
+ /i [^\x{2c65}]
Ket
End
------------------------------------------------------------------
Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput17 2012-02-22 10:23:56 UTC (rev 924)
@@ -448,4 +448,62 @@
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZ
+------------------------------------------------------------------
+ Bra
+ [^\x80]
+ [^\xff]
+ [^\x{100}]
+ [^\x{1000}]
+ [^\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/BZi
+------------------------------------------------------------------
+ Bra
+ /i [^\x80]
+ /i [^\xff]
+ /i [^\x{100}]
+ /i [^\x{1000}]
+ /i [^\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZ
+------------------------------------------------------------------
+ Bra
+ [^\x{100}]*
+ [^\x{1000}]+
+ [^\x{ffff}]??
+ [^\x{8000}]{4}
+ [^\x{8000}]*
+ [^\x{7fff}]{2}
+ [^\x{7fff}]{0,7}?
+ [^\x{100}]{5}
+ [^\x{100}]?+
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
+------------------------------------------------------------------
+ Bra
+ /i [^\x{100}]*
+ /i [^\x{1000}]+
+ /i [^\x{ffff}]??
+ /i [^\x{8000}]{4}
+ /i [^\x{8000}]*
+ /i [^\x{7fff}]{2}
+ /i [^\x{7fff}]{0,7}?
+ Once
+ /i [^\x{100}]{5}
+ /i [^\x{100}]?
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput17 --/
Modified: code/trunk/testdata/testoutput18
===================================================================
--- code/trunk/testdata/testoutput18 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput18 2012-02-22 10:23:56 UTC (rev 924)
@@ -535,7 +535,7 @@
/[^\xff]/8DZ
------------------------------------------------------------------
Bra
- [^\x{ff}]
+ [^\xff]
Ket
End
------------------------------------------------------------------
@@ -798,7 +798,7 @@
/[^\x{c4}]/8DZ
------------------------------------------------------------------
Bra
- [^\x{c4}]
+ [^\xc4]
Ket
End
------------------------------------------------------------------
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2012-02-21 13:25:05 UTC (rev 923)
+++ code/trunk/testdata/testoutput5 2012-02-22 10:23:56 UTC (rev 924)
@@ -1815,4 +1815,62 @@
\r\r\r\P\P
0: \x{0d}\x{0d}
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZ
+------------------------------------------------------------------
+ Bra
+ [^\x{100}]
+ [^\x{1234}]
+ [^\x{ffff}]
+ [^\x{10000}]
+ [^\x{10ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZi
+------------------------------------------------------------------
+ Bra
+ /i [^\x{100}]
+ /i [^\x{1234}]
+ /i [^\x{ffff}]
+ /i [^\x{10000}]
+ /i [^\x{10ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZ
+------------------------------------------------------------------
+ Bra
+ [^\x{100}]*
+ [^\x{10000}]+
+ [^\x{10ffff}]??
+ [^\x{8000}]{4}
+ [^\x{8000}]*
+ [^\x{7fff}]{2}
+ [^\x{7fff}]{0,7}?
+ [^\x{fffff}]{5}
+ [^\x{fffff}]?+
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZi
+------------------------------------------------------------------
+ Bra
+ /i [^\x{100}]*
+ /i [^\x{10000}]+
+ /i [^\x{10ffff}]??
+ /i [^\x{8000}]{4}
+ /i [^\x{8000}]*
+ /i [^\x{7fff}]{2}
+ /i [^\x{7fff}]{0,7}?
+ Once
+ /i [^\x{fffff}]{5}
+ /i [^\x{fffff}]?
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput5 --/