Revision: 1260
http://vcs.pcre.org/viewvc?view=rev&revision=1260
Author: ph10
Date: 2013-02-27 15:41:22 +0000 (Wed, 27 Feb 2013)
Log Message:
-----------
Add \p{Xuc} to match characters identifiable by Universal Character Names.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcrepattern.3
code/trunk/doc/pcresyntax.3
code/trunk/maint/GenerateUtt.py
code/trunk/pcre_dfa_exec.c
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/pcre_tables.c
code/trunk/pcre_xclass.c
code/trunk/testdata/testinput10
code/trunk/testdata/testinput7
code/trunk/testdata/testoutput10
code/trunk/testdata/testoutput7
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/ChangeLog 2013-02-27 15:41:22 UTC (rev 1260)
@@ -75,6 +75,10 @@
19. Redefined some pcre_uchar variables in pcre_exec.c as pcre_uint32; this
gives some modest performance improvement in 8-bit mode.
+
+20. Added the PCRE-specific property \p{Xuc} for matching characters that can
+ be expressed in certain programming languages using Universal Character
+ Names.
Version 8.32 30-November-2012
Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/doc/pcrepattern.3 2013-02-27 15:41:22 UTC (rev 1260)
@@ -1,4 +1,4 @@
-.TH PCREPATTERN 3 "23 February 2013" "PCRE 8.33"
+.TH PCREPATTERN 3 "27 February 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
@@ -863,7 +863,8 @@
As well as the standard Unicode properties described above, PCRE supports four
more that make it possible to convert traditional escape sequences such as \ew
and \es and POSIX character classes to use Unicode properties. PCRE uses these
-non-standard, non-Perl properties internally when PCRE_UCP is set. They are:
+non-standard, non-Perl properties internally when PCRE_UCP is set. However,
+they may also be used explicitly. These properties are:
.sp
Xan Any alphanumeric character
Xps Any POSIX space character
@@ -875,6 +876,15 @@
carriage return, and any other character that has the Z (separator) property.
Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
same characters as Xan, plus underscore.
+.P
+There is another non-standard property, Xuc, which matches any character that
+can be represented by a Universal Character Name in C++ and other programming
+languages. These are the characters $, @, ` (grave accent), and all characters
+with Unicode code points greater than or equal to U+00A0, except for the
+surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are
+excluded. (Universal Character Names are of the form \euHHHH or \eUHHHHHHHH
+where H is a hexadecimal digit. Note that the Xuc property does not match these
+sequences but the characters that they represent.)
.
.
.\" HTML <a name="resetmatchstart"></a>
@@ -2979,6 +2989,6 @@
.rs
.sp
.nf
-Last updated: 23 February 2013
+Last updated: 27 February 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
Modified: code/trunk/doc/pcresyntax.3
===================================================================
--- code/trunk/doc/pcresyntax.3 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/doc/pcresyntax.3 2013-02-27 15:41:22 UTC (rev 1260)
@@ -1,4 +1,4 @@
-.TH PCRESYNTAX 3 "11 November 2012" "PCRE 8.32"
+.TH PCRESYNTAX 3 "27 February 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -116,6 +116,8 @@
Xan Alphanumeric: union of properties L and N
Xps POSIX space: property Z or tab, NL, VT, FF, CR
Xsp Perl space: property Z or tab, NL, FF, CR
+ Xuc Universally-named character: one that can be
+ represented by a Universal Character Name
Xwd Perl word: property Xan or underscore
.
.
@@ -491,6 +493,6 @@
.rs
.sp
.nf
-Last updated: 11 November 2012
-Copyright (c) 1997-2012 University of Cambridge.
+Last updated: 27 February 2013
+Copyright (c) 1997-2013 University of Cambridge.
.fi
Modified: code/trunk/maint/GenerateUtt.py
===================================================================
--- code/trunk/maint/GenerateUtt.py 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/maint/GenerateUtt.py 2013-02-27 15:41:22 UTC (rev 1260)
@@ -1,6 +1,8 @@
#! /usr/bin/python
-# Generate utt tables.
+# Generate utt tables. Note: this script is written in Python 2 and is
+# incompatible with Python 3. However, the 2to3 conversion script has been
+# successfully tested on it.
# The source file pcre_tables.c contains (amongst other things), a table that
# is indexed by script name. In order to reduce the number of relocations when
@@ -18,6 +20,7 @@
# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0
# Modified by ChPe 30-September-2012 to add this note; no other changes were
# necessary for Unicode 6.2.0 support.
+# Modified by PH 26-February-2013 to add the Xuc special category.
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
@@ -60,6 +63,7 @@
utt_table.append(('Xan', 'PT_ALNUM'))
utt_table.append(('Xps', 'PT_PXSPACE'))
utt_table.append(('Xsp', 'PT_SPACE'))
+utt_table.append(('Xuc', 'PT_UCNC'))
utt_table.append(('Xwd', 'PT_WORD'))
# Sort the table.
@@ -86,8 +90,8 @@
print 'const char PRIV(utt_names)[] =';
last = ''
for utt in utt_table:
- if utt == utt_table[-1]:
- last = ';'
+ if utt == utt_table[-1]:
+ last = ';'
print ' STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)
# This was how it was done before the EBCDIC-compatible modification.
# print ' "%s\\0"%s' % (utt[0], last)
@@ -96,13 +100,13 @@
offset = 0
last = ','
for utt in utt_table:
- if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
- 'PT_SPACE', 'PT_WORD'):
- value = '0'
- else:
- value = 'ucp_' + utt[0]
- if utt == utt_table[-1]:
- last = ''
- print ' { %3d, %s, %s }%s' % (offset, utt[1], value, last)
- offset += len(utt[0]) + 1
+ if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
+ 'PT_SPACE', 'PT_UCNC', 'PT_WORD'):
+ value = '0'
+ else:
+ value = 'ucp_' + utt[0]
+ if utt == utt_table[-1]:
+ last = ''
+ print ' { %3d, %s, %s }%s' % (offset, utt[1], value, last)
+ offset += len(utt[0]) + 1
print '};'
Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/pcre_dfa_exec.c 2013-02-27 15:41:22 UTC (rev 1260)
@@ -1119,6 +1119,12 @@
if (c == *cp++) { OK = TRUE; break; }
}
break;
+
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1364,6 +1370,12 @@
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1602,6 +1614,12 @@
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
@@ -1865,6 +1883,12 @@
}
break;
+ case PT_UCNC:
+ OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000;
+ break;
+
/* Should never occur, but keep compilers from grumbling. */
default:
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/pcre_exec.c 2013-02-27 15:41:22 UTC (rev 1260)
@@ -308,7 +308,7 @@
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
+ RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -2628,6 +2628,13 @@
{ if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
}
break;
+
+ case PT_UCNC:
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
/* This should never occur */
@@ -4246,6 +4253,22 @@
}
}
break;
+
+ case PT_UCNC:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
/* This should not occur */
@@ -4992,6 +5015,25 @@
}
}
/* Control never gets here */
+
+ case PT_UCNC:
+ for (fi = min;; fi++)
+ {
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never gets here */
/* This should never occur */
default:
@@ -5487,6 +5529,24 @@
GOT_MAX:
break;
+ case PT_UCNC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ GETCHARLENTEST(c, eptr, len);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ break;
+ eptr += len;
+ }
+ break;
+
default:
RRETURN(PCRE_ERROR_INTERNAL);
}
@@ -6128,7 +6188,7 @@
LBL(32) LBL(34) LBL(42) LBL(46)
#ifdef SUPPORT_UCP
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
- LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
+ LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
#endif /* SUPPORT_UCP */
#endif /* SUPPORT_UTF */
default:
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/pcre_internal.h 2013-02-27 15:41:22 UTC (rev 1260)
@@ -1835,6 +1835,7 @@
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
#define PT_WORD 8 /* Word - L plus N plus underscore */
#define PT_CLIST 9 /* Pseudo-property: match character list */
+#define PT_UCNC 10 /* Universal Character nameable character */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
Modified: code/trunk/pcre_tables.c
===================================================================
--- code/trunk/pcre_tables.c 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/pcre_tables.c 2013-02-27 15:41:22 UTC (rev 1260)
@@ -346,6 +346,7 @@
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
+#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
@@ -493,6 +494,7 @@
STRING_Xan0
STRING_Xps0
STRING_Xsp0
+ STRING_Xuc0
STRING_Xwd0
STRING_Yi0
STRING_Z0
@@ -640,12 +642,13 @@
{ 1011, PT_ALNUM, 0 },
{ 1015, PT_PXSPACE, 0 },
{ 1019, PT_SPACE, 0 },
- { 1023, PT_WORD, 0 },
- { 1027, PT_SC, ucp_Yi },
- { 1030, PT_GC, ucp_Z },
- { 1032, PT_PC, ucp_Zl },
- { 1035, PT_PC, ucp_Zp },
- { 1038, PT_PC, ucp_Zs }
+ { 1023, PT_UCNC, 0 },
+ { 1027, PT_WORD, 0 },
+ { 1031, PT_SC, ucp_Yi },
+ { 1034, PT_GC, ucp_Z },
+ { 1036, PT_PC, ucp_Zl },
+ { 1039, PT_PC, ucp_Zp },
+ { 1042, PT_PC, ucp_Zs }
};
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
Modified: code/trunk/pcre_xclass.c
===================================================================
--- code/trunk/pcre_xclass.c 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/pcre_xclass.c 2013-02-27 15:41:22 UTC (rev 1260)
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -179,6 +179,20 @@
== (t == XCL_PROP))
return !negated;
break;
+
+ case PT_UCNC:
+ if (c < 0xa0)
+ {
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT) == (t == XCL_PROP))
+ return !negated;
+ }
+ else
+ {
+ if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP))
+ return !negated;
+ }
+ break;
/* This should never occur, but compilers may mutter if there is no
default. */
Modified: code/trunk/testdata/testinput10
===================================================================
--- code/trunk/testdata/testinput10 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/testdata/testinput10 2013-02-27 15:41:22 UTC (rev 1260)
@@ -1334,4 +1334,68 @@
/is{2}t/8i
iskt
+/^\p{Xuc}/8
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+ ** Failers
+ abc
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\P{Xuc}/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+
+/^[\P{Xuc}]/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+
/-- End of testinput10 --/
Modified: code/trunk/testdata/testinput7
===================================================================
--- code/trunk/testdata/testinput7 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/testdata/testinput7 2013-02-27 15:41:22 UTC (rev 1260)
@@ -668,5 +668,71 @@
/is{2}t/8i
iskt
+
+/-- This property is a PCRE special --/
+/^\p{Xuc}/8
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+ ** Failers
+ abc
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ ** Failers
+ \x{9f}
+
+/^\P{Xuc}/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+
+/^[\P{Xuc}]/8
+ abc
+ ** Failers
+ $abc
+ @abc
+ `abc
+ \x{1234}abc
+
/-- End of testinput7 --/
Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/testdata/testoutput10 2013-02-27 15:41:22 UTC (rev 1260)
@@ -2723,4 +2723,129 @@
iskt
No match
+/^\p{Xuc}/8
+ $abc
+ 0: $
+ @abc
+ 0: @
+ `abc
+ 0: `
+ \x{1234}abc
+ 0: \x{1234}
+ ** Failers
+No match
+ abc
+No match
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ 1: $@`\x{a0}\x{1234}
+ 2: $@`\x{a0}
+ 3: $@`
+ 4: $@
+ 5: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ 1: $@`\x{a0}\x{1234}
+ 2: $@`\x{a0}
+ 3: $@`
+ 4: $@
+ 5: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}*
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}
+ 1: $@`\x{a0}
+ 2: $@`
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}
+ 1: $@`\x{a0}
+ 2: $@`
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ 1: $@`\x{a0}\x{1234}
+ 2: $@`\x{a0}
+ 3: $@`
+ 4: $@
+ 5: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\P{Xuc}/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
+
+/^[\P{Xuc}]/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
+
/-- End of testinput10 --/
Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7 2013-02-25 08:25:19 UTC (rev 1259)
+++ code/trunk/testdata/testoutput7 2013-02-27 15:41:22 UTC (rev 1260)
@@ -1469,5 +1469,113 @@
/is{2}t/8i
iskt
No match
+
+/-- This property is a PCRE special --/
+/^\p{Xuc}/8
+ $abc
+ 0: $
+ @abc
+ 0: @
+ `abc
+ 0: `
+ \x{1234}abc
+ 0: \x{1234}
+ ** Failers
+No match
+ abc
+No match
+
+/^\p{Xuc}+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}+?\*/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}*
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}++/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\p{Xuc}{3,5}?/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^[\p{Xuc}]+/8
+ $@`\x{a0}\x{1234}\x{e000}**
+ 0: $@`\x{a0}\x{1234}\x{e000}
+ ** Failers
+No match
+ \x{9f}
+No match
+
+/^\P{Xuc}/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
+
+/^[\P{Xuc}]/8
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ $abc
+No match
+ @abc
+No match
+ `abc
+No match
+ \x{1234}abc
+No match
+
/-- End of testinput7 --/