Revision: 826
http://vcs.pcre.org/viewvc?view=rev&revision=826
Author: zherczeg
Date: 2011-12-27 09:42:33 +0000 (Tue, 27 Dec 2011)
Log Message:
-----------
Fix horizontal and vertical whitespace ranges in 16-bit mode
Modified Paths:
--------------
code/branches/pcre16/pcre_compile.c
code/branches/pcre16/pcre_printint.c
code/branches/pcre16/testdata/testinput14
code/branches/pcre16/testdata/testinput16
code/branches/pcre16/testdata/testinput17
code/branches/pcre16/testdata/testinput19
code/branches/pcre16/testdata/testinput2
code/branches/pcre16/testdata/testinput7
code/branches/pcre16/testdata/testoutput14
code/branches/pcre16/testdata/testoutput16
code/branches/pcre16/testdata/testoutput17
code/branches/pcre16/testdata/testoutput19
code/branches/pcre16/testdata/testoutput2
code/branches/pcre16/testdata/testoutput5
code/branches/pcre16/testdata/testoutput7
Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/pcre_compile.c 2011-12-27 09:42:33 UTC (rev 826)
@@ -4023,7 +4023,22 @@
SETBIT(classbits, 0x09); /* VT */
SETBIT(classbits, 0x20); /* SPACE */
SETBIT(classbits, 0xa0); /* NSBP */
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x1680;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x180e;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2000;
+ *class_uchardata++ = 0x200a;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x202f;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x205f;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x3000;
+#elif defined SUPPORT_UTF
if (utf)
{
xclass = TRUE;
@@ -4033,7 +4048,7 @@
class_uchardata += PRIV(ord2utf)(0x180e, class_uchardata);
*class_uchardata++ = XCL_RANGE;
class_uchardata += PRIV(ord2utf)(0x2000, class_uchardata);
- class_uchardata += PRIV(ord2utf)(0x200A, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x200a, class_uchardata);
*class_uchardata++ = XCL_SINGLE;
class_uchardata += PRIV(ord2utf)(0x202f, class_uchardata);
*class_uchardata++ = XCL_SINGLE;
@@ -4057,9 +4072,36 @@
}
classbits[c] |= x;
}
-
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x0100;
+ *class_uchardata++ = 0x167f;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x1681;
+ *class_uchardata++ = 0x180d;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x180f;
+ *class_uchardata++ = 0x1fff;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x200b;
+ *class_uchardata++ = 0x202e;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2030;
+ *class_uchardata++ = 0x205e;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2060;
+ *class_uchardata++ = 0x2fff;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x3001;
#ifdef SUPPORT_UTF
if (utf)
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ else
+#endif
+ *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+ if (utf)
{
xclass = TRUE;
*class_uchardata++ = XCL_RANGE;
@@ -4072,7 +4114,7 @@
class_uchardata += PRIV(ord2utf)(0x180f, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x1fff, class_uchardata);
*class_uchardata++ = XCL_RANGE;
- class_uchardata += PRIV(ord2utf)(0x200B, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x200b, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x202e, class_uchardata);
*class_uchardata++ = XCL_RANGE;
class_uchardata += PRIV(ord2utf)(0x2030, class_uchardata);
@@ -4093,7 +4135,12 @@
SETBIT(classbits, 0x0c); /* FF */
SETBIT(classbits, 0x0d); /* CR */
SETBIT(classbits, 0x85); /* NEL */
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2028;
+ *class_uchardata++ = 0x2029;
+#elif defined SUPPORT_UTF
if (utf)
{
xclass = TRUE;
@@ -4121,15 +4168,28 @@
classbits[c] |= x;
}
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x0100;
+ *class_uchardata++ = 0x2027;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x202a;
#ifdef SUPPORT_UTF
if (utf)
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ else
+#endif
+ *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+ if (utf)
{
xclass = TRUE;
*class_uchardata++ = XCL_RANGE;
class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x2027, class_uchardata);
*class_uchardata++ = XCL_RANGE;
- class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x202a, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
}
#endif
Modified: code/branches/pcre16/pcre_printint.c
===================================================================
--- code/branches/pcre16/pcre_printint.c 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/pcre_printint.c 2011-12-27 09:42:33 UTC (rev 826)
@@ -114,8 +114,11 @@
int c = *ptr;
#ifndef SUPPORT_UTF
+
(void)utf; /* Avoid compiler warning */
-if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
+if (PRINTABLE(c)) fprintf(f, "%c", c);
+else if (c <= 0xff) fprintf(f, "\\x%02x", c);
+else fprintf(f, "\\x{%x}", c);
return 0;
#else
Modified: code/branches/pcre16/testdata/testinput14
===================================================================
--- code/branches/pcre16/testdata/testinput14 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput14 2011-12-27 09:42:33 UTC (rev 826)
@@ -283,4 +283,26 @@
\) )* # optional trailing comment
/xSI
+/\h/SI
+
+/\v/SI
+
+/\R/SI
+
+/[\h]/BZ
+ >\x09<
+
+/[\h]+/BZ
+ >\x09\x20\xa0<
+
+/[\v]/BZ
+
+/[\H]/BZ
+
+/[^\h]/BZ
+
+/[\V]/BZ
+
+/[\x0a\V]/BZ
+
/-- End of testinput14 --/
Modified: code/branches/pcre16/testdata/testinput16
===================================================================
--- code/branches/pcre16/testdata/testinput16 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput16 2011-12-27 09:42:33 UTC (rev 826)
@@ -30,4 +30,6 @@
/\R/SI
+/[[:blank:]]/WBZ
+
/-- End of testinput16 --/
Modified: code/branches/pcre16/testdata/testinput17
===================================================================
--- code/branches/pcre16/testdata/testinput17 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput17 2011-12-27 09:42:33 UTC (rev 826)
@@ -219,4 +219,20 @@
/\R/SI
+/[\h]/BZ
+ >\x09<
+
+/[\h]+/BZ
+ >\x09\x20\xa0<
+
+/[\v]/BZ
+
+/[\H]/BZ
+
+/[^\h]/BZ
+
+/[\V]/BZ
+
+/[\x0a\V]/BZ
+
/-- End of testinput17 --/
Modified: code/branches/pcre16/testdata/testinput19
===================================================================
--- code/branches/pcre16/testdata/testinput19 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput19 2011-12-27 09:42:33 UTC (rev 826)
@@ -17,4 +17,6 @@
/[^ⱥ]/8iBZ
+/[[:blank:]]/WBZ
+
/-- End of testinput19 --/
Modified: code/branches/pcre16/testdata/testinput2
===================================================================
--- code/branches/pcre16/testdata/testinput2 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput2 2011-12-27 09:42:33 UTC (rev 826)
@@ -2173,22 +2173,6 @@
xabcpqrx
xxyzx
-/[\h]/BZ
- >\x09<
-
-/[\h]+/BZ
- >\x09\x20\xa0<
-
-/[\v]/BZ
-
-/[\H]/BZ
-
-/[^\h]/BZ
-
-/[\V]/BZ
-
-/[\x0a\V]/BZ
-
/\H++X/BZ
** Failers
XXXX
Modified: code/branches/pcre16/testdata/testinput7
===================================================================
--- code/branches/pcre16/testdata/testinput7 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput7 2011-12-27 09:42:33 UTC (rev 826)
@@ -397,8 +397,6 @@
/[[:ascii:]]/WBZ
-/[[:blank:]]/WBZ
-
/[[:cntrl:]]/WBZ
/[[:digit:]]/WBZ
Modified: code/branches/pcre16/testdata/testoutput14
===================================================================
--- code/branches/pcre16/testdata/testoutput14 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput14 2011-12-27 09:42:33 UTC (rev 826)
@@ -355,4 +355,88 @@
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
+/\h/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x09 \x20 \xa0
+
+/\v/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85
+
+/\R/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85
+
+/[\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0]
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09<
+ 0: \x09
+
+/[\h]+/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0]+
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09\x20\xa0<
+ 0: \x09 \xa0
+
+/[\v]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x0a-\x0d\x85]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\H]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x09\x0e-\x84\x86-\xff]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\x0a\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x0a\x0e-\x84\x86-\xff]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput14 --/
Modified: code/branches/pcre16/testdata/testoutput16
===================================================================
--- code/branches/pcre16/testdata/testoutput16 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput16 2011-12-27 09:42:33 UTC (rev 826)
@@ -110,4 +110,12 @@
Subject length lower bound = 1
Starting byte set: \x0a \x0b \x0c \x0d \x85
+/[[:blank:]]/WBZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput16 --/
Modified: code/branches/pcre16/testdata/testoutput17
===================================================================
--- code/branches/pcre16/testdata/testoutput17 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput17 2011-12-27 09:42:33 UTC (rev 826)
@@ -268,4 +268,64 @@
Subject length lower bound = 1
Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
+/[\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09<
+ 0: \x09
+
+/[\h]+/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]+
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09\x20\xa0<
+ 0: \x09 \xa0
+
+/[\v]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x0a-\x0d\x85\x{2028}-\x{2029}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\H]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\x0a\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x0a\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput17 --/
Modified: code/branches/pcre16/testdata/testoutput19
===================================================================
--- code/branches/pcre16/testdata/testoutput19 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput19 2011-12-27 09:42:33 UTC (rev 826)
@@ -77,4 +77,12 @@
End
------------------------------------------------------------------
+/[[:blank:]]/WBZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput19 --/
Modified: code/branches/pcre16/testdata/testoutput2
===================================================================
--- code/branches/pcre16/testdata/testoutput2 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput2 2011-12-27 09:42:33 UTC (rev 826)
@@ -8383,66 +8383,6 @@
3: <unset>
4: x
-/[\h]/BZ
-------------------------------------------------------------------
- Bra
- [\x09 \xa0]
- Ket
- End
-------------------------------------------------------------------
- >\x09<
- 0: \x09
-
-/[\h]+/BZ
-------------------------------------------------------------------
- Bra
- [\x09 \xa0]+
- Ket
- End
-------------------------------------------------------------------
- >\x09\x20\xa0<
- 0: \x09 \xa0
-
-/[\v]/BZ
-------------------------------------------------------------------
- Bra
- [\x0a-\x0d\x85]
- Ket
- End
-------------------------------------------------------------------
-
-/[\H]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
- Ket
- End
-------------------------------------------------------------------
-
-/[^\h]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
- Ket
- End
-------------------------------------------------------------------
-
-/[\V]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x09\x0e-\x84\x86-\xff]
- Ket
- End
-------------------------------------------------------------------
-
-/[\x0a\V]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x0a\x0e-\x84\x86-\xff]
- Ket
- End
-------------------------------------------------------------------
-
/\H++X/BZ
------------------------------------------------------------------
Bra
Modified: code/branches/pcre16/testdata/testoutput5
===================================================================
--- code/branches/pcre16/testdata/testoutput5 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput5 2011-12-27 09:42:33 UTC (rev 826)
@@ -797,7 +797,7 @@
/[\V]/8BZ
------------------------------------------------------------------
Bra
- [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{2029}-\x{10ffff}]
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}]
Ket
End
------------------------------------------------------------------
Modified: code/branches/pcre16/testdata/testoutput7
===================================================================
--- code/branches/pcre16/testdata/testoutput7 2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput7 2011-12-27 09:42:33 UTC (rev 826)
@@ -815,14 +815,6 @@
End
------------------------------------------------------------------
-/[[:blank:]]/WBZ
-------------------------------------------------------------------
- Bra
- [\x09 \xa0]
- Ket
- End
-------------------------------------------------------------------
-
/[[:cntrl:]]/WBZ
------------------------------------------------------------------
Bra