[Pcre-svn] [826] code/branches/pcre16: fix horizontal and vertical white space ranges in 16 bit mode

Startseite
Nachricht löschen
Autor: Subversion repository
Datum:  
To: pcre-svn
Betreff: [Pcre-svn] [826] code/branches/pcre16: fix horizontal and vertical white space ranges in 16 bit mode
Revision: 826
          http://vcs.pcre.org/viewvc?view=rev&revision=826
Author:   zherczeg
Date:     2011-12-27 09:42:33 +0000 (Tue, 27 Dec 2011)


Log Message:
-----------
fix horizontal and vertical white space ranges in 16 bit mode

Modified Paths:
--------------
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcre_printint.c
    code/branches/pcre16/testdata/testinput14
    code/branches/pcre16/testdata/testinput16
    code/branches/pcre16/testdata/testinput17
    code/branches/pcre16/testdata/testinput19
    code/branches/pcre16/testdata/testinput2
    code/branches/pcre16/testdata/testinput7
    code/branches/pcre16/testdata/testoutput14
    code/branches/pcre16/testdata/testoutput16
    code/branches/pcre16/testdata/testoutput17
    code/branches/pcre16/testdata/testoutput19
    code/branches/pcre16/testdata/testoutput2
    code/branches/pcre16/testdata/testoutput5
    code/branches/pcre16/testdata/testoutput7


Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/pcre_compile.c    2011-12-27 09:42:33 UTC (rev 826)
@@ -4023,7 +4023,22 @@
             SETBIT(classbits, 0x09); /* VT */
             SETBIT(classbits, 0x20); /* SPACE */
             SETBIT(classbits, 0xa0); /* NSBP */
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x1680;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x180e;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2000;
+            *class_uchardata++ = 0x200a;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x202f;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x205f;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x3000;
+#elif defined SUPPORT_UTF
             if (utf)
               {
               xclass = TRUE;
@@ -4033,7 +4048,7 @@
               class_uchardata += PRIV(ord2utf)(0x180e, class_uchardata);
               *class_uchardata++ = XCL_RANGE;
               class_uchardata += PRIV(ord2utf)(0x2000, class_uchardata);
-              class_uchardata += PRIV(ord2utf)(0x200A, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x200a, class_uchardata);
               *class_uchardata++ = XCL_SINGLE;
               class_uchardata += PRIV(ord2utf)(0x202f, class_uchardata);
               *class_uchardata++ = XCL_SINGLE;
@@ -4057,9 +4072,36 @@
                 }
               classbits[c] |= x;
               }
-
+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x0100;
+            *class_uchardata++ = 0x167f;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x1681;
+            *class_uchardata++ = 0x180d;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x180f;
+            *class_uchardata++ = 0x1fff;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x200b;
+            *class_uchardata++ = 0x202e;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2030;
+            *class_uchardata++ = 0x205e;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2060;
+            *class_uchardata++ = 0x2fff;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x3001;
 #ifdef SUPPORT_UTF
             if (utf)
+              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+            else
+#endif
+              *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+            if (utf)
               {
               xclass = TRUE;
               *class_uchardata++ = XCL_RANGE;
@@ -4072,7 +4114,7 @@
               class_uchardata += PRIV(ord2utf)(0x180f, class_uchardata);
               class_uchardata += PRIV(ord2utf)(0x1fff, class_uchardata);
               *class_uchardata++ = XCL_RANGE;
-              class_uchardata += PRIV(ord2utf)(0x200B, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x200b, class_uchardata);
               class_uchardata += PRIV(ord2utf)(0x202e, class_uchardata);
               *class_uchardata++ = XCL_RANGE;
               class_uchardata += PRIV(ord2utf)(0x2030, class_uchardata);
@@ -4093,7 +4135,12 @@
             SETBIT(classbits, 0x0c); /* FF */
             SETBIT(classbits, 0x0d); /* CR */
             SETBIT(classbits, 0x85); /* NEL */
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2028;
+            *class_uchardata++ = 0x2029;
+#elif defined SUPPORT_UTF
             if (utf)
               {
               xclass = TRUE;
@@ -4121,15 +4168,28 @@
               classbits[c] |= x;
               }


+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x0100;
+            *class_uchardata++ = 0x2027;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x202a;
 #ifdef SUPPORT_UTF
             if (utf)
+              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+            else
+#endif
+              *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+            if (utf)
               {
               xclass = TRUE;
               *class_uchardata++ = XCL_RANGE;
               class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata);
               class_uchardata += PRIV(ord2utf)(0x2027, class_uchardata);
               *class_uchardata++ = XCL_RANGE;
-              class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x202a, class_uchardata);
               class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
               }
 #endif


Modified: code/branches/pcre16/pcre_printint.c
===================================================================
--- code/branches/pcre16/pcre_printint.c    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/pcre_printint.c    2011-12-27 09:42:33 UTC (rev 826)
@@ -114,8 +114,11 @@
 int c = *ptr;


#ifndef SUPPORT_UTF
+
(void)utf; /* Avoid compiler warning */
-if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
+if (PRINTABLE(c)) fprintf(f, "%c", c);
+else if (c <= 0xff) fprintf(f, "\\x%02x", c);
+else fprintf(f, "\\x{%x}", c);
return 0;

#else

Modified: code/branches/pcre16/testdata/testinput14
===================================================================
--- code/branches/pcre16/testdata/testinput14    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput14    2011-12-27 09:42:33 UTC (rev 826)
@@ -283,4 +283,26 @@
 \)  )*                       # optional trailing comment
 /xSI


+/\h/SI
+
+/\v/SI
+
+/\R/SI
+
+/[\h]/BZ
+    >\x09<
+
+/[\h]+/BZ
+    >\x09\x20\xa0<
+
+/[\v]/BZ
+
+/[\H]/BZ
+
+/[^\h]/BZ
+
+/[\V]/BZ
+
+/[\x0a\V]/BZ
+
 /-- End of testinput14 --/


Modified: code/branches/pcre16/testdata/testinput16
===================================================================
--- code/branches/pcre16/testdata/testinput16    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput16    2011-12-27 09:42:33 UTC (rev 826)
@@ -30,4 +30,6 @@


/\R/SI

+/[[:blank:]]/WBZ
+
/-- End of testinput16 --/

Modified: code/branches/pcre16/testdata/testinput17
===================================================================
--- code/branches/pcre16/testdata/testinput17    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput17    2011-12-27 09:42:33 UTC (rev 826)
@@ -219,4 +219,20 @@


/\R/SI

+/[\h]/BZ
+    >\x09<
+
+/[\h]+/BZ
+    >\x09\x20\xa0<
+
+/[\v]/BZ
+
+/[\H]/BZ
+
+/[^\h]/BZ
+
+/[\V]/BZ
+
+/[\x0a\V]/BZ
+
 /-- End of testinput17 --/


Modified: code/branches/pcre16/testdata/testinput19
===================================================================
--- code/branches/pcre16/testdata/testinput19    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput19    2011-12-27 09:42:33 UTC (rev 826)
@@ -17,4 +17,6 @@


/[^ⱥ]/8iBZ

+/[[:blank:]]/WBZ
+
/-- End of testinput19 --/

Modified: code/branches/pcre16/testdata/testinput2
===================================================================
--- code/branches/pcre16/testdata/testinput2    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput2    2011-12-27 09:42:33 UTC (rev 826)
@@ -2173,22 +2173,6 @@
     xabcpqrx
     xxyzx 


-/[\h]/BZ
-    >\x09<
-
-/[\h]+/BZ
-    >\x09\x20\xa0<
-
-/[\v]/BZ
-
-/[\H]/BZ
-
-/[^\h]/BZ
-
-/[\V]/BZ
-
-/[\x0a\V]/BZ
-
 /\H++X/BZ
     ** Failers
     XXXX


Modified: code/branches/pcre16/testdata/testinput7
===================================================================
--- code/branches/pcre16/testdata/testinput7    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testinput7    2011-12-27 09:42:33 UTC (rev 826)
@@ -397,8 +397,6 @@


/[[:ascii:]]/WBZ

-/[[:blank:]]/WBZ
-
/[[:cntrl:]]/WBZ

/[[:digit:]]/WBZ

Modified: code/branches/pcre16/testdata/testoutput14
===================================================================
--- code/branches/pcre16/testdata/testoutput14    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput14    2011-12-27 09:42:33 UTC (rev 826)
@@ -355,4 +355,88 @@
   9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e 
   f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f 


+/\h/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x09 \x20 \xa0 
+
+/\v/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85 
+
+/\R/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85 
+
+/[\h]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x09 \xa0]
+        Ket
+        End
+------------------------------------------------------------------
+    >\x09<
+ 0: \x09
+
+/[\h]+/BZ
+------------------------------------------------------------------
+        Bra
+        [\x09 \xa0]+
+        Ket
+        End
+------------------------------------------------------------------
+    >\x09\x20\xa0<
+ 0: \x09 \xa0
+
+/[\v]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x0a-\x0d\x85]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\H]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\h]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\V]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-\x09\x0e-\x84\x86-\xff]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\x0a\V]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-\x0a\x0e-\x84\x86-\xff]
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput14 --/


Modified: code/branches/pcre16/testdata/testoutput16
===================================================================
--- code/branches/pcre16/testdata/testoutput16    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput16    2011-12-27 09:42:33 UTC (rev 826)
@@ -110,4 +110,12 @@
 Subject length lower bound = 1
 Starting byte set: \x0a \x0b \x0c \x0d \x85 


+/[[:blank:]]/WBZ
+------------------------------------------------------------------
+        Bra
+        [\x09 \xa0]
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput16 --/


Modified: code/branches/pcre16/testdata/testoutput17
===================================================================
--- code/branches/pcre16/testdata/testoutput17    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput17    2011-12-27 09:42:33 UTC (rev 826)
@@ -268,4 +268,64 @@
 Subject length lower bound = 1
 Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff 


+/[\h]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+        Ket
+        End
+------------------------------------------------------------------
+    >\x09<
+ 0: \x09
+
+/[\h]+/BZ
+------------------------------------------------------------------
+        Bra
+        [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]+
+        Ket
+        End
+------------------------------------------------------------------
+    >\x09\x20\xa0<
+ 0: \x09 \xa0
+
+/[\v]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x0a-\x0d\x85\x{2028}-\x{2029}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\H]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^\h]/BZ
+------------------------------------------------------------------
+        Bra
+        [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\V]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\x0a\V]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-\x0a\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput17 --/


Modified: code/branches/pcre16/testdata/testoutput19
===================================================================
--- code/branches/pcre16/testdata/testoutput19    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput19    2011-12-27 09:42:33 UTC (rev 826)
@@ -77,4 +77,12 @@
         End
 ------------------------------------------------------------------


+/[[:blank:]]/WBZ
+------------------------------------------------------------------
+        Bra
+        [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput19 --/ 


Modified: code/branches/pcre16/testdata/testoutput2
===================================================================
--- code/branches/pcre16/testdata/testoutput2    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput2    2011-12-27 09:42:33 UTC (rev 826)
@@ -8383,66 +8383,6 @@
  3: <unset>
  4: x


-/[\h]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x09 \xa0]
-        Ket
-        End
-------------------------------------------------------------------
-    >\x09<
- 0: \x09
-
-/[\h]+/BZ
-------------------------------------------------------------------
-        Bra
-        [\x09 \xa0]+
-        Ket
-        End
-------------------------------------------------------------------
-    >\x09\x20\xa0<
- 0: \x09 \xa0
-
-/[\v]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x0a-\x0d\x85]
-        Ket
-        End
-------------------------------------------------------------------
-
-/[\H]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
-        Ket
-        End
-------------------------------------------------------------------
-
-/[^\h]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
-        Ket
-        End
-------------------------------------------------------------------
-
-/[\V]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-\x09\x0e-\x84\x86-\xff]
-        Ket
-        End
-------------------------------------------------------------------
-
-/[\x0a\V]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-\x0a\x0e-\x84\x86-\xff]
-        Ket
-        End
-------------------------------------------------------------------
-
 /\H++X/BZ
 ------------------------------------------------------------------
         Bra


Modified: code/branches/pcre16/testdata/testoutput5
===================================================================
--- code/branches/pcre16/testdata/testoutput5    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput5    2011-12-27 09:42:33 UTC (rev 826)
@@ -797,7 +797,7 @@
 /[\V]/8BZ
 ------------------------------------------------------------------
         Bra
-        [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{2029}-\x{10ffff}]
+        [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}]
         Ket
         End
 ------------------------------------------------------------------


Modified: code/branches/pcre16/testdata/testoutput7
===================================================================
--- code/branches/pcre16/testdata/testoutput7    2011-12-26 21:23:17 UTC (rev 825)
+++ code/branches/pcre16/testdata/testoutput7    2011-12-27 09:42:33 UTC (rev 826)
@@ -815,14 +815,6 @@
         End
 ------------------------------------------------------------------


-/[[:blank:]]/WBZ
-------------------------------------------------------------------
-        Bra
-        [\x09 \xa0]
-        Ket
-        End
-------------------------------------------------------------------
-
 /[[:cntrl:]]/WBZ
 ------------------------------------------------------------------
         Bra