[Pcre-svn] [1056] code/trunk: pcre32: compile: Fix \H and \V character ranges for pcre32

Startseite
Nachricht löschen
Autor: Subversion repository
Datum:  
To: pcre-svn
Betreff: [Pcre-svn] [1056] code/trunk: pcre32: compile: Fix \H and \V character ranges for pcre32
Revision: 1056
          http://vcs.pcre.org/viewvc?view=rev&revision=1056
Author:   chpe
Date:     2012-10-16 16:53:41 +0100 (Tue, 16 Oct 2012)


Log Message:
-----------
pcre32: compile: Fix \H and \V character ranges for pcre32

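In non-UTF mode, extend the open-ended upper bound of the \H and \V
character ranges to 0xffffffff for the 32-bit library (UTF mode still
stops at 0x10ffff), and move the affected tests into the separate 16-bit
and 32-bit test files because the expected output now differs between
the two libraries.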

TODO: these character ranges look rather odd for non-UTF mode... bug?

Modified Paths:
--------------
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput17
    code/trunk/testdata/testinput23
    code/trunk/testdata/testinput25
    code/trunk/testdata/testoutput17
    code/trunk/testdata/testoutput23
    code/trunk/testdata/testoutput25


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/pcre_compile.c    2012-10-16 15:53:41 UTC (rev 1056)
@@ -3421,9 +3421,9 @@


static int
add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
- compile_data *cd, unsigned int start, unsigned int end)
+ compile_data *cd, pcre_uint32 start, pcre_uint32 end)
{
-unsigned int c;
+pcre_uint32 c;
int n8 = 0;

 /* If caseless matching is required, scan the range and process alternate 
@@ -3437,7 +3437,7 @@
   if ((options & PCRE_UTF8) != 0)
     { 
     int rc; 
-    unsigned int oc, od;
+    pcre_uint32 oc, od;


     options &= ~PCRE_CASELESS;   /* Remove for recursive calls */
     c = start;
@@ -3490,12 +3490,7 @@
 #endif
   if (end > 0xffff) end = 0xffff;


-#elif defined COMPILE_PCRE32
-#ifdef SUPPORT_UTF
-  if ((options & PCRE_UTF32) == 0)
-    if (end > 0xffffu) end = 0xffffu; // FIXMEchpe rebase fix this
-#endif
-#endif /* COMPILE_PCRE[8|16|32] */
+#endif /* COMPILE_PCRE[8|16] */


 /* If all characters are less than 256, use the bit map. Otherwise use extra
 data. */
@@ -3625,14 +3620,15 @@
 add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, 
   int options, compile_data *cd, const pcre_uint32 *p)
 {
+BOOL utf = (options & PCRE_UTF8) != 0;
 int n8 = 0;
 if (p[0] > 0)
   n8 += add_to_class(classbits, uchardptr, options, cd, 0, p[0] - 1);
 while (p[0] < NOTACHAR)
   {
   while (p[1] == p[0] + 1) p++;
-  n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1, 
-    (p[1] == NOTACHAR)? 0x10ffff : p[1] - 1);
+  n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
+    (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
   p++; 
   } 
 return n8;


Modified: code/trunk/testdata/testinput17
===================================================================
--- code/trunk/testdata/testinput17    2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testinput17    2012-10-16 15:53:41 UTC (rev 1056)
@@ -214,14 +214,8 @@


/[\v]/BZ

-/[\H]/BZ
-
/[^\h]/BZ

-/[\V]/BZ
-
-/[\x0a\V]/BZ
-
 /\h+/SI
     \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
     \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
@@ -236,7 +230,7 @@
     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}


-/[\H\x{d800}]+/BZSI
+/[\H\x{d800}]+/
     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
     \x{2000}\x{200a}\x{1fff}\x{200b}
     \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
@@ -254,7 +248,7 @@
     \x{2028}\x{2029}\x{2027}\x{2030}
     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86


-/[\V\x{d800}]+/BZSI
+/[\V\x{d800}]+/
     \x{2028}\x{2029}\x{2027}\x{2030}
     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86



Modified: code/trunk/testdata/testinput23
===================================================================
--- code/trunk/testdata/testinput23    2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testinput23    2012-10-16 15:53:41 UTC (rev 1056)
@@ -7,4 +7,10 @@


/\x{10000}/

+/-- Check character ranges --/
+
+/[\H]/BZSI
+
+/[\V]/BZSI
+
/-- End of testinput23 --/

Modified: code/trunk/testdata/testinput25
===================================================================
--- code/trunk/testdata/testinput25    2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testinput25    2012-10-16 15:53:41 UTC (rev 1056)
@@ -20,4 +20,10 @@


/\x{400000}\x{800000}/iDZ

+/-- Check character ranges --/
+
+/[\H]/BZSI
+
+/[\V]/BZSI
+
/-- End of testinput25 --/

Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17    2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testoutput17    2012-10-16 15:53:41 UTC (rev 1056)
@@ -259,14 +259,6 @@
         End
 ------------------------------------------------------------------


-/[\H]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
-        Ket
-        End
-------------------------------------------------------------------
-
 /[^\h]/BZ
 ------------------------------------------------------------------
         Bra
@@ -275,22 +267,6 @@
         End
 ------------------------------------------------------------------


-/[\V]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
-        Ket
-        End
-------------------------------------------------------------------
-
-/[\x0a\V]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-\x0a\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
-        Ket
-        End
-------------------------------------------------------------------
-
 /\h+/SI
 Capturing subpattern count = 0
 No options
@@ -337,19 +313,7 @@
     \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
  0: \x9f\xa1\x{2fff}\x{3001}


-/[\H\x{d800}]+/BZSI
-------------------------------------------------------------------
-        Bra
-        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}\x{d800}]+
-        Ket
-        End
-------------------------------------------------------------------
-Capturing subpattern count = 0
-No options
-No first char
-No need char
-Subject length lower bound = 1
-No set of starting bytes
+/[\H\x{d800}]+/
     \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
  0: \x{167f}\x{1681}\x{180d}\x{180f}
     \x{2000}\x{200a}\x{1fff}\x{200b}
@@ -401,19 +365,7 @@
     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
  0: \x09\x0e\x84\x86


-/[\V\x{d800}]+/BZSI
-------------------------------------------------------------------
-        Bra
-        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}\x{d800}]+
-        Ket
-        End
-------------------------------------------------------------------
-Capturing subpattern count = 0
-No options
-No first char
-No need char
-Subject length lower bound = 1
-No set of starting bytes
+/[\V\x{d800}]+/
     \x{2028}\x{2029}\x{2027}\x{2030}
  0: \x{2027}\x{2030}
     \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86


Modified: code/trunk/testdata/testoutput23
===================================================================
--- code/trunk/testdata/testoutput23    2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testoutput23    2012-10-16 15:53:41 UTC (rev 1056)
@@ -9,4 +9,34 @@
 /\x{10000}/
 Failed: character value in \x{...} sequence is too large at offset 8


+/-- Check character ranges --/
+
+/[\H]/BZSI
+------------------------------------------------------------------
+        Bra
+        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/[\V]/BZSI
+------------------------------------------------------------------
+        Bra
+        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
 /-- End of testinput23 --/


Modified: code/trunk/testdata/testoutput25
===================================================================
--- code/trunk/testdata/testoutput25    2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testoutput25    2012-10-16 15:53:41 UTC (rev 1056)
@@ -34,4 +34,34 @@
 First char = \x{400000}
 Need char = \x{800000}


+/-- Check character ranges --/
+
+/[\H]/BZSI
+------------------------------------------------------------------
+        Bra
+        [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/[\V]/BZSI
+------------------------------------------------------------------
+        Bra
+        [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffffffff}]
+        Ket
+        End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
 /-- End of testinput25 --/