Revision: 1056
http://vcs.pcre.org/viewvc?view=rev&revision=1056
Author: chpe
Date: 2012-10-16 16:53:41 +0100 (Tue, 16 Oct 2012)
Log Message:
-----------
pcre32: compile: Fix \H and \V character ranges for pcre32
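In the 32-bit library, the negated ranges now extend up to 0xffffffff in
non-UTF mode (0x10ffff in UTF mode). The [\H] and [\V] range tests move from
the shared test file to the separate 16-bit and 32-bit test files, because
the compiled output differs between the two.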
TODO: These character ranges look rather odd for non-UTF mode; is this a bug?
Modified Paths:
--------------
code/trunk/pcre_compile.c
code/trunk/testdata/testinput17
code/trunk/testdata/testinput23
code/trunk/testdata/testinput25
code/trunk/testdata/testoutput17
code/trunk/testdata/testoutput23
code/trunk/testdata/testoutput25
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/pcre_compile.c 2012-10-16 15:53:41 UTC (rev 1056)
@@ -3421,9 +3421,9 @@
static int
add_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options,
- compile_data *cd, unsigned int start, unsigned int end)
+ compile_data *cd, pcre_uint32 start, pcre_uint32 end)
{
-unsigned int c;
+pcre_uint32 c;
int n8 = 0;
/* If caseless matching is required, scan the range and process alternate
@@ -3437,7 +3437,7 @@
if ((options & PCRE_UTF8) != 0)
{
int rc;
- unsigned int oc, od;
+ pcre_uint32 oc, od;
options &= ~PCRE_CASELESS; /* Remove for recursive calls */
c = start;
@@ -3490,12 +3490,7 @@
#endif
if (end > 0xffff) end = 0xffff;
-#elif defined COMPILE_PCRE32
-#ifdef SUPPORT_UTF
- if ((options & PCRE_UTF32) == 0)
- if (end > 0xffffu) end = 0xffffu; // FIXMEchpe rebase fix this
-#endif
-#endif /* COMPILE_PCRE[8|16|32] */
+#endif /* COMPILE_PCRE[8|16] */
/* If all characters are less than 256, use the bit map. Otherwise use extra
data. */
@@ -3625,14 +3620,15 @@
add_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr,
int options, compile_data *cd, const pcre_uint32 *p)
{
+BOOL utf = (options & PCRE_UTF8) != 0;
int n8 = 0;
if (p[0] > 0)
n8 += add_to_class(classbits, uchardptr, options, cd, 0, p[0] - 1);
while (p[0] < NOTACHAR)
{
while (p[1] == p[0] + 1) p++;
- n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
- (p[1] == NOTACHAR)? 0x10ffff : p[1] - 1);
+ n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1,
+ (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
p++;
}
return n8;
Modified: code/trunk/testdata/testinput17
===================================================================
--- code/trunk/testdata/testinput17 2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testinput17 2012-10-16 15:53:41 UTC (rev 1056)
@@ -214,14 +214,8 @@
/[\v]/BZ
-/[\H]/BZ
-
/[^\h]/BZ
-/[\V]/BZ
-
-/[\x0a\V]/BZ
-
/\h+/SI
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
@@ -236,7 +230,7 @@
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
-/[\H\x{d800}]+/BZSI
+/[\H\x{d800}]+/
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
@@ -254,7 +248,7 @@
\x{2028}\x{2029}\x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
-/[\V\x{d800}]+/BZSI
+/[\V\x{d800}]+/
\x{2028}\x{2029}\x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
Modified: code/trunk/testdata/testinput23
===================================================================
--- code/trunk/testdata/testinput23 2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testinput23 2012-10-16 15:53:41 UTC (rev 1056)
@@ -7,4 +7,10 @@
/\x{10000}/
+/-- Check character ranges --/
+
+/[\H]/BZSI
+
+/[\V]/BZSI
+
/-- End of testinput23 --/
Modified: code/trunk/testdata/testinput25
===================================================================
--- code/trunk/testdata/testinput25 2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testinput25 2012-10-16 15:53:41 UTC (rev 1056)
@@ -20,4 +20,10 @@
/\x{400000}\x{800000}/iDZ
+/-- Check character ranges --/
+
+/[\H]/BZSI
+
+/[\V]/BZSI
+
/-- End of testinput25 --/
Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17 2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testoutput17 2012-10-16 15:53:41 UTC (rev 1056)
@@ -259,14 +259,6 @@
End
------------------------------------------------------------------
-/[\H]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
- Ket
- End
-------------------------------------------------------------------
-
/[^\h]/BZ
------------------------------------------------------------------
Bra
@@ -275,22 +267,6 @@
End
------------------------------------------------------------------
-/[\V]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
- Ket
- End
-------------------------------------------------------------------
-
-/[\x0a\V]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x0a\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
- Ket
- End
-------------------------------------------------------------------
-
/\h+/SI
Capturing subpattern count = 0
No options
@@ -337,19 +313,7 @@
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
0: \x9f\xa1\x{2fff}\x{3001}
-/[\H\x{d800}]+/BZSI
-------------------------------------------------------------------
- Bra
- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}\x{d800}]+
- Ket
- End
-------------------------------------------------------------------
-Capturing subpattern count = 0
-No options
-No first char
-No need char
-Subject length lower bound = 1
-No set of starting bytes
+/[\H\x{d800}]+/
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
0: \x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
@@ -401,19 +365,7 @@
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
0: \x09\x0e\x84\x86
-/[\V\x{d800}]+/BZSI
-------------------------------------------------------------------
- Bra
- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}\x{d800}]+
- Ket
- End
-------------------------------------------------------------------
-Capturing subpattern count = 0
-No options
-No first char
-No need char
-Subject length lower bound = 1
-No set of starting bytes
+/[\V\x{d800}]+/
\x{2028}\x{2029}\x{2027}\x{2030}
0: \x{2027}\x{2030}
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
Modified: code/trunk/testdata/testoutput23
===================================================================
--- code/trunk/testdata/testoutput23 2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testoutput23 2012-10-16 15:53:41 UTC (rev 1056)
@@ -9,4 +9,34 @@
/\x{10000}/
Failed: character value in \x{...} sequence is too large at offset 8
+/-- Check character ranges --/
+
+/[\H]/BZSI
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/[\V]/BZSI
+------------------------------------------------------------------
+ Bra
+ [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
/-- End of testinput23 --/
Modified: code/trunk/testdata/testoutput25
===================================================================
--- code/trunk/testdata/testoutput25 2012-10-16 15:53:30 UTC (rev 1055)
+++ code/trunk/testdata/testoutput25 2012-10-16 15:53:41 UTC (rev 1056)
@@ -34,4 +34,34 @@
First char = \x{400000}
Need char = \x{800000}
+/-- Check character ranges --/
+
+/[\H]/BZSI
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
+/[\V]/BZSI
+------------------------------------------------------------------
+ Bra
+ [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffffffff}]
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
/-- End of testinput25 --/