Revision: 1438
http://vcs.pcre.org/viewvc?view=rev&revision=1438
Author: ph10
Date: 2014-01-10 16:13:10 +0000 (Fri, 10 Jan 2014)
Log Message:
-----------
Fix caseless character class bug for characters within a range that have more
than one other case.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput6
code/trunk/testdata/testinput7
code/trunk/testdata/testoutput6
code/trunk/testdata/testoutput7
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2014-01-10 08:52:20 UTC (rev 1437)
+++ code/trunk/ChangeLog 2014-01-10 16:13:10 UTC (rev 1438)
@@ -53,6 +53,12 @@
11. Empty match is not possible, when the minimum length is greater than zero,
and there is no \K in the pattern. Remove these unnecessary checks from JIT.
+
+12. In a caseless character class with UCP support, when a character with more
+ than one alternative case was not the first character of a range, not all
+ the alternative cases were added to the class. For example, s and \x{17f}
+ are both alternative cases for S: the class [RST] was handled correctly,
+ but [R-T] was not.
Version 8.34 15-December-2013
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2014-01-10 08:52:20 UTC (rev 1437)
+++ code/trunk/pcre_compile.c 2014-01-10 16:13:10 UTC (rev 1438)
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2013 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -4077,12 +4077,16 @@
if (c > d) return -1; /* Reached end of range */
+/* Found a character that has a single other case. Search for the end of the
+range, which is either the end of the input range, or a character that has no
+other case or more than one other case. */
+
*ocptr = othercase;
next = othercase + 1;
for (++c; c <= d; c++)
{
- if (UCD_OTHERCASE(c) != next) break;
+ if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
next++;
}
@@ -4138,7 +4142,7 @@
options &= ~PCRE_CASELESS; /* Remove for recursive calls */
c = start;
-
+
while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
{
/* Handle a single character that has more than one other case. */
@@ -4201,9 +4205,9 @@
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
if (start <= 0xff) start = 0xff + 1;
-if (end >= start) {
+if (end >= start)
+ {
pcre_uchar *uchardata = *uchardptr;
-
#ifdef SUPPORT_UTF
if ((options & PCRE_UTF8) != 0) /* All UTFs use the same flag bit */
{
Modified: code/trunk/testdata/testinput6
===================================================================
--- code/trunk/testdata/testinput6 2014-01-10 08:52:20 UTC (rev 1437)
+++ code/trunk/testdata/testinput6 2014-01-10 16:13:10 UTC (rev 1438)
@@ -1484,4 +1484,13 @@
\x{a1}\x{a7}
\x{37e}
+/[RST]+/8iW
+ Ss\x{17f}
+
+/[R-T]+/8iW
+ Ss\x{17f}
+
+/[q-u]+/8iW
+ Ss\x{17f}
+
/-- End of testinput6 --/
Modified: code/trunk/testdata/testinput7
===================================================================
--- code/trunk/testdata/testinput7 2014-01-10 08:52:20 UTC (rev 1437)
+++ code/trunk/testdata/testinput7 2014-01-10 16:13:10 UTC (rev 1438)
@@ -829,4 +829,10 @@
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ
+/[RST]+/8iWBZ
+
+/[R-T]+/8iWBZ
+
+/[Q-U]+/8iWBZ
+
/-- End of testinput7 --/
Modified: code/trunk/testdata/testoutput6
===================================================================
--- code/trunk/testdata/testoutput6 2014-01-10 08:52:20 UTC (rev 1437)
+++ code/trunk/testdata/testoutput6 2014-01-10 16:13:10 UTC (rev 1438)
@@ -2445,4 +2445,16 @@
\x{37e}
No match
+/[RST]+/8iW
+ Ss\x{17f}
+ 0: Ss\x{17f}
+
+/[R-T]+/8iW
+ Ss\x{17f}
+ 0: Ss\x{17f}
+
+/[q-u]+/8iW
+ Ss\x{17f}
+ 0: Ss\x{17f}
+
/-- End of testinput6 --/
Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7 2014-01-10 08:52:20 UTC (rev 1437)
+++ code/trunk/testdata/testoutput7 2014-01-10 16:13:10 UTC (rev 1438)
@@ -124,7 +124,7 @@
/[z-\x{100}]/8iDZ
------------------------------------------------------------------
Bra
- [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}]
+ [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
Ket
End
------------------------------------------------------------------
@@ -162,7 +162,7 @@
/[z-\x{100}]/8DZi
------------------------------------------------------------------
Bra
- [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}]
+ [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
Ket
End
------------------------------------------------------------------
@@ -2263,4 +2263,28 @@
End
------------------------------------------------------------------
+/[RST]+/8iWBZ
+------------------------------------------------------------------
+ Bra
+ [R-Tr-t\x{17f}]++
+ Ket
+ End
+------------------------------------------------------------------
+
+/[R-T]+/8iWBZ
+------------------------------------------------------------------
+ Bra
+ [R-Tr-t\x{17f}]++
+ Ket
+ End
+------------------------------------------------------------------
+
+/[Q-U]+/8iWBZ
+------------------------------------------------------------------
+ Bra
+ [Q-Uq-u\x{17f}]++
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput7 --/