Revision: 1664
http://vcs.pcre.org/viewvc?view=rev&revision=1664
Author: ph10
Date: 2016-08-04 18:15:38 +0100 (Thu, 04 Aug 2016)
Log Message:
-----------
Fix character class bug when a Unicode property was present with \D etc. in a
non-UCP character class in a wide-character mode.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput16
code/trunk/testdata/testinput19
code/trunk/testdata/testinput7
code/trunk/testdata/testoutput16
code/trunk/testdata/testoutput19
code/trunk/testdata/testoutput7
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/ChangeLog 2016-08-04 17:15:38 UTC (rev 1664)
@@ -4,7 +4,7 @@
Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All
development is happening in the PCRE2 10.xx series.
-Version 8.40 17-June-2016
+Version 8.40 xx-xxxx-2016
-------------------------
1. Using -o with -M in pcregrep could cause unnecessary repeated output when
@@ -17,9 +17,15 @@
4. Ignore "show all captures" (/=) for DFA matching.
-5. Fix unaligned accesses on x86. Patch by Marc Mutz.
+5. Fix JIT unaligned accesses on x86. Patch by Marc Mutz.
+6. In any wide-character mode (8-bit UTF or any 16-bit or 32-bit mode), without
+ PCRE_UCP set, a negative character type such as \D in a positive class
+ should cause all characters greater than 255 to match, whatever else is in
+ the class. There was a bug that caused this not to happen if a Unicode
+ property item was added to such a class, for example [\D\P{Nd}] or [\W\pL].
+
Version 8.39 14-June-2016
-------------------------
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/pcre_compile.c 2016-08-04 17:15:38 UTC (rev 1664)
@@ -5579,6 +5579,34 @@
#endif
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
{
+ /* For non-UCP wide characters, in a non-negative class containing \S or
+ similar (should_flip_negation is set), all characters greater than 255
+ must be in the class. */
+
+ if (
+#if defined COMPILE_PCRE8
+ utf &&
+#endif
+ should_flip_negation && !negate_class && (options & PCRE_UCP) == 0)
+ {
+ *class_uchardata++ = XCL_RANGE;
+ if (utf) /* Will always be utf in the 8-bit library */
+ {
+ class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ }
+ else /* Can only happen for the 16-bit & 32-bit libraries */
+ {
+#if defined COMPILE_PCRE16
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffu;
+#elif defined COMPILE_PCRE32
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffffffu;
+#endif
+ }
+ }
+
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS;
code += LINK_SIZE;
Modified: code/trunk/testdata/testinput16
===================================================================
--- code/trunk/testdata/testinput16 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/testdata/testinput16 2016-08-04 17:15:38 UTC (rev 1664)
@@ -38,4 +38,30 @@
/s+/i8SI
SSss\x{17f}
+/[\W\p{Any}]/BZ
+ abc
+ 123
+
+/[\W\pL]/BZ
+ abc
+ ** Failers
+ 123
+
+/[\D]/8
+ \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ ** Failers
+ \x{1d7cf}
+
+/[^\D\P{Nd}]/8
+ a9b
+ \x{1d7cf}
+ ** Failers
+ \x{10000}
+
/-- End of testinput16 --/
Modified: code/trunk/testdata/testinput19
===================================================================
--- code/trunk/testdata/testinput19 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/testdata/testinput19 2016-08-04 17:15:38 UTC (rev 1664)
@@ -25,4 +25,21 @@
/s+/i8SI
SSss\x{17f}
+/[\D]/8
+ \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ ** Failers
+ \x{1d7cf}
+
+/[^\D\P{Nd}]/8
+ a9b
+ \x{1d7cf}
+ ** Failers
+ \x{10000}
+
/-- End of testinput19 --/
Modified: code/trunk/testdata/testinput7
===================================================================
--- code/trunk/testdata/testinput7 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/testdata/testinput7 2016-08-04 17:15:38 UTC (rev 1664)
@@ -838,15 +838,6 @@
/^s?c/mi8I
scat
-/[\W\p{Any}]/BZ
- abc
- 123
-
-/[\W\pL]/BZ
- abc
- ** Failers
- 123
-
/a[[:punct:]b]/WBZ
/a[[:punct:]b]/8WBZ
Modified: code/trunk/testdata/testoutput16
===================================================================
--- code/trunk/testdata/testoutput16 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/testdata/testoutput16 2016-08-04 17:15:38 UTC (rev 1664)
@@ -138,4 +138,56 @@
SSss\x{17f}
0: SSss\x{17f}
+/[\W\p{Any}]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-/:-@[-^`{-\xff\p{Any}]
+ Ket
+ End
+------------------------------------------------------------------
+ abc
+ 0: a
+ 123
+ 0: 1
+
+/[\W\pL]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-/:-@[-^`{-\xff\p{L}]
+ Ket
+ End
+------------------------------------------------------------------
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ 123
+No match
+
+/[\D]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ 0: 9
+ ** Failers
+No match
+ \x{1d7cf}
+No match
+
+/[^\D\P{Nd}]/8
+ a9b
+ 0: 9
+ \x{1d7cf}
+ 0: \x{1d7cf}
+ ** Failers
+No match
+ \x{10000}
+No match
+
/-- End of testinput16 --/
Modified: code/trunk/testdata/testoutput19
===================================================================
--- code/trunk/testdata/testoutput19 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/testdata/testoutput19 2016-08-04 17:15:38 UTC (rev 1664)
@@ -105,4 +105,30 @@
SSss\x{17f}
0: SSss\x{17f}
+/[\D]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ 0: 9
+ ** Failers
+No match
+ \x{1d7cf}
+No match
+
+/[^\D\P{Nd}]/8
+ a9b
+ 0: 9
+ \x{1d7cf}
+ 0: \x{1d7cf}
+ ** Failers
+No match
+ \x{10000}
+No match
+
/-- End of testinput19 --/
Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7 2016-07-17 04:35:25 UTC (rev 1663)
+++ code/trunk/testdata/testoutput7 2016-08-04 17:15:38 UTC (rev 1664)
@@ -2295,32 +2295,6 @@
scat
0: sc
-/[\W\p{Any}]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-/:-@[-^`{-\xff\p{Any}]
- Ket
- End
-------------------------------------------------------------------
- abc
- 0: a
- 123
- 0: 1
-
-/[\W\pL]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-/:-@[-^`{-\xff\p{L}]
- Ket
- End
-------------------------------------------------------------------
- abc
- 0: a
- ** Failers
- 0: *
- 123
-No match
-
/a[[:punct:]b]/WBZ
------------------------------------------------------------------
Bra