Revision: 440
http://www.exim.org/viewvc/pcre2?view=rev&revision=440
Author: ph10
Date: 2015-11-17 17:13:43 +0000 (Tue, 17 Nov 2015)
Log Message:
-----------
Fix single-character POSIX class bug in UCP mode.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-11-16 08:30:48 UTC (rev 439)
+++ code/trunk/ChangeLog 2015-11-17 17:13:43 UTC (rev 440)
@@ -314,7 +314,11 @@
94. Support offset_limit in JIT.
+95. A sequence such as [[:punct:]b], that is, a POSIX character class followed
+by a single ASCII character in a class item, was incorrectly compiled in UCP
+mode. The POSIX class got lost, but only if the single character followed it.
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-11-16 08:30:48 UTC (rev 439)
+++ code/trunk/src/pcre2_compile.c 2015-11-17 17:13:43 UTC (rev 440)
@@ -1352,7 +1352,7 @@
/* A large and/or complex regex can take too long to process. We have to assume
it can match an empty string. This can happen more often when (?| groups are
-present in the pattern and the caching is disabled. Setting the cap at 1100
+present in the pattern and the caching is disabled. Setting the cap at 1100
allows the test for more than 1023 capturing patterns to work. */
if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED;
@@ -4729,16 +4729,20 @@
CLASS_SINGLE_CHARACTER:
if (class_one_char < 2) class_one_char++;
- /* If class_one_char is 1, we have the first single character in the
- class, and there have been no prior ranges, or XCLASS items generated by
- escapes. If this is the final character in the class, we can optimize by
- turning the item into a 1-character OP_CHAR[I] if it's positive, or
- OP_NOT[I] if it's negative. In the positive case, it can cause firstcu
- to be set. Otherwise, there can be no first char if this item is first,
- whatever repeat count may follow. In the case of reqcu, save the
- previous value for reinstating. */
+ /* If class_one_char is 1 and xclass_has_prop is false, we have the first
+ single character in the class, and there have been no prior ranges, or
+ XCLASS items generated by escapes. If this is the final character in the
+ class, we can optimize by turning the item into a 1-character OP_CHAR[I]
+ if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
+ can cause firstcu to be set. Otherwise, there can be no first char if
+ this item is first, whatever repeat count may follow. In the case of
+ reqcu, save the previous value for reinstating. */
- if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+ if (!inescq &&
+#ifdef SUPPORT_UNICODE
+ !xclass_has_prop &&
+#endif
+ class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
{
ptr++;
zeroreqcu = reqcu;
@@ -7287,7 +7291,7 @@
else
{
- if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
+ if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cb->max_lookbehind == 0)
cb->max_lookbehind = 1;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-11-16 08:30:48 UTC (rev 439)
+++ code/trunk/testdata/testinput2 2015-11-17 17:13:43 UTC (rev 440)
@@ -4685,4 +4685,8 @@
"(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
+/a[[:punct:]b]/bincode
+
+/a[b[:punct:]]/bincode
+
# End of testinput2
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2015-11-16 08:30:48 UTC (rev 439)
+++ code/trunk/testdata/testinput5 2015-11-17 17:13:43 UTC (rev 440)
@@ -1693,4 +1693,10 @@
/abc\Cdef/info,utf
+/a[[:punct:]b]/ucp,bincode
+
+/a[[:punct:]b]/utf,ucp,bincode
+
+/a[b[:punct:]]/utf,ucp,bincode
+
# End of testinput5
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-11-16 08:30:48 UTC (rev 439)
+++ code/trunk/testdata/testoutput2 2015-11-17 17:13:43 UTC (rev 440)
@@ -14888,4 +14888,22 @@
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20
+/a[[:punct:]b]/bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [!-/:-@[-`b{-~]
+ Ket
+ End
+------------------------------------------------------------------
+
+/a[b[:punct:]]/bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [!-/:-@[-`b{-~]
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2015-11-16 08:30:48 UTC (rev 439)
+++ code/trunk/testdata/testoutput5 2015-11-17 17:13:43 UTC (rev 440)
@@ -4072,4 +4072,31 @@
Last code unit = 'f'
Subject length lower bound = 0
+/a[[:punct:]b]/ucp,bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [b[:punct:]]
+ Ket
+ End
+------------------------------------------------------------------
+
+/a[[:punct:]b]/utf,ucp,bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [b[:punct:]]
+ Ket
+ End
+------------------------------------------------------------------
+
+/a[b[:punct:]]/utf,ucp,bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [b[:punct:]]
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput5