[Pcre-svn] [384] code/trunk: Fix compiler bug for classes such as [\W\p{Any}].

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [384] code/trunk: Fix compiler bug for classes such as [\W\p{Any}].
Revision: 384
          http://www.exim.org/viewvc/pcre2?view=rev&revision=384
Author:   ph10
Date:     2015-10-09 17:06:53 +0100 (Fri, 09 Oct 2015)
Log Message:
-----------
Fix compiler bug for classes such as [\W\p{Any}].


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput5
    code/trunk/testdata/testoutput5


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-10-07 17:40:22 UTC (rev 383)
+++ code/trunk/ChangeLog    2015-10-09 16:06:53 UTC (rev 384)
@@ -194,7 +194,11 @@


55. Implemented PCRE2_SUBSTITUTE_EXTENDED.

+56. In a character class such as [\W\p{Any}] where both a negative-type escape
+("not a word character") and a property escape were present, the property
+escape was being ignored.

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-10-07 17:40:22 UTC (rev 383)
+++ code/trunk/src/pcre2_compile.c    2015-10-09 16:06:53 UTC (rev 384)
@@ -1645,11 +1645,11 @@


/* If backslash is at the end of the pattern, it's an error. */

-if (ptr >= ptrend)
+if (ptr >= ptrend)
{
*errorcodeptr = ERR1;
return 0;
- }
+ }

 GETCHARINCTEST(c, ptr);         /* Get character value, increment pointer */
 ptr--;                          /* Set pointer back to the last code unit */
@@ -1671,8 +1671,8 @@
     }
   }


-/* Escapes that need further processing, including those that are unknown.
-When called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u
+/* Escapes that need further processing, including those that are unknown.
+When called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u
when BSUX is set). */

else
@@ -1680,7 +1680,7 @@
PCRE2_SPTR oldptr;
BOOL braced, negated, overflow;
unsigned int s;
-
+
/* Filter calls from pcre2_substitute(). */

   if (cb == NULL && c != CHAR_c && c != CHAR_o && c != CHAR_x &&
@@ -1688,7 +1688,7 @@
     {
     *errorcodeptr = ERR3;
     return 0;
-    }  
+    }


   switch (c)
     {
@@ -4645,19 +4645,21 @@
     zeroreqcu = reqcu;
     zeroreqcuflags = reqcuflags;


-    /* If there are characters with values > 255, we have to compile an
-    extended class, with its own opcode, unless there was a negated special
-    such as \S in the class, and PCRE2_UCP is not set, because in that case all
+    /* If there are characters with values > 255, or Unicode property settings
+    (\p or \P), we have to compile an extended class, with its own opcode,
+    unless there were no property settings and there was a negated special such
+    as \S in the class, and PCRE2_UCP is not set, because in that case all
     characters > 255 are in the class, so any that were explicitly given as
-    well can be ignored. If (when there are explicit characters > 255 that must
-    be listed) there are no characters < 256, we can omit the bitmap in the
-    actual compiled code. */
+    well can be ignored. If (when there are explicit characters > 255 or
+    property settings that must be listed) there are no characters < 256, we
+    can omit the bitmap in the actual compiled code. */


 #ifdef SUPPORT_WIDE_CHARS
 #ifdef SUPPORT_UNICODE
-    if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
+    if (xclass && (xclass_has_prop || !should_flip_negation ||
+         (options & PCRE2_UCP) != 0))
 #elif PCRE2_CODE_UNIT_WIDTH != 8
-    if (xclass && !should_flip_negation)
+    if (xclass && (xclass_has_prop || !should_flip_negation))
 #endif
       {
       *class_uchardata++ = XCL_END;    /* Marks the end of extra data */


Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2015-10-07 17:40:22 UTC (rev 383)
+++ code/trunk/testdata/testinput5    2015-10-09 16:06:53 UTC (rev 384)
@@ -1693,4 +1693,13 @@
 /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
     ab12cde


+/[\W\p{Any}]/B
+    abc
+    123 
+
+/[\W\pL]/B
+    abc
+\= Expect no match
+    123     
+
 # End of testinput5 


Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2015-10-07 17:40:22 UTC (rev 383)
+++ code/trunk/testdata/testoutput5    2015-10-09 16:06:53 UTC (rev 384)
@@ -4046,4 +4046,29 @@
     ab12cde
  7: <not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter>


+/[\W\p{Any}]/B
+------------------------------------------------------------------
+        Bra
+        [\x00-/:-@[-^`{-\xff\p{Any}]
+        Ket
+        End
+------------------------------------------------------------------
+    abc
+ 0: a
+    123 
+ 0: 1
+
+/[\W\pL]/B
+------------------------------------------------------------------
+        Bra
+        [\x00-/:-@[-^`{-\xff\p{L}]
+        Ket
+        End
+------------------------------------------------------------------
+    abc
+ 0: a
+\= Expect no match
+    123     
+No match
+
 # End of testinput5