[Pcre-svn] [455] code/trunk: Fix issues with NULL characters…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [455] code/trunk: Fix issues with NULL characters in patterns.
Revision: 455
          http://www.exim.org/viewvc/pcre2?view=rev&revision=455
Author:   ph10
Date:     2015-12-02 17:39:26 +0000 (Wed, 02 Dec 2015)
Log Message:
-----------
Fix issues with NULL characters in patterns.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/src/pcre2_printint.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-12-01 17:41:24 UTC (rev 454)
+++ code/trunk/ChangeLog    2015-12-02 17:39:26 UTC (rev 455)
@@ -358,7 +358,15 @@
 parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when 
 both those options were set.


+107. In a number of places pcre2_compile() was not handling NULL characters
+correctly, and pcre2test with the "bincode" modifier was not always correctly
+displaying fields containing NULLS:
+
+ (a) Within /x extended #-comments
+ (b) Within the "name" part of (*MARK) and other *verbs
+ (c) Within the text argument of a callout
+
+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-12-01 17:41:24 UTC (rev 454)
+++ code/trunk/src/pcre2_compile.c    2015-12-02 17:39:26 UTC (rev 455)
@@ -3017,12 +3017,12 @@


     if ((options & PCRE2_EXTENDED) != 0)
       {
-      PCRE2_SPTR wscptr = ptr; 
+      PCRE2_SPTR wscptr = ptr;
       while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr);
       if (x == CHAR_NUMBER_SIGN)
-        { 
+        {
         ptr++;
-        while (*ptr != CHAR_NULL)
+        while (*ptr != CHAR_NULL || ptr < cb->end_pattern)
           {
           if (IS_NEWLINE(ptr))       /* For non-fixed-length newline cases, */
             {                        /* IS_NEWLINE sets cb->nllen. */
@@ -3034,10 +3034,10 @@
           if (utf) FORWARDCHAR(ptr);
 #endif
           }
-        }   
-      
+        }
+
       /* If we have skipped any characters, restart the loop. */
-       
+
       if (ptr > wscptr)
         {
         ptr--;
@@ -4008,7 +4008,7 @@
     if (c == CHAR_NUMBER_SIGN)
       {
       ptr++;
-      while (*ptr != CHAR_NULL)
+      while (ptr < cb->end_pattern)
         {
         if (IS_NEWLINE(ptr))         /* For non-fixed-length newline cases, */
           {                          /* IS_NEWLINE sets cb->nllen. */
@@ -5044,7 +5044,7 @@
         while (MAX_255(*p) && (cb->ctypes[*p] & ctype_space) != 0) p++;
         if (*p != CHAR_NUMBER_SIGN) break;
         p++;
-        while (*p != CHAR_NULL)
+        while (ptr < cb->end_pattern)
           {
           if (IS_NEWLINE(p))         /* For non-fixed-length newline cases, */
             {                        /* IS_NEWLINE sets cb->nllen. */
@@ -5832,7 +5832,7 @@
         if ((options & PCRE2_ALT_VERBNAMES) == 0)
           {
           arglen = 0;
-          while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS)
+          while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
             {
             ptr++;                                /* Check length as we go */
             arglen++;                             /* along, to avoid the   */


Modified: code/trunk/src/pcre2_printint.c
===================================================================
--- code/trunk/src/pcre2_printint.c    2015-12-01 17:41:24 UTC (rev 454)
+++ code/trunk/src/pcre2_printint.c    2015-12-02 17:39:26 UTC (rev 455)
@@ -58,12 +58,13 @@


/* The functions and tables herein must all have mode-dependent names. */

-#define OP_lengths        PCRE2_SUFFIX(OP_lengths_)
-#define get_ucpname       PCRE2_SUFFIX(get_ucpname_)
-#define pcre2_printint    PCRE2_SUFFIX(pcre2_printint_)
-#define print_char        PCRE2_SUFFIX(print_char_)
-#define print_custring    PCRE2_SUFFIX(print_custring_)
-#define print_prop        PCRE2_SUFFIX(print_prop_)
+#define OP_lengths            PCRE2_SUFFIX(OP_lengths_)
+#define get_ucpname           PCRE2_SUFFIX(get_ucpname_)
+#define pcre2_printint        PCRE2_SUFFIX(pcre2_printint_)
+#define print_char            PCRE2_SUFFIX(print_char_)
+#define print_custring        PCRE2_SUFFIX(print_custring_)
+#define print_custring_bylen  PCRE2_SUFFIX(print_custring_bylen_)
+#define print_prop            PCRE2_SUFFIX(print_prop_)


 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
 the definition is next to the definition of the opcodes in pcre2_internal.h.
@@ -188,12 +189,14 @@
 *     Print string as a list of code units       *
 *************************************************/


-/* This takes no account of UTF as it always prints each individual code unit.
-The string is zero-terminated.
+/* These take no account of UTF as they always print each individual code unit.
+The string is zero-terminated for print_custring(); the length is given for
+print_custring_bylen().

 Arguments:
   f          file to write to
   ptr        point to the string
+  len        length for print_custring_bylen() 


 Returns:     nothing
 */
@@ -208,8 +211,18 @@
   }
 }


+static void
+print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
+{
+while (len-- > 0)
+ {
+ register uint32_t c = *ptr++;
+ if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
+ }
+}


+
 /*************************************************
 *          Find Unicode property name            *
 *************************************************/
@@ -603,7 +616,7 @@
     c = code[1 + 4*LINK_SIZE];
     fprintf(f, "    %s %c", OP_names[*code], c);
     extra = GET(code, 1 + 2*LINK_SIZE);
-    print_custring(f, code + 2 + 4*LINK_SIZE);
+    print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
     for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
       if (c == PRIV(callout_start_delims)[i])
         {
@@ -791,7 +804,7 @@
     case OP_SKIP_ARG:
     case OP_THEN_ARG:
     fprintf(f, "    %s ", OP_names[*code]);
-    print_custring(f, code + 2);
+    print_custring_bylen(f, code + 2, code[1]);
     extra += code[1];
     break;



Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-12-01 17:41:24 UTC (rev 454)
+++ code/trunk/testdata/testinput2    2015-12-02 17:39:26 UTC (rev 455)
@@ -4704,4 +4704,24 @@


/\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended

+# Tests for NULL characters in comments and verb "names" and callouts
+
+# /A#B\x00C\x0aZ/
+/41 23 42 00 43 0a 5a/Bx,hex
+
+# /A+#B\x00C\x0a+/
+/41 2b 23 42 00 43 0a 2b/Bx,hex
+
+# /A(*:B\x00W#X\x00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames
+
+# /A(*:B\x00W#X\x00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex
+
+# /A(?C{X\x00Y})B/
+/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex
+
+# /A(?#{\x00})B/
+/41 28 3f 23 7b 00 7d 29 42/B,hex
+
# End of testinput2

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-12-01 17:41:24 UTC (rev 454)
+++ code/trunk/testdata/testoutput2    2015-12-02 17:39:26 UTC (rev 455)
@@ -14998,4 +14998,66 @@
         End
 ------------------------------------------------------------------


+# Tests for NULL characters in comments and verb "names" and callouts
+
+# /A#B\x00C\x0aZ/
+/41 23 42 00 43 0a 5a/Bx,hex
+------------------------------------------------------------------
+        Bra
+        AZ
+        Ket
+        End
+------------------------------------------------------------------
+
+# /A+#B\x00C\x0a+/
+/41 2b 23 42 00 43 0a 2b/Bx,hex
+------------------------------------------------------------------
+        Bra
+        A++
+        Ket
+        End
+------------------------------------------------------------------
+
+# /A(*:B\x00W#X\x00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames
+------------------------------------------------------------------
+        Bra
+        A
+        *MARK B\x{0}WC
+        Z
+        Ket
+        End
+------------------------------------------------------------------
+
+# /A(*:B\x00W#X\x00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex
+------------------------------------------------------------------
+        Bra
+        A
+        *MARK B\x{0}W#X\x{0}Y\x{a}C
+        Z
+        Ket
+        End
+------------------------------------------------------------------
+
+# /A(?C{X\x00Y})B/
+/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex
+------------------------------------------------------------------
+        Bra
+        A
+        CalloutStr {X\x{0}Y} 5 10 1
+        B
+        Ket
+        End
+------------------------------------------------------------------
+
+# /A(?#{\x00})B/
+/41 28 3f 23 7b 00 7d 29 42/B,hex
+------------------------------------------------------------------
+        Bra
+        AB
+        Ket
+        End
+------------------------------------------------------------------
+
 # End of testinput2