[Pcre-svn] [453] code/trunk: Fix empty \Q\E between an item …

Inizio della pagina
Delete this message
Autore: Subversion repository
Data:  
To: pcre-svn
Oggetto: [Pcre-svn] [453] code/trunk: Fix empty \Q\E between an item and a quantifier in auto-callout mode.
Revision: 453
          http://www.exim.org/viewvc/pcre2?view=rev&revision=453
Author:   ph10
Date:     2015-11-30 17:31:16 +0000 (Mon, 30 Nov 2015)
Log Message:
-----------
Fix empty \Q\E between an item and a quantifier in auto-callout mode.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-11-29 17:45:27 UTC (rev 452)
+++ code/trunk/ChangeLog    2015-11-30 17:31:16 UTC (rev 453)
@@ -349,7 +349,11 @@


104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep.

+105. An empty \Q\E sequence between an item and its quantifier caused
+pcre2_compile() to misbehave when auto callouts were enabled. This bug
+was found by the LLVM fuzzer.

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-11-29 17:45:27 UTC (rev 452)
+++ code/trunk/src/pcre2_compile.c    2015-11-30 17:31:16 UTC (rev 453)
@@ -3947,9 +3947,17 @@
     last_code = code;
     }


- /* If in \Q...\E, check for the end; if not, we have a literal. If not in
- \Q...\E, an isolated \E is ignored. */
+ /* Before doing anything else we must handle all the special items that do
+ nothing, and which may come between an item and its quantifier. Otherwise,
+ when auto-callouts are enabled, a callout gets incorrectly inserted before
+ the quantifier is recognized. After recognizing a "do nothing" item, restart
+ the loop in case another one follows. */

+  /* If c is not NULL we are not at the end of the pattern. If it is NULL, we
+  may still be in the pattern with a NULL data item. In these cases, if we are
+  in \Q...\E, check for the \E that ends the literal string; if not, we have a
+  literal character. If not in \Q...\E, an isolated \E is ignored. */
+
   if (c != CHAR_NULL || ptr < cb->end_pattern)
     {
     if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
@@ -3958,7 +3966,7 @@
       ptr++;
       continue;
       }
-    else if (inescq)
+    else if (inescq)   /* Literal character */
       {
       if (previous_callout != NULL)
         {
@@ -3973,17 +3981,27 @@
         }
       goto NORMAL_CHAR;
       }
+
+    /* Check for the start of a \Q...\E sequence. We must do this here rather
+    than later in case it is immediately followed by \E, which turns it into a
+    "do nothing" sequence. */
+
+    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
+      {
+      inescq = TRUE;
+      ptr++;
+      continue;
+      }
     }


- /* In extended mode, skip white space and comments. We need a loop in order
- to check for more white space and more comments after a comment. */
+ /* In extended mode, skip white space and #-comments that end at newline. */

   if ((options & PCRE2_EXTENDED) != 0)
     {
-    for (;;)
+    PCRE2_SPTR wscptr = ptr;
+    while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
+    if (c == CHAR_NUMBER_SIGN)
       {
-      while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
-      if (c != CHAR_NUMBER_SIGN) break;
       ptr++;
       while (*ptr != CHAR_NULL)
         {
@@ -3997,13 +4015,19 @@
         if (utf) FORWARDCHAR(ptr);
 #endif
         }
-      c = *ptr;     /* Either NULL or the char after a newline */
       }
+
+    /* If we skipped any characters, restart the loop. Otherwise, we didn't see
+    a comment. */
+
+    if (ptr > wscptr)
+      {
+      ptr--;
+      continue;
+      }
     }


- /* Skip over (?# comments. We need to do this here because we want to know if
- the next thing is a quantifier, and these comments may come between an item
- and its quantifier. */
+ /* Skip over (?# comments. */

   if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
       ptr[2] == CHAR_NUMBER_SIGN)
@@ -4018,7 +4042,8 @@
     continue;
     }


- /* See if the next thing is a quantifier. */
+ /* End of processing "do nothing" items. See if the next thing is a
+ quantifier. */

   is_quantifier =
     c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
@@ -7133,8 +7158,11 @@
     are negative the reference number. Only back references and those types
     that consume a character may be repeated. We can test for values between
     ESC_b and ESC_Z for the latter; this may have to change if any new ones are
-    ever created. */
+    ever created.


+    Note: \Q and \E are handled at the start of the character-processing loop,
+    not here. */
+
     case CHAR_BACKSLASH:
     tempptr = ptr;
     escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr,
@@ -7145,16 +7173,6 @@
       c = ec;
     else
       {
-      if (escape == ESC_Q)            /* Handle start of quoted string */
-        {
-        if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
-          ptr += 2;               /* avoid empty string */
-            else inescq = TRUE;
-        continue;
-        }
-
-      if (escape == ESC_E) continue;  /* Perl ignores an orphan \E */
-
       /* For metasequences that actually match a character, we disable the
       setting of a first character if it hasn't already been set. */



Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-11-29 17:45:27 UTC (rev 452)
+++ code/trunk/testdata/testinput2    2015-11-30 17:31:16 UTC (rev 453)
@@ -4699,4 +4699,7 @@


/(A*)\E+/B,auto_callout

+/()\Q\E*]/B,auto_callout
+    a[bc]d
+
 # End of testinput2 


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-11-29 17:45:27 UTC (rev 452)
+++ code/trunk/testdata/testoutput2    2015-11-30 17:31:16 UTC (rev 453)
@@ -14956,4 +14956,27 @@
         End
 ------------------------------------------------------------------


+/()\Q\E*]/B,auto_callout
+------------------------------------------------------------------
+        Bra
+        Callout 255 0 7
+        Brazero
+        SCBra 1
+        Callout 255 1 0
+        KetRmax
+        Callout 255 7 1
+        ]
+        Callout 255 8 0
+        Ket
+        End
+------------------------------------------------------------------
+    a[bc]d
+--->a[bc]d
+ +0     ^      ()\Q\E*
+ +1     ^      )
+ +7     ^      ]
+ +8     ^^     
+ 0: ]
+ 1: 
+
 # End of testinput2