[Pcre-svn] [393] code/trunk: Fix PCRE2_NO_AUTO_CAPTURE bug.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [393] code/trunk: Fix PCRE2_NO_AUTO_CAPTURE bug.
Revision: 393
          http://www.exim.org/viewvc/pcre2?view=rev&revision=393
Author:   ph10
Date:     2015-10-25 17:35:34 +0000 (Sun, 25 Oct 2015)
Log Message:
-----------
Fix PCRE2_NO_AUTO_CAPTURE bug.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/ChangeLog    2015-10-25 17:35:34 UTC (rev 393)
@@ -213,7 +213,11 @@
 61. Whitespace at the end of a pcre2test pattern line caused a spurious error 
 message if there were only single-character modifiers. It should be ignored.


+62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results
+or segmentation errors for some patterns. Found with libFuzzer and
+AddressSanitizer.

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/src/pcre2_compile.c    2015-10-25 17:35:34 UTC (rev 393)
@@ -1621,7 +1621,7 @@
 There is one "trick" case: when a sequence such as [[:>:]] or \s in UCP mode is
 processed, it is replaced by a nested alternative sequence. If this contains a
 backslash (which it usually does), ptrend does not point to its end - it still
-points to the end of the whole pattern. However, we can detect this case 
+points to the end of the whole pattern. However, we can detect this case
 because cb->nestptr[0] will be non-NULL. The nested sequences are all zero-
 terminated and there are only ever two levels of nesting.


@@ -3187,9 +3187,10 @@

     if (ptr[1] != CHAR_QUESTION_MARK)
       {
-      if (ptr[1] != CHAR_ASTERISK &&
-          (options & PCRE2_NO_AUTO_CAPTURE) == 0)
-        cb->bracount++;  /* Capturing group */
+      if (ptr[1] != CHAR_ASTERISK)
+        {
+        if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++;
+        }
       else  /* (*something) - just skip to closing ket */
         {
         ptr += 2;
@@ -3717,7 +3718,7 @@
   if (c == CHAR_NULL && cb->nestptr[0] != NULL)
     {
     ptr = cb->nestptr[0];
-    cb->nestptr[0] = cb->nestptr[1]; 
+    cb->nestptr[0] = cb->nestptr[1];
     cb->nestptr[1] = NULL;
     c = *ptr;
     }
@@ -3846,7 +3847,7 @@
   /* Create auto callout, except for quantifiers, or while processing property
   strings that are substituted for \w etc in UCP mode. */


-  if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier && 
+  if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier &&
        cb->nestptr[0] == NULL)
     {
     previous_callout = code;
@@ -4140,8 +4141,8 @@
           int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);


           /* The posix_substitutes table specifies which POSIX classes can be
-          converted to \p or \P items. This can only happen at top nesting 
-          level, as there will never be a POSIX class in a string that is 
+          converted to \p or \P items. This can only happen at top nesting
+          level, as there will never be a POSIX class in a string that is
           substituted for something else. */


           if (posix_substitutes[pc] != NULL)
@@ -4282,7 +4283,7 @@
             case ESC_WU:     /* or \P to test Unicode properties instead */
             case ESC_su:     /* of the default ASCII testing. This might be */
             case ESC_SU:     /* a 2nd-level nesting for [[:<:]] or [[:>:]]. */
-            cb->nestptr[1] = cb->nestptr[0]; 
+            cb->nestptr[1] = cb->nestptr[0];
             cb->nestptr[0] = ptr;
             ptr = substitutes[escape - ESC_DU] - 1;  /* Just before substitute */
             class_has_8bitchar--;                /* Undo! */
@@ -4628,7 +4629,7 @@
       if (c == CHAR_NULL && cb->nestptr[0] != NULL)
         {
         ptr = cb->nestptr[0];
-        cb->nestptr[0] = cb->nestptr[1]; 
+        cb->nestptr[0] = cb->nestptr[1];
         cb->nestptr[1] = NULL;
         c = *(++ptr);
         }
@@ -7072,7 +7073,7 @@
 #endif


       /* The use of \C can be locked out. */
-      
+
 #ifdef NEVER_BACKSLASH_C
       else if (escape == ESC_C)
         {
@@ -7085,7 +7086,7 @@
         *errorcodeptr = ERR83;
         goto FAILED;
         }
-#endif         
+#endif


       /* For the rest (including \X when Unicode properties are supported), we
       can obtain the OP value by negating the escape value in the default


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/testdata/testinput2    2015-10-25 17:35:34 UTC (rev 393)
@@ -4583,4 +4583,8 @@


/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I

+/((p(?'K/
+
+/((p(?'K/no_auto_capture
+
# End of testinput2

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/testdata/testoutput2    2015-10-25 17:35:34 UTC (rev 393)
@@ -14668,4 +14668,10 @@
 Last code unit = '}'
 Subject length lower bound = 65535


+/((p(?'K/
+Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
+
+/((p(?'K/no_auto_capture
+Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
+
# End of testinput2