Revision: 393
http://www.exim.org/viewvc/pcre2?view=rev&revision=393
Author: ph10
Date: 2015-10-25 17:35:34 +0000 (Sun, 25 Oct 2015)
Log Message:
-----------
Fix PCRE2_NO_AUTO_CAPTURE bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/ChangeLog 2015-10-25 17:35:34 UTC (rev 393)
@@ -213,7 +213,11 @@
61. Whitespace at the end of a pcre2test pattern line caused a spurious error
message if there were only single-character modifiers. It should be ignored.
+62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results
+or segmentation errors for some patterns. Found with libFuzzer and
+AddressSanitizer.
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/src/pcre2_compile.c 2015-10-25 17:35:34 UTC (rev 393)
@@ -1621,7 +1621,7 @@
There is one "trick" case: when a sequence such as [[:>:]] or \s in UCP mode is
processed, it is replaced by a nested alternative sequence. If this contains a
backslash (which it usually does), ptrend does not point to its end - it still
-points to the end of the whole pattern. However, we can detect this case
+points to the end of the whole pattern. However, we can detect this case
because cb->nestptr[0] will be non-NULL. The nested sequences are all zero-
terminated and there are only ever two levels of nesting.
@@ -3187,9 +3187,10 @@
if (ptr[1] != CHAR_QUESTION_MARK)
{
- if (ptr[1] != CHAR_ASTERISK &&
- (options & PCRE2_NO_AUTO_CAPTURE) == 0)
- cb->bracount++; /* Capturing group */
+ if (ptr[1] != CHAR_ASTERISK)
+ {
+ if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++;
+ }
else /* (*something) - just skip to closing ket */
{
ptr += 2;
@@ -3717,7 +3718,7 @@
if (c == CHAR_NULL && cb->nestptr[0] != NULL)
{
ptr = cb->nestptr[0];
- cb->nestptr[0] = cb->nestptr[1];
+ cb->nestptr[0] = cb->nestptr[1];
cb->nestptr[1] = NULL;
c = *ptr;
}
@@ -3846,7 +3847,7 @@
/* Create auto callout, except for quantifiers, or while processing property
strings that are substituted for \w etc in UCP mode. */
- if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier &&
+ if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier &&
cb->nestptr[0] == NULL)
{
previous_callout = code;
@@ -4140,8 +4141,8 @@
int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
/* The posix_substitutes table specifies which POSIX classes can be
- converted to \p or \P items. This can only happen at top nesting
- level, as there will never be a POSIX class in a string that is
+ converted to \p or \P items. This can only happen at top nesting
+ level, as there will never be a POSIX class in a string that is
substituted for something else. */
if (posix_substitutes[pc] != NULL)
@@ -4282,7 +4283,7 @@
case ESC_WU: /* or \P to test Unicode properties instead */
case ESC_su: /* of the default ASCII testing. This might be */
case ESC_SU: /* a 2nd-level nesting for [[:<:]] or [[:>:]]. */
- cb->nestptr[1] = cb->nestptr[0];
+ cb->nestptr[1] = cb->nestptr[0];
cb->nestptr[0] = ptr;
ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */
class_has_8bitchar--; /* Undo! */
@@ -4628,7 +4629,7 @@
if (c == CHAR_NULL && cb->nestptr[0] != NULL)
{
ptr = cb->nestptr[0];
- cb->nestptr[0] = cb->nestptr[1];
+ cb->nestptr[0] = cb->nestptr[1];
cb->nestptr[1] = NULL;
c = *(++ptr);
}
@@ -7072,7 +7073,7 @@
#endif
/* The use of \C can be locked out. */
-
+
#ifdef NEVER_BACKSLASH_C
else if (escape == ESC_C)
{
@@ -7085,7 +7086,7 @@
*errorcodeptr = ERR83;
goto FAILED;
}
-#endif
+#endif
/* For the rest (including \X when Unicode properties are supported), we
can obtain the OP value by negating the escape value in the default
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/testdata/testinput2 2015-10-25 17:35:34 UTC (rev 393)
@@ -4583,4 +4583,8 @@
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
+/((p(?'K/
+
+/((p(?'K/no_auto_capture
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-10-22 15:49:54 UTC (rev 392)
+++ code/trunk/testdata/testoutput2 2015-10-25 17:35:34 UTC (rev 393)
@@ -14668,4 +14668,10 @@
Last code unit = '}'
Subject length lower bound = 65535
+/((p(?'K/
+Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
+
+/((p(?'K/no_auto_capture
+Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
+
# End of testinput2