Revision: 1295
http://www.exim.org/viewvc/pcre2?view=rev&revision=1295
Author: ph10
Date: 2021-02-01 17:56:12 +0000 (Mon, 01 Feb 2021)
Log Message:
-----------
Fix some numerical checking bugs, Bugzilla 2690.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput1
code/trunk/testdata/testinput11
code/trunk/testdata/testinput2
code/trunk/testdata/testinput9
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput11-16
code/trunk/testdata/testoutput11-32
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput9
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/ChangeLog 2021-02-01 17:56:12 UTC (rev 1295)
@@ -16,7 +16,16 @@
3. An alternative patch for CMakeLists.txt because 10.36 # 4 breaks CMake on
Windows. Patch from email@??? fixes bugzilla #2688.
+4. Two bugs related to over-large numbers have been fixed so the behaviour is
+now the same as Perl.
+
+ (a) A pattern such as /\214748364/ gave an overflow error instead of being
+ treated as the octal number \214 followed by literal digits.
+
+ (b) A sequence such as {65536 that has no terminating }, and so is not a
+ quantifier, nevertheless caused a complaint that a quantifier number was too big.
+
Version 10.36 04-December-2020
------------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/src/pcre2_compile.c 2021-02-01 17:56:12 UTC (rev 1295)
@@ -1398,32 +1398,47 @@
read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp,
uint32_t *maxp, int *errorcodeptr)
{
-PCRE2_SPTR p = *ptrptr;
+PCRE2_SPTR p;
BOOL yield = FALSE;
+BOOL had_comma = FALSE;
int32_t min = 0;
int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */
-/* NB read_number() initializes the error code to zero. The only error is for a
-number that is too big. */
+/* Check the syntax */
+*errorcodeptr = 0;
+for (p = *ptrptr;; p++)
+ {
+ uint32_t c;
+ if (p >= ptrend) return FALSE;
+ c = *p;
+ if (IS_DIGIT(c)) continue;
+ if (c == CHAR_RIGHT_CURLY_BRACKET) break;
+ if (c == CHAR_COMMA)
+ {
+ if (had_comma) return FALSE;
+ had_comma = TRUE;
+ }
+ else return FALSE;
+ }
+
+/* The only error from read_number() is for a number that is too big. */
+
+p = *ptrptr;
if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr))
goto EXIT;
-if (p >= ptrend) goto EXIT;
-
if (*p == CHAR_RIGHT_CURLY_BRACKET)
{
p++;
max = min;
}
-
else
{
- if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT;
- if (*p != CHAR_RIGHT_CURLY_BRACKET)
+ if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
{
if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max,
- errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET)
+ errorcodeptr))
goto EXIT;
if (max < min)
{
@@ -1438,11 +1453,10 @@
if (minp != NULL) *minp = (uint32_t)min;
if (maxp != NULL) *maxp = (uint32_t)max;
-/* Update the pattern pointer on success, or after an error, but not when
-the result is "not a repeat quantifier". */
+/* Update the pattern pointer */
EXIT:
-if (yield || *errorcodeptr != 0) *ptrptr = p;
+*ptrptr = p;
return yield;
}
@@ -1776,19 +1790,23 @@
{
oldptr = ptr;
ptr--; /* Back to the digit */
- if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s,
- errorcodeptr))
- break;
- /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
+ /* As we know we are at a digit, the only possible error from
+ read_number() is a number that is too large to be a group number. In this
+ case we fall through and handle this as not a group reference. If we have
+ read a small enough number, check for a back reference.
+
+ \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
are octal escapes if there are not that many previous captures. */
- if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)
+ if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) &&
+ (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount))
{
if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61;
else escape = -s; /* Indicates a back reference */
break;
}
+
ptr = oldptr; /* Put the pointer back and fall through */
}
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput1 2021-02-01 17:56:12 UTC (rev 1295)
@@ -6420,4 +6420,13 @@
/(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/
word
+/a{1,2,3}b/
+ a{1,2,3}b
+
+/\214748364/
+ >\x{8c}748364<
+
+/a{65536/
+ >a{65536<
+
# End of testinput1
Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput11 2021-02-01 17:56:12 UTC (rev 1295)
@@ -368,4 +368,7 @@
ab\xFFAz
ab\x{80000041}z
+/(?i:A{1,}\6666666666)/
+ A\x{1b6}6666666
+
# End of testinput11
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput2 2021-02-01 17:56:12 UTC (rev 1295)
@@ -2189,8 +2189,6 @@
/a(*MARK)b/
-/(?i:A{1,}\6666666666)/
-
/\g6666666666/
/[\g6666666666]/B
Modified: code/trunk/testdata/testinput9
===================================================================
--- code/trunk/testdata/testinput9 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput9 2021-02-01 17:56:12 UTC (rev 1295)
@@ -260,4 +260,7 @@
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
+/(?i:A{1,}\6666666666)/
+ A\x{1b6}6666666
+
# End of testinput9
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput1 2021-02-01 17:56:12 UTC (rev 1295)
@@ -10176,4 +10176,16 @@
word
0: word
+/a{1,2,3}b/
+ a{1,2,3}b
+ 0: a{1,2,3}b
+
+/\214748364/
+ >\x{8c}748364<
+ 0: \x8c748364
+
+/a{65536/
+ >a{65536<
+ 0: a{65536
+
# End of testinput1
Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput11-16 2021-02-01 17:56:12 UTC (rev 1295)
@@ -661,4 +661,8 @@
ab\xFFAz
ab\x{80000041}z
+/(?i:A{1,}\6666666666)/
+ A\x{1b6}6666666
+ 0: A\x{1b6}6666666
+
# End of testinput11
Modified: code/trunk/testdata/testoutput11-32
===================================================================
--- code/trunk/testdata/testoutput11-32 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput11-32 2021-02-01 17:56:12 UTC (rev 1295)
@@ -667,4 +667,8 @@
ab\x{80000041}z
0: ab\x{80000041}z
+/(?i:A{1,}\6666666666)/
+ A\x{1b6}6666666
+ 0: A\x{1b6}6666666
+
# End of testinput11
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput2 2021-02-01 17:56:12 UTC (rev 1295)
@@ -8374,9 +8374,6 @@
/a(*MARK)b/
Failed: error 166 at offset 7: (*MARK) must have an argument
-/(?i:A{1,}\6666666666)/
-Failed: error 161 at offset 19: subpattern number is too big
-
/\g6666666666/
Failed: error 161 at offset 7: subpattern number is too big
Modified: code/trunk/testdata/testoutput9
===================================================================
--- code/trunk/testdata/testoutput9 2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput9 2021-02-01 17:56:12 UTC (rev 1295)
@@ -367,4 +367,8 @@
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+/(?i:A{1,}\6666666666)/
+Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
+ A\x{1b6}6666666
+
# End of testinput9