[Pcre-svn] [1295] code/trunk: Fix some numerical checking bu…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1295] code/trunk: Fix some numerical checking bugs, Bugzilla 2690.
Revision: 1295
          http://www.exim.org/viewvc/pcre2?view=rev&revision=1295
Author:   ph10
Date:     2021-02-01 17:56:12 +0000 (Mon, 01 Feb 2021)
Log Message:
-----------
Fix some numerical checking bugs, Bugzilla 2690.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput11
    code/trunk/testdata/testinput2
    code/trunk/testdata/testinput9
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput11-16
    code/trunk/testdata/testoutput11-32
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput9


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/ChangeLog    2021-02-01 17:56:12 UTC (rev 1295)
@@ -16,7 +16,16 @@
 3. An alternative patch for CMakeLists.txt because 10.36 # 4 breaks CMake on
 Windows. Patch from email@??? fixes bugzilla #2688.


+4. Two bugs related to over-large numbers have been fixed so the behaviour is
+now the same as Perl.
+
+ (a) A pattern such as /\214748364/ gave an overflow error instead of being
+ treated as the octal number \214 followed by literal digits.
+
+ (b) A sequence such as {65536, which has no terminating } and so is not a
+ quantifier, nevertheless caused a complaint that a quantifier number was too big.

+
Version 10.36 04-December-2020
------------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/src/pcre2_compile.c    2021-02-01 17:56:12 UTC (rev 1295)
@@ -1398,32 +1398,47 @@
 read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp,
   uint32_t *maxp, int *errorcodeptr)
 {
-PCRE2_SPTR p = *ptrptr;
+PCRE2_SPTR p;
 BOOL yield = FALSE;
+BOOL had_comma = FALSE;
 int32_t min = 0;
 int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */


-/* NB read_number() initializes the error code to zero. The only error is for a
-number that is too big. */
+/* Check the syntax */

+*errorcodeptr = 0;
+for (p = *ptrptr;; p++)
+  {
+  uint32_t c;
+  if (p >= ptrend) return FALSE;
+  c = *p;
+  if (IS_DIGIT(c)) continue;
+  if (c == CHAR_RIGHT_CURLY_BRACKET) break;
+  if (c == CHAR_COMMA)
+    {
+    if (had_comma) return FALSE;
+    had_comma = TRUE;
+    }
+  else return FALSE;   
+  }
+
+/* The only error from read_number() is for a number that is too big. */
+
+p = *ptrptr;
 if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr))
   goto EXIT;


-if (p >= ptrend) goto EXIT;
-
 if (*p == CHAR_RIGHT_CURLY_BRACKET)
   {
   p++;
   max = min;
   }
-
 else
   {
-  if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT;
-  if (*p != CHAR_RIGHT_CURLY_BRACKET)
+  if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
     {
     if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max,
-        errorcodeptr) || p >= ptrend ||  *p != CHAR_RIGHT_CURLY_BRACKET)
+        errorcodeptr))
       goto EXIT;
     if (max < min)
       {
@@ -1438,11 +1453,10 @@
 if (minp != NULL) *minp = (uint32_t)min;
 if (maxp != NULL) *maxp = (uint32_t)max;


-/* Update the pattern pointer on success, or after an error, but not when
-the result is "not a repeat quantifier". */
+/* Update the pattern pointer */

EXIT:
-if (yield || *errorcodeptr != 0) *ptrptr = p;
+*ptrptr = p;
return yield;
}

@@ -1776,19 +1790,23 @@
       {
       oldptr = ptr;
       ptr--;   /* Back to the digit */
-      if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s,
-          errorcodeptr))
-        break;


-      /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
+      /* As we know we are at a digit, the only possible error from
+      read_number() is a number that is too large to be a group number. In this
+      case we fall through and handle this as not a group reference. If we have
+      read a small enough number, check for a back reference.
+
+      \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
       are octal escapes if there are not that many previous captures. */


-      if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)
+      if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) &&
+          (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount))
         {
         if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61;
           else escape = -s;     /* Indicates a back reference */
         break;
         }
+
       ptr = oldptr;      /* Put the pointer back and fall through */
       }



Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput1    2021-02-01 17:56:12 UTC (rev 1295)
@@ -6420,4 +6420,13 @@
 /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/
     word


+/a{1,2,3}b/
+    a{1,2,3}b
+
+/\214748364/
+    >\x{8c}748364<
+    
+/a{65536/
+    >a{65536<
+
 # End of testinput1 


Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput11    2021-02-01 17:56:12 UTC (rev 1295)
@@ -368,4 +368,7 @@
     ab\xFFAz
     ab\x{80000041}z 


+/(?i:A{1,}\6666666666)/
+    A\x{1b6}6666666
+
 # End of testinput11


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput2    2021-02-01 17:56:12 UTC (rev 1295)
@@ -2189,8 +2189,6 @@


/a(*MARK)b/

-/(?i:A{1,}\6666666666)/
-
/\g6666666666/

/[\g6666666666]/B

Modified: code/trunk/testdata/testinput9
===================================================================
--- code/trunk/testdata/testinput9    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testinput9    2021-02-01 17:56:12 UTC (rev 1295)
@@ -260,4 +260,7 @@


/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/

+/(?i:A{1,}\6666666666)/
+    A\x{1b6}6666666
+
 # End of testinput9


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput1    2021-02-01 17:56:12 UTC (rev 1295)
@@ -10176,4 +10176,16 @@
     word
  0: word


+/a{1,2,3}b/
+    a{1,2,3}b
+ 0: a{1,2,3}b
+
+/\214748364/
+    >\x{8c}748364<
+ 0: \x8c748364
+    
+/a{65536/
+    >a{65536<
+ 0: a{65536
+
 # End of testinput1 


Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput11-16    2021-02-01 17:56:12 UTC (rev 1295)
@@ -661,4 +661,8 @@
     ab\xFFAz
     ab\x{80000041}z 


+/(?i:A{1,}\6666666666)/
+    A\x{1b6}6666666
+ 0: A\x{1b6}6666666
+
 # End of testinput11


Modified: code/trunk/testdata/testoutput11-32
===================================================================
--- code/trunk/testdata/testoutput11-32    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput11-32    2021-02-01 17:56:12 UTC (rev 1295)
@@ -667,4 +667,8 @@
     ab\x{80000041}z 
  0: ab\x{80000041}z


+/(?i:A{1,}\6666666666)/
+    A\x{1b6}6666666
+ 0: A\x{1b6}6666666
+
 # End of testinput11


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput2    2021-02-01 17:56:12 UTC (rev 1295)
@@ -8374,9 +8374,6 @@
 /a(*MARK)b/
 Failed: error 166 at offset 7: (*MARK) must have an argument


-/(?i:A{1,}\6666666666)/
-Failed: error 161 at offset 19: subpattern number is too big
-
/\g6666666666/
Failed: error 161 at offset 7: subpattern number is too big


Modified: code/trunk/testdata/testoutput9
===================================================================
--- code/trunk/testdata/testoutput9    2021-01-14 17:14:58 UTC (rev 1294)
+++ code/trunk/testdata/testoutput9    2021-02-01 17:56:12 UTC (rev 1295)
@@ -367,4 +367,8 @@
 /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
 Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)


+/(?i:A{1,}\6666666666)/
+Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
+    A\x{1b6}6666666
+
 # End of testinput9