Revision: 412
http://www.exim.org/viewvc/pcre2?view=rev&revision=412
Author: ph10
Date: 2015-11-03 20:04:23 +0000 (Tue, 03 Nov 2015)
Log Message:
-----------
Check subpattern name lengths dynamically.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-11-03 19:14:31 UTC (rev 411)
+++ code/trunk/ChangeLog 2015-11-03 20:04:23 UTC (rev 412)
@@ -260,8 +260,8 @@
75. Give an error in pcre2_substitute() if a match ends before it starts (as a
result of the use of \K).
-76. Check the length of the name in (*MARK:xx) etc. dynamically to avoid the
-possibility of integer overflow.
+76. Check the length of subpattern names and the names in (*MARK:xx) etc.
+dynamically to avoid the possibility of integer overflow.
Version 10.20 30-June-2015
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-11-03 19:14:31 UTC (rev 411)
+++ code/trunk/src/pcre2_compile.c 2015-11-03 20:04:23 UTC (rev 412)
@@ -3002,6 +3002,31 @@
/*************************************************
+* Macro for the next two functions *
+*************************************************/
+
+/* Both scan_for_captures() and compile_branch() use this macro to generate a
+fragment of code that reads the characters of a name and sets its length
+(checking for not being too long). Count the characters dynamically, to avoid
+the possibility of integer overflow. The same macro is used for reading *VERB
+names. */
+
+#define READ_NAME(ctype, errno, errset) /* uses ptr, namelen, cb and label FAILED from the expansion site; "errno" here is just a macro parameter name */ \
+ namelen = 0; /* the count restarts for every name that is read */ \
+ while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype) != 0) /* continue while MAX_255 accepts *ptr and its ctypes entry has a ctype bit set */ \
+ { \
+ ptr++; \
+ namelen++; \
+ if (namelen > MAX_NAME_SIZE) /* tested after every character, so namelen never grows past MAX_NAME_SIZE + 1 */ \
+ { \
+ errset = errno; /* store the caller-supplied error code into the caller-supplied lvalue */ \
+ goto FAILED; /* on this path ptr has already been advanced past the character that exceeded the limit */ \
+ } \
+ }
+
+
+
+/*************************************************
* Scan regex to identify named groups *
*************************************************/
@@ -3459,8 +3484,8 @@
goto FAILED;
}
- while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
- namelen = (int)(ptr - name);
+ /* Advance ptr, set namelen and check its length. */
+ READ_NAME(ctype_word, ERR48, errorcode);
if (*ptr != c)
{
@@ -3475,14 +3500,7 @@
}
if (namelen + IMM2_SIZE + 1 > cb->name_entry_size)
- {
cb->name_entry_size = namelen + IMM2_SIZE + 1;
- if (namelen > MAX_NAME_SIZE)
- {
- errorcode = ERR48;
- goto FAILED;
- }
- }
/* We have a valid name for this capturing group. */
@@ -5602,9 +5620,11 @@
PCRE2_SPTR arg = NULL;
previous = NULL;
ptr++;
- while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_letter) != 0) ptr++;
- namelen = (int)(ptr - name);
+ /* Increment ptr, set namelen, check length */
+
+ READ_NAME(ctype_letter, ERR60, *errorcodeptr);
+
/* It appears that Perl allows any characters whatsoever, other than
a closing parenthesis, to appear in arguments, so we no longer insist on
letters, digits, and underscores. Perl does not, however, do any
@@ -6008,12 +6028,9 @@
*errorcodeptr = ERR28; /* Assertion expected */
goto FAILED;
}
- name = ptr++;
- while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0)
- {
- ptr++;
- }
- namelen = (int)(ptr - name);
+ name = ptr;
+ /* Increment ptr, set namelen, check length */
+ READ_NAME(ctype_word, ERR48, *errorcodeptr);
if (lengthptr != NULL) skipunits += IMM2_SIZE;
}
@@ -6396,8 +6413,8 @@
*errorcodeptr = ERR44; /* Group name must start with non-digit */
goto FAILED;
}
- while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
- namelen = (int)(ptr - name);
+ /* Increment ptr, set namelen, check length */
+ READ_NAME(ctype_word, ERR48, *errorcodeptr);
/* In the pre-compile phase, do a syntax check. */
@@ -6413,11 +6430,6 @@
*errorcodeptr = ERR42;
goto FAILED;
}
- if (namelen > MAX_NAME_SIZE)
- {
- *errorcodeptr = ERR48;
- goto FAILED;
- }
}
/* Scan the list of names generated in the pre-pass in order to get
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-11-03 19:14:31 UTC (rev 411)
+++ code/trunk/testdata/testinput2 2015-11-03 20:04:23 UTC (rev 412)
@@ -4598,5 +4598,9 @@
/(?=a\K)/replace=z
BaCaD
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-11-03 19:14:31 UTC (rev 411)
+++ code/trunk/testdata/testoutput2 2015-11-03 20:04:23 UTC (rev 412)
@@ -14693,5 +14693,10 @@
/(?=a\K)/replace=z
BaCaD
Failed: error -60: match with end before start is not supported
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
+Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters)
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
# End of testinput2