Revision: 1585
http://vcs.pcre.org/viewvc?view=rev&revision=1585
Author: ph10
Date: 2015-08-05 16:38:32 +0100 (Wed, 05 Aug 2015)
Log Message:
-----------
Fix buffer overflow for named references in (?| situations.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/pcre_internal.h
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-08-01 09:30:02 UTC (rev 1584)
+++ code/trunk/ChangeLog 2015-08-05 15:38:32 UTC (rev 1585)
@@ -100,6 +100,12 @@
26. Arrange to give up on finding the minimum matching length for overly
complex patterns.
+
+27. Similar to (4) above: in a pattern with duplicated named groups and an
+ occurrence of (?| it is possible for an apparently non-recursive back
+ reference to become recursive if a later named group with the relevant
+ number is encountered. This could lead to a buffer overflow. Wen Guanxing
+ from Venustech ADLAB discovered this bug.
Version 8.37 28-April-2015
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2015-08-01 09:30:02 UTC (rev 1584)
+++ code/trunk/pcre_compile.c 2015-08-05 15:38:32 UTC (rev 1585)
@@ -6668,6 +6668,7 @@
/* ------------------------------------------------------------ */
case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */
reset_bracount = TRUE;
+ cd->dupgroups = TRUE; /* Record (?| encountered */
/* Fall through */
/* ------------------------------------------------------------ */
@@ -7178,7 +7179,8 @@
if (lengthptr != NULL)
{
named_group *ng;
-
+ recno = 0;
+
if (namelen == 0)
{
*errorcodeptr = ERR62;
@@ -7195,32 +7197,6 @@
goto FAILED;
}
- /* The name table does not exist in the first pass; instead we must
- scan the list of names encountered so far in order to get the
- number. If the name is not found, set the value to 0 for a forward
- reference. */
-
- recno = 0;
- ng = cd->named_groups;
- for (i = 0; i < cd->names_found; i++, ng++)
- {
- if (namelen == ng->length &&
- STRNCMP_UC_UC(name, ng->name, namelen) == 0)
- {
- open_capitem *oc;
- recno = ng->number;
- if (is_recurse) break;
- for (oc = cd->open_caps; oc != NULL; oc = oc->next)
- {
- if (oc->number == recno)
- {
- oc->flag = TRUE;
- break;
- }
- }
- }
- }
-
/* Count named back references. */
if (!is_recurse) cd->namedrefcount++;
@@ -7242,7 +7218,44 @@
issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
only mode, we finesse the bug by allowing more memory always. */
- /* if (recno == 0) */ *lengthptr += 2 + 2*LINK_SIZE;
+ *lengthptr += 2 + 2*LINK_SIZE;
+
+ /* It is even worse than that. The current reference may be to an
+ existing named group with a different number (so apparently not
+ recursive) but which later on is also attached to a group with the
+ current number. This can only happen if (?| has been previously
+ encountered. In that case, we allow yet more memory, just in case.
+ (Again, this is fixed "properly" in PCRE2.) */
+
+ if (cd->dupgroups) *lengthptr += 2 + 2*LINK_SIZE;
+
+ /* Otherwise, check for recursion here. The name table does not exist
+ in the first pass; instead we must scan the list of names encountered
+ so far in order to get the number. If the name is not found, leave
+ the value of recno as 0 for a forward reference. */
+
+ else
+ {
+ ng = cd->named_groups;
+ for (i = 0; i < cd->names_found; i++, ng++)
+ {
+ if (namelen == ng->length &&
+ STRNCMP_UC_UC(name, ng->name, namelen) == 0)
+ {
+ open_capitem *oc;
+ recno = ng->number;
+ if (is_recurse) break;
+ for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+ {
+ if (oc->number == recno)
+ {
+ oc->flag = TRUE;
+ break;
+ }
+ }
+ }
+ }
+ }
}
/* In the real compile, search the name table. We check the name
@@ -7289,8 +7302,6 @@
for (i++; i < cd->names_found; i++)
{
if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
-
-
count++;
cslot += cd->name_entry_size;
}
@@ -9239,6 +9250,7 @@
cd->name_entry_size = 0;
cd->name_table = NULL;
cd->dupnames = FALSE;
+cd->dupgroups = FALSE;
cd->namedrefcount = 0;
cd->start_code = cworkspace;
cd->hwm = cworkspace;
@@ -9273,7 +9285,7 @@
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
(int)(cd->hwm - cworkspace)));
-
+
if (length > MAX_PATTERN_SIZE)
{
errorcode = ERR20;
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2015-08-01 09:30:02 UTC (rev 1584)
+++ code/trunk/pcre_internal.h 2015-08-05 15:38:32 UTC (rev 1585)
@@ -2454,6 +2454,7 @@
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */
BOOL dupnames; /* Duplicate names exist */
+ BOOL dupgroups; /* Duplicate groups exist: (?| found */
BOOL iscondassert; /* Next assert is a condition */
int nltype; /* Newline type */
int nllen; /* Newline string length */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-08-01 09:30:02 UTC (rev 1584)
+++ code/trunk/testdata/testinput2 2015-08-05 15:38:32 UTC (rev 1585)
@@ -4194,4 +4194,6 @@
/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I
+/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-08-01 09:30:02 UTC (rev 1584)
+++ code/trunk/testdata/testoutput2 2015-08-05 15:38:32 UTC (rev 1585)
@@ -14537,4 +14537,6 @@
No first char
Need char = '0'
+/(?J:(?|(:(?|(?'R')(\k'R')|((?'R')))H'Rk'Rf)|s(?'R')))/
+
/-- End of testinput2 --/