Revision: 1482
http://vcs.pcre.org/viewvc?view=rev&revision=1482
Author: ph10
Date: 2014-05-28 12:10:58 +0100 (Wed, 28 May 2014)
Log Message:
-----------
Fix two bugs concerned with duplicate named patterns.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2014-05-27 18:24:42 UTC (rev 1481)
+++ code/trunk/ChangeLog 2014-05-28 11:10:58 UTC (rev 1482)
@@ -45,7 +45,21 @@
^\w+(?>\s*)(?<=\w) which caused it not to match "test test".
9. Give a compile-time error for \o{} (as Perl does) and for \x{} (which Perl
- doesn't).
+ doesn't).
+
+10. Change 8.34/15 introduced a bug that caused the amount of memory needed
+ to hold a pattern to be incorrectly computed (too small) when there were
+ named back references to duplicated names. This could cause "internal
+ error: code overflow" or "double free or corruption" or other memory
+ handling errors.
+
+11. When named subpatterns had the same prefixes, back references could be
+ confused. For example, in this pattern:
+
+ /(?P<Name>a)?(?P<Name2>b)?(?(<Name>)c|d)*l/
+
+ the reference to 'Name' was incorrectly treated as a reference to a
+ duplicate name.
Version 8.35 04-April-2014
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2014-05-27 18:24:42 UTC (rev 1481)
+++ code/trunk/pcre_compile.c 2014-05-28 11:10:58 UTC (rev 1482)
@@ -5982,7 +5982,7 @@
just adjust the length as if we had. Do some paranoid checks for
potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
integer type when available, otherwise double. */
-
+
if (lengthptr != NULL)
{
int delta = (repeat_min - 1)*length_prevgroup;
@@ -6700,7 +6700,8 @@
ptr++;
}
namelen = (int)(ptr - name);
- if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
+ if (lengthptr != NULL && (options & PCRE_DUPNAMES) != 0)
+ *lengthptr += IMM2_SIZE;
}
/* Check the terminator */
@@ -6761,9 +6762,11 @@
for (; i < cd->names_found; i++)
{
slot += cd->name_entry_size;
- if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0) break;
+ if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0 ||
+ (slot+IMM2_SIZE)[namelen] != 0) break;
count++;
}
+
if (count > 1)
{
PUT2(code, 2+LINK_SIZE, offset);
@@ -7112,6 +7115,12 @@
/* Count named back references. */
if (!is_recurse) cd->namedrefcount++;
+
+ /* If duplicate names are permitted, we have to allow for a named
+ reference to a duplicated name (this cannot be determined until the
+ second pass). This needs an extra 16-bit data item. */
+
+ if ((options & PCRE_DUPNAMES) != 0) *lengthptr += IMM2_SIZE;
}
/* In the real compile, search the name table. We check the name
@@ -7158,10 +7167,12 @@
for (i++; i < cd->names_found; i++)
{
if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
+
+
count++;
cslot += cd->name_entry_size;
}
-
+
if (count > 1)
{
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2014-05-27 18:24:42 UTC (rev 1481)
+++ code/trunk/testdata/testinput1 2014-05-28 11:10:58 UTC (rev 1482)
@@ -5684,4 +5684,25 @@
/^\w+(?>\s*)(?<=\w)/
test test
+/(?P<same>a)(?P<same>b)/gJ
+ abbaba
+
+/(?P<same>a)(?P<same>b)(?P=same)/gJ
+ abbaba
+
+/(?P=same)?(?P<same>a)(?P<same>b)/gJ
+ abbaba
+
+/(?:(?P=same)?(?:(?P<same>a)|(?P<same>b))(?P=same))+/gJ
+ bbbaaabaabb
+
+/(?:(?P=same)?(?:(?P=same)(?P<same>a)(?P=same)|(?P=same)?(?P<same>b)(?P=same)){2}(?P=same)(?P<same>c)(?P=same)){2}(?P<same>z)?/gJ
+ bbbaaaccccaaabbbcc
+
+/(?P<Name>a)?(?P<Name2>b)?(?(<Name>)c|d)*l/
+ acl
+ bdl
+ adl
+ bcl
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2014-05-27 18:24:42 UTC (rev 1481)
+++ code/trunk/testdata/testoutput1 2014-05-28 11:10:58 UTC (rev 1482)
@@ -9339,4 +9339,54 @@
test test
0: tes
+/(?P<same>a)(?P<same>b)/gJ
+ abbaba
+ 0: ab
+ 1: a
+ 2: b
+ 0: ab
+ 1: a
+ 2: b
+
+/(?P<same>a)(?P<same>b)(?P=same)/gJ
+ abbaba
+ 0: aba
+ 1: a
+ 2: b
+
+/(?P=same)?(?P<same>a)(?P<same>b)/gJ
+ abbaba
+ 0: ab
+ 1: a
+ 2: b
+ 0: ab
+ 1: a
+ 2: b
+
+/(?:(?P=same)?(?:(?P<same>a)|(?P<same>b))(?P=same))+/gJ
+ bbbaaabaabb
+ 0: bbbaaaba
+ 1: a
+ 2: b
+ 0: bb
+ 1: <unset>
+ 2: b
+
+/(?:(?P=same)?(?:(?P=same)(?P<same>a)(?P=same)|(?P=same)?(?P<same>b)(?P=same)){2}(?P=same)(?P<same>c)(?P=same)){2}(?P<same>z)?/gJ
+ bbbaaaccccaaabbbcc
+No match
+
+/(?P<Name>a)?(?P<Name2>b)?(?(<Name>)c|d)*l/
+ acl
+ 0: acl
+ 1: a
+ bdl
+ 0: bdl
+ 1: <unset>
+ 2: b
+ adl
+ 0: dl
+ bcl
+ 0: l
+
/-- End of testinput1 --/