Revision: 1107
http://www.exim.org/viewvc/pcre2?view=rev&revision=1107
Author: ph10
Date: 2019-06-16 16:37:45 +0100 (Sun, 16 Jun 2019)
Log Message:
-----------
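Improve the minimum length finder for patterns that contain back references and
multiple groups with the same number: such a back reference is now treated as
adding nothing to the minimum length, instead of making the finder give up.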
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_study.c
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2019-06-15 15:51:07 UTC (rev 1106)
+++ code/trunk/ChangeLog 2019-06-16 15:37:45 UTC (rev 1107)
@@ -31,11 +31,17 @@
9. Some changes to the way the minimum subject length is handled:
* When PCRE2_NO_START_OPTIMIZE is set, no minimum length is computed;
- pcre2test no longer shows a value (of zero).
+ pcre2test omits this item instead of showing a value of zero.
* When no minimum length is set by the normal scan, but a first and/or last
code unit is recorded, set the minimum to 1 or 2 as appropriate.
+ * When a pattern contains multiple groups with the same number, a back
+ reference cannot know which one to scan for a minimum length. This used to
+ cause the minimum length finder to give up with no result. Now it treats
+ such references as not adding to the minimum length (which it should have
+ done all along).
+
10. A (*MARK) value inside a successful condition was not being returned by the
interpretive matcher (it was returned by JIT). This bug has been mended.
Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c 2019-06-15 15:51:07 UTC (rev 1106)
+++ code/trunk/src/pcre2_study.c 2019-06-16 15:37:45 UTC (rev 1107)
@@ -92,7 +92,6 @@
-1 \C in UTF-8 mode
or (*ACCEPT)
or pattern too complicated
- or back reference to duplicate name/number
-2 internal error (missing capturing bracket)
-3 internal error (opcode not listed)
*/
@@ -135,7 +134,7 @@
int d, min, recno;
PCRE2_UCHAR *cs, *ce;
PCRE2_UCHAR op = *cc;
-
+
if (branchlength >= UINT16_MAX) return UINT16_MAX;
switch (op)
@@ -452,12 +451,12 @@
that case we must set the minimum length to zero. */
/* Duplicate named pattern back reference. We cannot reliably find a length
- for this if duplicate numbers are present in the pattern. */
+ for this if duplicate numbers are present in the pattern, so we set the
+ length to zero here also. */
case OP_DNREF:
case OP_DNREFI:
- if (dupcapused) return -1;
- if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
+ if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
int count = GET2(cc, 1+IMM2_SIZE);
PCRE2_UCHAR *slot =
@@ -524,7 +523,6 @@
case OP_REF:
case OP_REFI:
- if (dupcapused) return -1;
recno = GET2(cc, 1);
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
d = backref_cache[recno];
@@ -531,7 +529,7 @@
else
{
int i;
- if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
+ if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
if (cs == NULL) return -2;
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2019-06-15 15:51:07 UTC (rev 1106)
+++ code/trunk/testdata/testoutput2 2019-06-16 15:37:45 UTC (rev 1107)
@@ -14607,7 +14607,7 @@
Capture group count = 1
Max back reference = 1
Starting code units: a b
-Subject length lower bound = 0
+Subject length lower bound = 1
/(?|(aaa)|(b))(?1)/I
Capture group count = 1
@@ -14625,7 +14625,7 @@
Named capture groups:
a 1
Starting code units: a b
-Subject length lower bound = 0
+Subject length lower bound = 1
/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/I,dupnames
Capture group count = 2
@@ -14636,7 +14636,7 @@
Options: dupnames
Starting code units: a b
Last code unit = 'c'
-Subject length lower bound = 1
+Subject length lower bound = 5
/ab{3cd/
ab{3cd