Revision: 1361
http://vcs.pcre.org/viewvc?view=rev&revision=1361
Author: ph10
Date: 2013-09-06 18:47:32 +0100 (Fri, 06 Sep 2013)
Log Message:
-----------
Make back references to duplicated named subpatterns more like Perl.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcrepattern.3
code/trunk/pcre_compile.c
code/trunk/pcre_dfa_exec.c
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/pcre_printint.c
code/trunk/pcre_study.c
code/trunk/testdata/saved16
code/trunk/testdata/saved16BE-1
code/trunk/testdata/saved16BE-2
code/trunk/testdata/saved16LE-1
code/trunk/testdata/saved16LE-2
code/trunk/testdata/saved32
code/trunk/testdata/saved32BE-1
code/trunk/testdata/saved32BE-2
code/trunk/testdata/saved32LE-1
code/trunk/testdata/saved32LE-2
code/trunk/testdata/saved8
code/trunk/testdata/testinput1
code/trunk/testdata/testinput2
code/trunk/testdata/testinput21
code/trunk/testdata/testinput22
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput21-16
code/trunk/testdata/testoutput21-32
code/trunk/testdata/testoutput22-16
code/trunk/testdata/testoutput22-32
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/ChangeLog 2013-09-06 17:47:32 UTC (rev 1361)
@@ -74,6 +74,11 @@
compile happens. This has simplified the code (it is now nearly 150 lines
shorter) and prepared the way for better handling of references to groups
with duplicate names.
+
+15. A back reference to a named subpattern when there is more than one of the
+ same name now checks them in the order in which they appear in the pattern.
+ The first one that is set is used for the reference. Previously only the
+ first one was inspected. This change makes PCRE more compatible with Perl.
Version 8.33 28-May-2013
Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/doc/pcrepattern.3 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1,4 +1,4 @@
-.TH PCREPATTERN 3 "26 April 2013" "PCRE 8.33"
+.TH PCREPATTERN 3 "06 September 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
@@ -1577,9 +1577,20 @@
matched. This saves searching to find which numbered subpattern it was.
.P
If you make a back reference to a non-unique named subpattern from elsewhere in
-the pattern, the one that corresponds to the first occurrence of the name is
-used. In the absence of duplicate numbers (see the previous section) this is
-the one with the lowest number. If you use a named reference in a condition
+the pattern, the subpatterns to which the name refers are checked in the order
+in which they appear in the overall pattern. The first one that is set is used
+for the reference. For example, this pattern matches both "foofoo" and
+"barbar" but not "foobar" or "barfoo":
+.sp
+ (?:(?<n>foo)|(?<n>bar))\k<n>
+.sp
+.P
+If you make a subroutine call to a non-unique named subpattern, the one that
+corresponds to the first occurrence of the name is used. In the absence of
+duplicate numbers (see the previous section) this is the one with the lowest
+number.
+.P
+If you use a named reference in a condition
test (see the
.\"
.\" HTML <a href="#conditions">
@@ -1599,8 +1610,9 @@
\fBWarning:\fP You cannot use different names to distinguish between two
subpatterns with the same number because PCRE uses only the numbers when
matching. For this reason, an error is given at compile time if different names
-are given to subpatterns with the same number. However, you can give the same
-name to subpatterns with the same number, even when PCRE_DUPNAMES is not set.
+are given to subpatterns with the same number. However, you can always give the
+same name to subpatterns with the same number, even when PCRE_DUPNAMES is not
+set.
.
.
.SH REPETITION
@@ -3145,6 +3157,6 @@
.rs
.sp
.nf
-Last updated: 26 April 2013
+Last updated: 06 September 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/pcre_compile.c 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1722,6 +1722,8 @@
case OP_QUERYI:
case OP_REF:
case OP_REFI:
+ case OP_DNREF:
+ case OP_DNREFI:
case OP_SBRA:
case OP_SBRAPOS:
case OP_SCBRA:
@@ -4826,13 +4828,12 @@
/* If previous was a character class or a back reference, we put the repeat
stuff after it, but just skip the item if the repeat was {0,0}. */
- else if (*previous == OP_CLASS ||
- *previous == OP_NCLASS ||
+ else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
*previous == OP_XCLASS ||
#endif
- *previous == OP_REF ||
- *previous == OP_REFI)
+ *previous == OP_REF || *previous == OP_REFI ||
+ *previous == OP_DNREF || *previous == OP_DNREFI)
{
if (repeat_max == 0)
{
@@ -5886,7 +5887,8 @@
{
*errorcodeptr = ERR43;
goto FAILED;
- }
+ }
+ cd->dupnames = TRUE; /* Duplicate names exist */
}
else if (ng->number == number)
{
@@ -5987,6 +5989,10 @@
break;
}
recno = (i < cd->names_found)? ng->number : 0;
+
+ /* Count named back references. */
+
+ if (!is_recurse) cd->namedrefcount++;
}
/* In the real compile, search the name table. We check the name
@@ -6016,12 +6022,66 @@
}
}
- /* In both phases, we can now go to the code than handles numerical
- recursion or backreferences. */
+ /* In both phases, for recursions, we can now go to the code that
+ handles numerical recursion. */
if (is_recurse) goto HANDLE_RECURSION;
- else goto HANDLE_REFERENCE;
+
+ /* In the second pass we must see if the name is duplicated. If so, we
+ generate a different opcode. */
+
+ if (lengthptr == NULL && cd->dupnames)
+ {
+ int count = 1;
+ unsigned int index = i;
+ pcre_uchar *cslot = slot + cd->name_entry_size;
+
+ for (i++; i < cd->names_found; i++)
+ {
+ if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
+ count++;
+ cslot += cd->name_entry_size;
+ }
+ if (count > 1)
+ {
+ if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
+ previous = code;
+ *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
+ PUT2INC(code, 0, index);
+ PUT2INC(code, 0, count);
+
+ /* Process each potentially referenced group. */
+
+ for (; slot < cslot; slot += cd->name_entry_size)
+ {
+ open_capitem *oc;
+ recno = GET2(slot, 0);
+ cd->backref_map |= (recno < 32)? (1 << recno) : 1;
+ if (recno > cd->top_backref) cd->top_backref = recno;
+
+ /* Check to see if this back reference is recursive, that is, it
+ is inside the group that it references. A flag is set so that the
+ group can be made atomic. */
+
+ for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+ {
+ if (oc->number == recno)
+ {
+ oc->flag = TRUE;
+ break;
+ }
+ }
+ }
+
+ continue; /* End of back ref handling */
+ }
+ }
+
+ /* First pass, or a non-duplicated name. */
+
+ goto HANDLE_REFERENCE;
+
/* ------------------------------------------------------------ */
case CHAR_R: /* Recursion */
@@ -6602,8 +6662,11 @@
{
open_capitem *oc;
recno = -escape;
+
+ /* Come here from named backref handling when the reference is to a
+ single group (i.e. not to a duplicated name). */
- HANDLE_REFERENCE: /* Come here from named backref handling */
+ HANDLE_REFERENCE:
if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
previous = code;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
@@ -7872,6 +7935,8 @@
cd->names_found = 0;
cd->name_entry_size = 0;
cd->name_table = NULL;
+cd->dupnames = FALSE;
+cd->namedrefcount = 0;
cd->start_code = cworkspace;
cd->hwm = cworkspace;
cd->start_workspace = cworkspace;
@@ -7909,14 +7974,23 @@
goto PCRE_EARLY_ERROR_RETURN;
}
-/* Compute the size of data block needed and get it, either from malloc or
-externally provided function. Integer overflow should no longer be possible
-because nowadays we limit the maximum value of cd->names_found and
-cd->name_entry_size. */
+/* If there are groups with duplicate names and there are also references by
+name, we must allow for the possibility of named references to duplicated
+groups. These require an extra data item each. */
-size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
+if (cd->dupnames && cd->namedrefcount > 0)
+ length += cd->namedrefcount * IMM2_SIZE * sizeof(pcre_uchar);
+
+/* Compute the size of the data block for storing the compiled pattern. Integer
+overflow should no longer be possible because nowadays we limit the maximum
+value of cd->names_found and cd->name_entry_size. */
+
+size = sizeof(REAL_PCRE) +
+ (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
+
+/* Get the memory. */
+
re = (REAL_PCRE *)(PUBL(malloc))(size);
-
if (re == NULL)
{
errorcode = ERR21;
Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/pcre_dfa_exec.c 2013-09-06 17:47:32 UTC (rev 1361)
@@ -156,6 +156,8 @@
0, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
+ 0, /* DNREF */
+ 0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
@@ -225,6 +227,8 @@
1, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
+ 0, /* DNREF */
+ 0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/pcre_exec.c 2013-09-06 17:47:32 UTC (rev 1361)
@@ -2742,16 +2742,8 @@
similar code to character type repeats - written out again for speed.
However, if the referenced string is the empty string, always treat
it as matched, any number of times (otherwise there could be infinite
- loops). */
+ loops). If the reference is unset, there are two possibilities:
- case OP_REF:
- case OP_REFI:
- caseless = op == OP_REFI;
- offset = GET2(ecode, 1) << 1; /* Doubled ref number */
- ecode += 1 + IMM2_SIZE;
-
- /* If the reference is unset, there are two possibilities:
-
(a) In the default, Perl-compatible state, set the length negative;
this ensures that every attempt at a match fails. We can't just fail
here, because of the possibility of quantifiers with zero minima.
@@ -2760,8 +2752,41 @@
so that the back reference matches an empty string.
Otherwise, set the length to the length of what was matched by the
- referenced subpattern. */
+ referenced subpattern.
+
+ The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
+ or to a non-duplicated named group. For a duplicated named group, OP_DNREF
+ and OP_DNREFI are used. In this case we must scan the list of groups to
+ which the name refers, and use the first one that is set. */
+
+ case OP_DNREF:
+ case OP_DNREFI:
+ caseless = op == OP_DNREFI;
+ {
+ int count = GET2(ecode, 1+IMM2_SIZE);
+ pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
+ ecode += 1 + 2*IMM2_SIZE;
+
+ while (count-- > 0)
+ {
+ offset = GET2(slot, 0) << 1;
+ if (offset < offset_top && md->offset_vector[offset] >= 0) break;
+ slot += md->name_entry_size;
+ }
+ if (count < 0)
+ length = (md->jscript_compat)? 0 : -1;
+ else
+ length = md->offset_vector[offset+1] - md->offset_vector[offset];
+ }
+ goto REF_REPEAT;
+ case OP_REF:
+ case OP_REFI:
+ caseless = op == OP_REFI;
+ offset = GET2(ecode, 1) << 1; /* Doubled ref number */
+ ecode += 1 + IMM2_SIZE;
+
+
if (offset >= offset_top || md->offset_vector[offset] < 0)
length = (md->jscript_compat)? 0 : -1;
else
@@ -2769,6 +2794,7 @@
/* Set up for repetition, or handle the non-repeated case */
+ REF_REPEAT:
switch (*ecode)
{
case OP_CRSTAR:
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/pcre_internal.h 2013-09-06 17:47:32 UTC (rev 1361)
@@ -2055,79 +2055,81 @@
class. This does both positive and negative. */
OP_REF, /* 109 Match a back reference, casefully */
OP_REFI, /* 110 Match a back reference, caselessly */
- OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */
- OP_CALLOUT, /* 112 Call out to external function if provided */
+ OP_DNREF, /* 111 Match a duplicate name backref, casefully */
+ OP_DNREFI, /* 112 Match a duplicate name backref, caselessly */
+ OP_RECURSE, /* 113 Match a numbered subpattern (possibly recursive) */
+ OP_CALLOUT, /* 114 Call out to external function if provided */
- OP_ALT, /* 113 Start of alternation */
- OP_KET, /* 114 End of group that doesn't have an unbounded repeat */
- OP_KETRMAX, /* 115 These two must remain together and in this */
- OP_KETRMIN, /* 116 order. They are for groups the repeat for ever. */
- OP_KETRPOS, /* 117 Possessive unlimited repeat. */
+ OP_ALT, /* 115 Start of alternation */
+ OP_KET, /* 116 End of group that doesn't have an unbounded repeat */
+ OP_KETRMAX, /* 117 These two must remain together and in this */
+ OP_KETRMIN, /* 118 order. They are for groups that repeat for ever. */
+ OP_KETRPOS, /* 119 Possessive unlimited repeat. */
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
asserts must remain in order. */
- OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */
- OP_ASSERT, /* 119 Positive lookahead */
- OP_ASSERT_NOT, /* 120 Negative lookahead */
- OP_ASSERTBACK, /* 121 Positive lookbehind */
- OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
+ OP_REVERSE, /* 120 Move pointer back - used in lookbehind assertions */
+ OP_ASSERT, /* 121 Positive lookahead */
+ OP_ASSERT_NOT, /* 122 Negative lookahead */
+ OP_ASSERTBACK, /* 123 Positive lookbehind */
+ OP_ASSERTBACK_NOT, /* 124 Negative lookbehind */
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
after the assertions, with ONCE first, as there's a test for >= ONCE for a
subpattern that isn't an assertion. The POS versions must immediately follow
the non-POS versions in each case. */
- OP_ONCE, /* 123 Atomic group, contains captures */
- OP_ONCE_NC, /* 124 Atomic group containing no captures */
- OP_BRA, /* 125 Start of non-capturing bracket */
- OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 127 Start of capturing bracket */
- OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 129 Conditional group */
+ OP_ONCE, /* 125 Atomic group, contains captures */
+ OP_ONCE_NC, /* 126 Atomic group containing no captures */
+ OP_BRA, /* 127 Start of non-capturing bracket */
+ OP_BRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 129 Start of capturing bracket */
+ OP_CBRAPOS, /* 130 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 131 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 130 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 132 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 134 Conditional group, check empty */
+ OP_SBRA, /* 132 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 134 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 136 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 135 Used to hold a capture number as condition */
- OP_NCREF, /* 136 Same, but generated by a name reference*/
- OP_RREF, /* 137 Used to hold a recursion number as condition */
- OP_NRREF, /* 138 Same, but generated by a name reference*/
- OP_DEF, /* 139 The DEFINE condition */
+ OP_CREF, /* 137 Used to hold a capture number as condition */
+ OP_NCREF, /* 138 Same, but generated by a name reference*/
+ OP_RREF, /* 139 Used to hold a recursion number as condition */
+ OP_NRREF, /* 140 Same, but generated by a name reference*/
+ OP_DEF, /* 141 The DEFINE condition */
- OP_BRAZERO, /* 140 These two must remain together and in this */
- OP_BRAMINZERO, /* 141 order. */
- OP_BRAPOSZERO, /* 142 */
+ OP_BRAZERO, /* 142 These two must remain together and in this */
+ OP_BRAMINZERO, /* 143 order. */
+ OP_BRAPOSZERO, /* 144 */
/* These are backtracking control verbs */
- OP_MARK, /* 143 always has an argument */
- OP_PRUNE, /* 144 */
- OP_PRUNE_ARG, /* 145 same, but with argument */
- OP_SKIP, /* 146 */
- OP_SKIP_ARG, /* 147 same, but with argument */
- OP_THEN, /* 148 */
- OP_THEN_ARG, /* 149 same, but with argument */
- OP_COMMIT, /* 150 */
+ OP_MARK, /* 145 always has an argument */
+ OP_PRUNE, /* 146 */
+ OP_PRUNE_ARG, /* 147 same, but with argument */
+ OP_SKIP, /* 148 */
+ OP_SKIP_ARG, /* 149 same, but with argument */
+ OP_THEN, /* 150 */
+ OP_THEN_ARG, /* 151 same, but with argument */
+ OP_COMMIT, /* 152 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 151 */
- OP_ACCEPT, /* 152 */
- OP_ASSERT_ACCEPT, /* 153 Used inside assertions */
- OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 153 */
+ OP_ACCEPT, /* 154 */
+ OP_ASSERT_ACCEPT, /* 155 Used inside assertions */
+ OP_CLOSE, /* 156 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 155 */
+ OP_SKIPZERO, /* 157 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -2167,7 +2169,7 @@
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
"*+","++", "?+", "{", \
"*", "*?", "+", "+?", "?", "??", "{", "{", \
- "class", "nclass", "xclass", "Ref", "Refi", \
+ "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
"Recurse", "Callout", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
@@ -2237,6 +2239,8 @@
0, /* XCLASS - variable length */ \
1+IMM2_SIZE, /* REF */ \
1+IMM2_SIZE, /* REFI */ \
+ 1+2*IMM2_SIZE, /* DNREF */ \
+ 1+2*IMM2_SIZE, /* DNREFI */ \
1+LINK_SIZE, /* RECURSE */ \
2+2*LINK_SIZE, /* CALLOUT */ \
1+LINK_SIZE, /* Alt */ \
@@ -2441,6 +2445,7 @@
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
+ unsigned int namedrefcount; /* Number of backreferences by name */
int assert_depth; /* Depth of nested assertions */
pcre_uint32 external_options; /* External (initial) options */
pcre_uint32 external_flags; /* External flag bits to be set */
@@ -2448,6 +2453,7 @@
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */
+ BOOL dupnames; /* Duplicate names exist */
int nltype; /* Newline type */
int nllen; /* Newline string length */
pcre_uchar nl[4]; /* Newline string when fixed length */
Modified: code/trunk/pcre_printint.c
===================================================================
--- code/trunk/pcre_printint.c 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/pcre_printint.c 2013-09-06 17:47:32 UTC (rev 1361)
@@ -598,6 +598,20 @@
ccode = code + priv_OP_lengths[*code];
goto CLASS_REF_REPEAT;
+ case OP_DNREFI:
+ flag = "/i";
+ /* Fall through */
+ case OP_DNREF:
+ {
+ pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
+ IMM2_SIZE;
+ fprintf(f, " %s \\k<", flag);
+ print_puchar(f, entry);
+ fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+ }
+ ccode = code + priv_OP_lengths[*code];
+ goto CLASS_REF_REPEAT;
+
case OP_CALLOUT:
fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
GET(code, 2 + LINK_SIZE));
Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/pcre_study.c 2013-09-06 17:47:32 UTC (rev 1361)
@@ -66,8 +66,9 @@
rather than bytes.
Arguments:
+ re compiled pattern block
code pointer to start of group (the bracket)
- startcode pointer to start of the whole pattern
+ startcode pointer to start of the whole pattern's code
options the compiling options
int RECURSE depth
@@ -78,8 +79,8 @@
*/
static int
-find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
- int recurse_depth)
+find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
+ const pcre_uchar *startcode, int options, int recurse_depth)
{
int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
@@ -129,7 +130,7 @@
case OP_SBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
- d = find_minlength(cc, startcode, options, recurse_depth);
+ d = find_minlength(re, cc, startcode, options, recurse_depth);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -374,8 +375,39 @@
If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
matches an empty string (by default it causes a matching failure), so in
that case we must set the minimum length to zero. */
+
+ case OP_DNREF: /* Duplicate named pattern back reference */
+ case OP_DNREFI:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
+ {
+ int count = GET2(cc, 1+IMM2_SIZE);
+ pcre_uchar *slot = (pcre_uchar *)re +
+ re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
+ d = INT_MAX;
+ while (count-- > 0)
+ {
+ ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
+ if (cs == NULL) return -2;
+ do ce += GET(ce, 1); while (*ce == OP_ALT);
+ if (cc > cs && cc < ce)
+ {
+ d = 0;
+ had_recurse = TRUE;
+ break;
+ }
+ else
+ {
+ int dd = find_minlength(re, cs, startcode, options, recurse_depth);
+ if (dd < d) d = dd;
+ }
+ slot += re->name_entry_size;
+ }
+ }
+ else d = 0;
+ cc += 1 + 2*IMM2_SIZE;
+ goto REPEAT_BACK_REFERENCE;
- case OP_REF:
+ case OP_REF: /* Single back reference */
case OP_REFI:
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
{
@@ -389,7 +421,7 @@
}
else
{
- d = find_minlength(cs, startcode, options, recurse_depth);
+ d = find_minlength(re, cs, startcode, options, recurse_depth);
}
}
else d = 0;
@@ -397,6 +429,7 @@
/* Handle repeated back references */
+ REPEAT_BACK_REFERENCE:
switch (*cc)
{
case OP_CRSTAR:
@@ -437,7 +470,8 @@
had_recurse = TRUE;
else
{
- branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
+ branchlength += find_minlength(re, cs, startcode, options,
+ recurse_depth + 1);
}
cc += 1 + LINK_SIZE;
break;
@@ -825,6 +859,8 @@
case OP_RECURSE:
case OP_REF:
case OP_REFI:
+ case OP_DNREF:
+ case OP_DNREFI:
case OP_REVERSE:
case OP_RREF:
case OP_SCOND:
@@ -1346,6 +1382,7 @@
compile_data compile_block;
const REAL_PCRE *re = (const REAL_PCRE *)external_re;
+
*errorptr = NULL;
if (re == NULL || re->magic_number != MAGIC_NUMBER)
@@ -1422,7 +1459,7 @@
/* Find the minimum length of subject string. */
-switch(min = find_minlength(code, code, re->options, 0))
+switch(min = find_minlength(re, code, code, re->options, 0))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
Modified: code/trunk/testdata/saved16
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16BE-1
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16BE-2
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16LE-1
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16LE-2
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved32
===================================================================
--- code/trunk/testdata/saved32 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/saved32 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1 +1 @@
-???d????ERCPd???????T???????????a???c???????????????????????????}???????????a???????b???????c???r???????????
\ No newline at end of file
+???d????ERCPd???????T???\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFa???c??????????????????????????????????????a???????b???????c???t???????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32BE-1
===================================================================
--- code/trunk/testdata/saved32BE-1 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/saved32BE-1 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1 +1 @@
-???\xF4???,PCRE???\xF4???????????????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????}???^???????j??????????????????????????????????????????????}???????j???????????????????????????????????d???r???????k\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???c???r???????\x80???????????l??? ???????????P???P???????????????q???????l??????????????????8???8???????????????????????????????\xD8???\xDF\xFF???????u???????l???????????????????????????????????????????????????????????????h???????????????r???^???????,????????????????????????????????????????
\ No newline at end of file
+???\xF4???,PCRE???\xF4????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???????????????????????????????????????n???a???m???e???????????????o???t???h???e???r??????????^???????j???????????????????????????????????\x81??????????????????j???????????????????????????????????d???t???????k\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???c???t???????\x82???????????l??? ???????????P???P???????????????s???????l??????????????????8???8???????????????????????????????\xD8???\xDF\xFF???????w???????l???????????????????????????????????????????????????????????????h???????????????t???^???????,????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32BE-2
===================================================================
--- code/trunk/testdata/saved32BE-2 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/saved32BE-2 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1 +1 @@
-???\x94???,PCRE???\x94???????????????????????????????????????????????????????c???b???2???????1???????????c???b???r???a???1???????}???F??????!???????l??????????????????????????????????????????????????\xFF\xFF???????h???????????l??????????????\xF1#???????9???????????4???????f???q???????l???????????????????????????????????????????????????\xA7???????4???????r???4??????????????k\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???r???????r???F???????,????????????????????????????????????????
\ No newline at end of file
+???\x94???,PCRE???\x94????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???????????????????????????????????????c???b???2???????1???????????c???b???r???a???1??????????F???\x81???!???????l??????????????????????????????????????????????????\xFF\xFF???????h???????????l??????????????\xF1#???????9???????????4???????f???s???????l???????????????????????????????????????????????????\xA7???????4???????t???4???\x81???????????k\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???t???????t???F???????,????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32LE-1
===================================================================
--- code/trunk/testdata/saved32LE-1 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/saved32LE-1 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1 +1 @@
-???\xF4???,ERCP\xF4???????????????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????}???^???????j??????????????????????????????????????????????}???????j???????????????????????????????????d???r???????k???\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFc???r???????\x80???????????l??? ???????????P???P???????????????q???????l?????????????????????8???8??????????????????????????????\xD8??\xFF\xDF??????u???????l???????????????????????????????????????????????????????????????h???????????????r???^???????,???????????????????????????????????????????
\ No newline at end of file
+???\xF4???,ERCP\xF4???????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF????????????????????????????????????n???a???m???e???????????????o???t???h???e???r??????????^???????j???????????????????????????????????\x81??????????????????j???????????????????????????????????d???t???????k???\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFc???t???????\x82???????????l??? ???????????P???P???????????????s???????l?????????????????????8???8??????????????????????????????\xD8??\xFF\xDF??????w???????l???????????????????????????????????????????????????????????????h???????????????t???^???????,???????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32LE-2
===================================================================
--- code/trunk/testdata/saved32LE-2 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/saved32LE-2 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1 +1 @@
-???\x94???,ERCP\x94???????????????????????????????????????????????????????c???b???2???????1???????????c???b???r???a???1???????}???F??????!???????l???????????????????????????????????????????????????\xFF\xFF??????h???????????l???????????????#\xF1??????9???????????4???????f???q???????l???????????????????????????????????????????????????\xA7???????4???????r???4??????????????k???\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFr???????r???F???????,???????????????????????????????????????????
\ No newline at end of file
+???\x94???,ERCP\x94???????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF????????????????????????????????????c???b???2???????1???????????c???b???r???a???1??????????F???\x81???!???????l???????????????????????????????????????????????????\xFF\xFF??????h???????????l???????????????#\xF1??????9???????????4???????f???s???????l???????????????????????????????????????????????????\xA7???????4???????t???4???\x81???????????k???\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFt???????t???F???????,???????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved8
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testinput1 2013-09-06 17:47:32 UTC (rev 1361)
@@ -5609,4 +5609,15 @@
ca
cd
+/(?:(?<n>foo)|(?<n>bar))\k<n>/J
+ foofoo
+ barbar
+
+/(?<n>A)(?:(?<n>foo)|(?<n>bar))\k<n>/J
+ AfooA
+ AbarA
+ ** Failers
+ Afoofoo
+ Abarbar
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testinput2 2013-09-06 17:47:32 UTC (rev 1361)
@@ -3844,4 +3844,8 @@
/^(?=(a)){0}b(?1)/
backgammon
+/(?|(?<n>f)|(?<n>b))/JI
+
+/(?<a>abc)(?<a>z)\k<a>()/JDZS
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testinput21
===================================================================
--- code/trunk/testdata/testinput21 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testinput21 2013-09-06 17:47:32 UTC (rev 1361)
@@ -4,7 +4,11 @@
<!testsaved8
-%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+%-- Generated from:
+ /^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|
+ [^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ /x
+
In 16-bit mode with options: S>testdata/saved16LE-1
FS>testdata/saved16BE-1
In 32-bit mode with options: S>testdata/saved32LE-1
Modified: code/trunk/testdata/testinput22
===================================================================
--- code/trunk/testdata/testinput22 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testinput22 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1,10 +1,15 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
- In 16-bit mode with options: S8>testdata/saved16LE-1
- FS8>testdata/saved16BE-1
- In 32-bit mode with options: S8>testdata/saved32LE-1
- FS8testdata/saved32BE-1
+%-- Generated from:
+ /(?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}
+ [\x{f123}\x{10039}\x{20000}-\x{21234}]?|
+ [A-Cx-z\x{100000}-\x{1000a7}\x{101234}])
+ (?<cb2>[^az])/x
+
+ In 16-bit mode with options: S8>testdata/saved16LE-2
+ FS8>testdata/saved16BE-2
+ In 32-bit mode with options: S8>testdata/saved32LE-2
+ FS8>testdata/saved32BE-2
--%8x
<!testsaved16LE-2
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testoutput1 2013-09-06 17:47:32 UTC (rev 1361)
@@ -9204,4 +9204,30 @@
0:
0+ cd
+/(?:(?<n>foo)|(?<n>bar))\k<n>/J
+ foofoo
+ 0: foofoo
+ 1: foo
+ barbar
+ 0: barbar
+ 1: <unset>
+ 2: bar
+
+/(?<n>A)(?:(?<n>foo)|(?<n>bar))\k<n>/J
+ AfooA
+ 0: AfooA
+ 1: A
+ 2: foo
+ AbarA
+ 0: AbarA
+ 1: A
+ 2: <unset>
+ 3: bar
+ ** Failers
+No match
+ Afoofoo
+No match
+ Abarbar
+No match
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testoutput2 2013-09-06 17:47:32 UTC (rev 1361)
@@ -12734,4 +12734,38 @@
backgammon
0: ba
+/(?|(?<n>f)|(?<n>b))/JI
+Capturing subpattern count = 1
+Named capturing subpatterns:
+ n 1
+Options: dupnames
+No first char
+No need char
+
+/(?<a>abc)(?<a>z)\k<a>()/JDZS
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ abc
+ Ket
+ CBra 2
+ z
+ Ket
+ \k<a>2
+ CBra 3
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+Capturing subpattern count = 3
+Max back reference = 2
+Named capturing subpatterns:
+ a 1
+ a 2
+Options: dupnames
+First char = 'a'
+Need char = 'z'
+Subject length lower bound = 5
+No set of starting bytes
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput21-16
===================================================================
--- code/trunk/testdata/testoutput21-16 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testoutput21-16 2013-09-06 17:47:32 UTC (rev 1361)
@@ -8,7 +8,11 @@
Error -28 from pcre16_fullinfo(0)
Running in 16-bit mode but pattern was compiled in 8-bit mode
-%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+%-- Generated from:
+ /^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|
+ [^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ /x
+
In 16-bit mode with options: S>testdata/saved16LE-1
FS>testdata/saved16BE-1
In 32-bit mode with options: S>testdata/saved32LE-1
@@ -42,7 +46,7 @@
Named capturing subpatterns:
name 1
other 2
-Options: anchored
+Options: anchored extended
No first char
No need char
Subject length lower bound = 6
@@ -75,7 +79,7 @@
Named capturing subpatterns:
name 1
other 2
-Options: anchored
+Options: anchored extended
No first char
No need char
Subject length lower bound = 6
Modified: code/trunk/testdata/testoutput21-32
===================================================================
--- code/trunk/testdata/testoutput21-32 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testoutput21-32 2013-09-06 17:47:32 UTC (rev 1361)
@@ -8,7 +8,11 @@
Error -28 from pcre32_fullinfo(0)
Running in 32-bit mode but pattern was compiled in 8-bit mode
-%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+%-- Generated from:
+ /^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|
+ [^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ /x
+
In 16-bit mode with options: S>testdata/saved16LE-1
FS>testdata/saved16BE-1
In 32-bit mode with options: S>testdata/saved32LE-1
@@ -54,7 +58,7 @@
Named capturing subpatterns:
name 1
other 2
-Options: anchored
+Options: anchored extended
No first char
No need char
Subject length lower bound = 6
@@ -87,7 +91,7 @@
Named capturing subpatterns:
name 1
other 2
-Options: anchored
+Options: anchored extended
No first char
No need char
Subject length lower bound = 6
Modified: code/trunk/testdata/testoutput22-16
===================================================================
--- code/trunk/testdata/testoutput22-16 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testoutput22-16 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1,10 +1,15 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
- In 16-bit mode with options: S8>testdata/saved16LE-1
- FS8>testdata/saved16BE-1
- In 32-bit mode with options: S8>testdata/saved32LE-1
- FS8testdata/saved32BE-1
+%-- Generated from:
+ /(?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}
+ [\x{f123}\x{10039}\x{20000}-\x{21234}]?|
+ [A-Cx-z\x{100000}-\x{1000a7}\x{101234}])
+ (?<cb2>[^az])/x
+
+ In 16-bit mode with options: S8>testdata/saved16LE-2
+ FS8>testdata/saved16BE-2
+ In 32-bit mode with options: S8>testdata/saved32LE-2
+ FS8>testdata/saved32BE-2
--%8x
<!testsaved16LE-2
@@ -28,7 +33,7 @@
Named capturing subpatterns:
cb2 2
cbra1 1
-Options: utf
+Options: extended utf
No first char
No need char
Subject length lower bound = 2
@@ -55,7 +60,7 @@
Named capturing subpatterns:
cb2 2
cbra1 1
-Options: utf
+Options: extended utf
No first char
No need char
Subject length lower bound = 2
Modified: code/trunk/testdata/testoutput22-32
===================================================================
--- code/trunk/testdata/testoutput22-32 2013-09-03 10:25:39 UTC (rev 1360)
+++ code/trunk/testdata/testoutput22-32 2013-09-06 17:47:32 UTC (rev 1361)
@@ -1,10 +1,15 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
- In 16-bit mode with options: S8>testdata/saved16LE-1
- FS8>testdata/saved16BE-1
- In 32-bit mode with options: S8>testdata/saved32LE-1
- FS8testdata/saved32BE-1
+%-- Generated from:
+ /(?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}
+ [\x{f123}\x{10039}\x{20000}-\x{21234}]?|
+ [A-Cx-z\x{100000}-\x{1000a7}\x{101234}])
+ (?<cb2>[^az])/x
+
+ In 16-bit mode with options: S8>testdata/saved16LE-2
+ FS8>testdata/saved16BE-2
+ In 32-bit mode with options: S8>testdata/saved32LE-2
+ FS8>testdata/saved32BE-2
--%8x
<!testsaved16LE-2
@@ -40,7 +45,7 @@
Named capturing subpatterns:
cb2 2
cbra1 1
-Options: utf
+Options: extended utf
No first char
No need char
Subject length lower bound = 2
@@ -67,7 +72,7 @@
Named capturing subpatterns:
cb2 2
cbra1 1
-Options: utf
+Options: extended utf
No first char
No need char
Subject length lower bound = 2