Revision: 1365
http://vcs.pcre.org/viewvc?view=rev&revision=1365
Author: ph10
Date: 2013-10-06 19:33:56 +0100 (Sun, 06 Oct 2013)
Log Message:
-----------
Refactor named group handling for conditional tests.
Modified Paths:
--------------
code/trunk/pcre_compile.c
code/trunk/pcre_dfa_exec.c
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/pcre_printint.c
code/trunk/pcre_study.c
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput11-16
code/trunk/testdata/testoutput11-32
code/trunk/testdata/testoutput11-8
code/trunk/testdata/testoutput2
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/pcre_compile.c 2013-10-06 18:33:56 UTC (rev 1365)
@@ -1524,9 +1524,9 @@
case OP_CALLOUT:
case OP_CREF:
- case OP_NCREF:
+ case OP_DNCREF:
case OP_RREF:
- case OP_NRREF:
+ case OP_DNRREF:
case OP_DEF:
code += PRIV(OP_lengths)[*code];
break;
@@ -1663,13 +1663,13 @@
case OP_COMMIT:
case OP_CREF:
case OP_DEF:
+ case OP_DNCREF:
+ case OP_DNRREF:
case OP_DOLL:
case OP_DOLLM:
case OP_EOD:
case OP_EODN:
case OP_FAIL:
- case OP_NCREF:
- case OP_NRREF:
case OP_NOT_WORD_BOUNDARY:
case OP_PRUNE:
case OP_REVERSE:
@@ -6030,8 +6030,8 @@
tempptr[2] == CHAR_LESS_THAN_SIGN))
break;
- /* Most other conditions use OP_CREF (a couple change to OP_RREF
- below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. */
+ /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
+ need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
code[1+LINK_SIZE] = OP_CREF;
skipbytes = 1+IMM2_SIZE;
@@ -6047,7 +6047,9 @@
}
/* Check for a test for a named group's having been set, using the Perl
- syntax (?(<name>) or (?('name') */
+ syntax (?(<name>) or (?('name'), and also allow for the original PCRE
+ syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). As names may
+ consist entirely of digits, there is scope for ambiguity. */
else if (ptr[1] == CHAR_LESS_THAN_SIGN)
{
@@ -6064,8 +6066,16 @@
terminator = CHAR_NULL;
if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
}
+
+ /* When a name is one of a number of duplicates, a different opcode is
+ used and it needs more memory. Unfortunately we cannot tell whether a
+ name is a duplicate in the first pass, so we have to allow for more
+ memory except when we know it is a relative numerical reference. */
+
+ if (refsign < 0 && lengthptr != NULL) *lengthptr += IMM2_SIZE;
- /* We now expect to read a name; any thing else is an error */
+ /* We now expect to read a name (possibly all digits); anything else
+ is an error. In the case of all digits, also get it as a number. */
if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0)
{
@@ -6074,8 +6084,6 @@
goto FAILED;
}
- /* Read the name, but also get it as a number if it's all digits */
-
recno = 0;
name = ++ptr;
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
@@ -6086,6 +6094,8 @@
}
namelen = (int)(ptr - name);
+ /* Check the terminator */
+
if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
*ptr++ != CHAR_RIGHT_PARENTHESIS)
{
@@ -6121,11 +6131,8 @@
}
/* Otherwise (did not start with "+" or "-"), start by looking for the
- name. If we find a name, add one to the opcode to change OP_CREF or
- OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
- except they record that the reference was originally to a name. The
- information is used to check duplicate names. */
-
+ name. */
+
slot = cd->name_table;
for (i = 0; i < cd->names_found; i++)
{
@@ -6133,13 +6140,33 @@
slot += cd->name_entry_size;
}
- /* Found the named subpattern */
+ /* Found the named subpattern. If the name is duplicated, add one to
+ the opcode to change CREF/RREF into DNCREF/DNRREF and insert
+ appropriate data values. Otherwise, just insert the unique subpattern
+ number. */
if (i < cd->names_found)
{
- recno = GET2(slot, 0);
- PUT2(code, 2+LINK_SIZE, recno);
- code[1+LINK_SIZE]++;
+ int offset = i++;
+ int count = 1;
+ recno = GET2(slot, 0); /* Number from first found */
+ for (; i < cd->names_found; i++)
+ {
+ slot += cd->name_entry_size;
+ if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0) break;
+ count++;
+ }
+ if (count > 1)
+ {
+ PUT2(code, 2+LINK_SIZE, offset);
+ PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
+ skipbytes += IMM2_SIZE;
+ code[1+LINK_SIZE]++;
+ }
+ else /* Not a duplicated name */
+ {
+ PUT2(code, 2+LINK_SIZE, recno);
+ }
}
/* If terminator == CHAR_NULL it means that the name followed directly
@@ -7829,9 +7856,9 @@
switch (*scode)
{
case OP_CREF:
- case OP_NCREF:
+ case OP_DNCREF:
case OP_RREF:
- case OP_NRREF:
+ case OP_DNRREF:
case OP_DEF:
return FALSE;
Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/pcre_dfa_exec.c 2013-10-06 18:33:56 UTC (rev 1365)
@@ -173,8 +173,8 @@
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
- 0, 0, /* CREF, NCREF */
- 0, 0, /* RREF, NRREF */
+ 0, 0, /* CREF, DNCREF */
+ 0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@@ -244,8 +244,8 @@
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
- 0, 0, /* CREF, NCREF */
- 0, 0, /* RREF, NRREF */
+ 0, 0, /* CREF, DNCREF */
+ 0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@@ -2661,9 +2661,11 @@
condcode = code[LINK_SIZE+1];
- /* Back reference conditions are not supported */
+ /* Back reference conditions and duplicate named recursion conditions
+ are not supported */
- if (condcode == OP_CREF || condcode == OP_NCREF)
+ if (condcode == OP_CREF || condcode == OP_DNCREF ||
+ condcode == OP_DNRREF)
return PCRE_ERROR_DFA_UCOND;
/* The DEFINE condition is always false */
@@ -2675,7 +2677,7 @@
which means "test if in any recursion". We can't test for specifically
recursed groups. */
- else if (condcode == OP_RREF || condcode == OP_NRREF)
+ else if (condcode == OP_RREF)
{
int value = GET2(code, LINK_SIZE + 2);
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/pcre_exec.c 2013-10-06 18:33:56 UTC (rev 1365)
@@ -1274,25 +1274,32 @@
/* Control never reaches here. */
- /* Conditional group: compilation checked that there are no more than
- two branches. If the condition is false, skipping the first branch takes us
- past the end if there is only one branch, but that's OK because that is
- exactly what going to the ket would do. */
+ /* Conditional group: compilation checked that there are no more than two
+ branches. If the condition is false, skipping the first branch takes us
+ past the end of the item if there is only one branch, but that's exactly
+ what we want. */
case OP_COND:
case OP_SCOND:
- codelink = GET(ecode, 1);
+
+ /* The variable codelink will be added to ecode when the condition is
+ false, to get to the second branch. Setting it to the offset to the ALT
+ or KET, then incrementing ecode achieves this effect. We now have ecode
+ pointing to the condition or callout. */
+
+ codelink = GET(ecode, 1); /* Offset to the second branch */
+ ecode += 1 + LINK_SIZE; /* From this opcode */
/* Because of the way auto-callout works during compile, a callout item is
inserted between OP_COND and an assertion condition. */
- if (ecode[LINK_SIZE+1] == OP_CALLOUT)
+ if (*ecode == OP_CALLOUT)
{
if (PUBL(callout) != NULL)
{
PUBL(callout_block) cb;
cb.version = 2; /* Version 1 of the callout block */
- cb.callout_number = ecode[LINK_SIZE+2];
+ cb.callout_number = ecode[1];
cb.offset_vector = md->offset_vector;
#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)md->start_subject;
@@ -1304,8 +1311,8 @@
cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);
cb.current_position = (int)(eptr - md->start_subject);
- cb.pattern_position = GET(ecode, LINK_SIZE + 3);
- cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
+ cb.pattern_position = GET(ecode, 2);
+ cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2;
cb.capture_last = md->capture_last & CAPLMASK;
/* Internal change requires this for API compatibility. */
@@ -1315,207 +1322,119 @@
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
if (rrc < 0) RRETURN(rrc);
}
+
+ /* Advance ecode past the callout, so it now points to the condition. We
+ must adjust codelink so that the value of ecode+codelink is unchanged. */
+
ecode += PRIV(OP_lengths)[OP_CALLOUT];
codelink -= PRIV(OP_lengths)[OP_CALLOUT];
}
- condcode = ecode[LINK_SIZE+1];
+ /* Test the various possible conditions */
- /* Now see what the actual condition is */
-
- if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
- {
- if (md->recursive == NULL) /* Not recursing => FALSE */
+ condition = FALSE;
+ switch(condcode = *ecode)
+ {
+ case OP_RREF: /* Numbered group recursion test */
+ if (md->recursive != NULL) /* Recursing; otherwise condition stays FALSE */
{
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
- else
- {
- unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
+ unsigned int recno = GET2(ecode, 1); /* Recursion group number*/
condition = (recno == RREF_ANY || recno == md->recursive->group_num);
+ }
+ break;
- /* If the test is for recursion into a specific subpattern, and it is
- false, but the test was set up by name, scan the table to see if the
- name refers to any other numbers, and test them. The condition is true
- if any one is set. */
-
- if (!condition && condcode == OP_NRREF)
+ case OP_DNRREF: /* Duplicate named group recursion test */
+ if (md->recursive != NULL)
+ {
+ int count = GET2(ecode, 1 + IMM2_SIZE);
+ pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
+ while (count-- > 0)
{
- pcre_uchar *slotA = md->name_table;
- for (i = 0; i < md->name_count; i++)
- {
- if (GET2(slotA, 0) == recno) break;
- slotA += md->name_entry_size;
- }
-
- /* Found a name for the number - there can be only one; duplicate
- names for different numbers are allowed, but not vice versa. First
- scan down for duplicates. */
-
- if (i < md->name_count)
- {
- pcre_uchar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
-
- /* Scan up for duplicates */
-
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- condition = GET2(slotB, 0) == md->recursive->group_num;
- if (condition) break;
- }
- else break;
- }
- }
- }
+ unsigned int recno = GET2(slot, 0);
+ condition = recno == md->recursive->group_num;
+ if (condition) break;
+ slot += md->name_entry_size;
}
+ }
+ break;
- /* Chose branch according to the condition */
-
- ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
- }
- }
-
- else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
- {
- offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
+ case OP_CREF: /* Numbered group used test */
+ offset = GET2(ecode, 1) << 1; /* Doubled ref number */
condition = offset < offset_top && md->offset_vector[offset] >= 0;
+ break;
- /* If the numbered capture is unset, but the reference was by name,
- scan the table to see if the name refers to any other numbers, and test
- them. The condition is true if any one is set. This is tediously similar
- to the code above, but not close enough to try to amalgamate. */
-
- if (!condition && condcode == OP_NCREF)
+ case OP_DNCREF: /* Duplicate named group used test */
{
- unsigned int refno = offset >> 1;
- pcre_uchar *slotA = md->name_table;
-
- for (i = 0; i < md->name_count; i++)
+ int count = GET2(ecode, 1 + IMM2_SIZE);
+ pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
+ while (count-- > 0)
{
- if (GET2(slotA, 0) == refno) break;
- slotA += md->name_entry_size;
+ offset = GET2(slot, 0) << 1;
+ condition = offset < offset_top && md->offset_vector[offset] >= 0;
+ if (condition) break;
+ slot += md->name_entry_size;
}
-
- /* Found a name for the number - there can be only one; duplicate names
- for different numbers are allowed, but not vice versa. First scan down
- for duplicates. */
-
- if (i < md->name_count)
- {
- pcre_uchar *slotB = slotA;
- while (slotB > md->name_table)
- {
- slotB -= md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
-
- /* Scan up for duplicates */
-
- if (!condition)
- {
- slotB = slotA;
- for (i++; i < md->name_count; i++)
- {
- slotB += md->name_entry_size;
- if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
- {
- offset = GET2(slotB, 0) << 1;
- condition = offset < offset_top &&
- md->offset_vector[offset] >= 0;
- if (condition) break;
- }
- else break;
- }
- }
- }
}
+ break;
- /* Chose branch according to the condition */
+ case OP_DEF: /* DEFINE - always false */
+ break;
- ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
- }
-
- else if (condcode == OP_DEF) /* DEFINE - always false */
- {
- condition = FALSE;
- ecode += GET(ecode, 1);
- }
-
- /* The condition is an assertion. Call match() to evaluate it - setting
- md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
- an assertion. */
-
- else
- {
+ /* The condition is an assertion. Call match() to evaluate it - setting
+ md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
+ of an assertion. */
+
+ default:
md->match_function_type = MATCH_CONDASSERT;
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
+ RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
if (rrc == MATCH_MATCH)
{
if (md->end_offset_top > offset_top)
offset_top = md->end_offset_top; /* Captures may have happened */
condition = TRUE;
- ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
+
+ /* Advance ecode past the assertion to the start of the first branch,
+ but adjust it so that the general choosing code below works. */
+
+ ecode += GET(ecode, 1);
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
+ ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
}
/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
- assertion; it is therefore treated as NOMATCH. */
+ assertion; it is therefore treated as NOMATCH. Any other return is an
+ error. */
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
{
RRETURN(rrc); /* Need braces because of following else */
}
- else
- {
- condition = FALSE;
- ecode += codelink;
- }
+ break;
}
-
- /* We are now at the branch that is to be obeyed. As there is only one, can
- use tail recursion to avoid using another stack frame, except when there is
- unlimited repeat of a possibly empty group. In the latter case, a recursive
- call to match() is always required, unless the second alternative doesn't
- exist, in which case we can just plough on. Note that, for compatibility
- with Perl, the | in a conditional group is NOT treated as creating two
- alternatives. If a THEN is encountered in the branch, it propagates out to
- the enclosing alternative (unless nested in a deeper set of alternatives,
- of course). */
-
- if (condition || *ecode == OP_ALT)
+
+ /* Choose branch according to the condition */
+
+ ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
+
+ /* We are now at the branch that is to be obeyed. As there is only one, we
+ can use tail recursion to avoid using another stack frame, except when
+ there is unlimited repeat of a possibly empty group. In the latter case, a
+ recursive call to match() is always required, unless the second alternative
+ doesn't exist, in which case we can just plough on. Note that, for
+ compatibility with Perl, the | in a conditional group is NOT treated as
+ creating two alternatives. If a THEN is encountered in the branch, it
+ propagates out to the enclosing alternative (unless nested in a deeper set
+ of alternatives, of course). */
+
+ if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
{
if (op != OP_SCOND)
{
- ecode += 1 + LINK_SIZE;
goto TAIL_RECURSE;
}
md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
RRETURN(rrc);
}
@@ -1523,7 +1442,6 @@
else
{
- ecode += 1 + LINK_SIZE;
}
break;
@@ -2783,8 +2701,6 @@
caseless = op == OP_REFI;
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
ecode += 1 + IMM2_SIZE;
-
-
if (offset >= offset_top || md->offset_vector[offset] < 0)
length = (md->jscript_compat)? 0 : -1;
else
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/pcre_internal.h 2013-10-06 18:33:56 UTC (rev 1365)
@@ -2125,9 +2125,9 @@
/* The next two pairs must (respectively) be kept together. */
OP_CREF, /* 137 Used to hold a capture number as condition */
- OP_NCREF, /* 138 Same, but generated by a name reference*/
+ OP_DNCREF, /* 138 Used to point to duplicate names as a condition */
OP_RREF, /* 139 Used to hold a recursion number as condition */
- OP_NRREF, /* 140 Same, but generated by a name reference*/
+ OP_DNRREF, /* 140 Used to point to duplicate names as a condition */
OP_DEF, /* 141 The DEFINE condition */
OP_BRAZERO, /* 142 These two must remain together and in this */
@@ -2203,7 +2203,7 @@
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
"SCond", \
- "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
+ "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
"Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
@@ -2290,8 +2290,8 @@
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
1+LINK_SIZE, /* SCOND */ \
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \
+ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
+ 1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
1, /* DEF */ \
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
@@ -2300,8 +2300,7 @@
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
-/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
-condition. */
+/* A magic value for OP_RREF to indicate the "any recursion" condition. */
#define RREF_ANY 0xffff
Modified: code/trunk/pcre_printint.c
===================================================================
--- code/trunk/pcre_printint.c 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/pcre_printint.c 2013-10-06 18:33:56 UTC (rev 1365)
@@ -425,10 +425,19 @@
break;
case OP_CREF:
- case OP_NCREF:
fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
break;
+ case OP_DNCREF:
+ {
+ pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
+ IMM2_SIZE;
+ fprintf(f, " %s Cond ref <", flag);
+ print_puchar(f, entry);
+ fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+ }
+ break;
+
case OP_RREF:
c = GET2(code, 1);
if (c == RREF_ANY)
@@ -437,12 +446,14 @@
fprintf(f, " Cond recurse %d", c);
break;
- case OP_NRREF:
- c = GET2(code, 1);
- if (c == RREF_ANY)
- fprintf(f, " Cond nrecurse any");
- else
- fprintf(f, " Cond nrecurse %d", c);
+ case OP_DNRREF:
+ {
+ pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
+ IMM2_SIZE;
+ fprintf(f, " %s Cond recurse <", flag);
+ print_puchar(f, entry);
+ fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
+ }
break;
case OP_DEF:
Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/pcre_study.c 2013-10-06 18:33:56 UTC (rev 1365)
@@ -176,9 +176,9 @@
case OP_REVERSE:
case OP_CREF:
- case OP_NCREF:
+ case OP_DNCREF:
case OP_RREF:
- case OP_NRREF:
+ case OP_DNRREF:
case OP_DEF:
case OP_CALLOUT:
case OP_SOD:
@@ -812,6 +812,10 @@
case OP_COND:
case OP_CREF:
case OP_DEF:
+ case OP_DNCREF:
+ case OP_DNREF:
+ case OP_DNREFI:
+ case OP_DNRREF:
case OP_DOLL:
case OP_DOLLM:
case OP_END:
@@ -820,7 +824,6 @@
case OP_EXTUNI:
case OP_FAIL:
case OP_MARK:
- case OP_NCREF:
case OP_NOT:
case OP_NOTEXACT:
case OP_NOTEXACTI:
@@ -852,15 +855,12 @@
case OP_NOTUPTOI:
case OP_NOT_HSPACE:
case OP_NOT_VSPACE:
- case OP_NRREF:
case OP_PROP:
case OP_PRUNE:
case OP_PRUNE_ARG:
case OP_RECURSE:
case OP_REF:
case OP_REFI:
- case OP_DNREF:
- case OP_DNREFI:
case OP_REVERSE:
case OP_RREF:
case OP_SCOND:
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/testdata/testinput1 2013-10-06 18:33:56 UTC (rev 1365)
@@ -5623,4 +5623,8 @@
/^(\d+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/
1 IN SOA non-sp1 non-sp2(
+/^ (?:(?<A>A)|(?'B'B)(?<A>A)) (?('A')x) (?(<B>)y)$/xJ
+ Ax
+ BAxy
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/testdata/testoutput1 2013-10-06 18:33:56 UTC (rev 1365)
@@ -9237,4 +9237,14 @@
2: non-sp1
3: non-sp2
+/^ (?:(?<A>A)|(?'B'B)(?<A>A)) (?('A')x) (?(<B>)y)$/xJ
+ Ax
+ 0: Ax
+ 1: A
+ BAxy
+ 0: BAxy
+ 1: <unset>
+ 2: B
+ 3: A
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/testdata/testoutput11-16 2013-10-06 18:33:56 UTC (rev 1365)
@@ -537,7 +537,7 @@
------------------------------------------------------------------
/( ( (?(1)0|) )* )/xBM
-Memory allocation (code space): 52
+Memory allocation (code space): 54
------------------------------------------------------------------
0 23 Bra
2 19 CBra 1
@@ -555,7 +555,7 @@
------------------------------------------------------------------
/( (?(1)0|)* )/xBM
-Memory allocation (code space): 42
+Memory allocation (code space): 44
------------------------------------------------------------------
0 18 Bra
2 14 CBra 1
Modified: code/trunk/testdata/testoutput11-32
===================================================================
--- code/trunk/testdata/testoutput11-32 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/testdata/testoutput11-32 2013-10-06 18:33:56 UTC (rev 1365)
@@ -537,7 +537,7 @@
------------------------------------------------------------------
/( ( (?(1)0|) )* )/xBM
-Memory allocation (code space): 104
+Memory allocation (code space): 108
------------------------------------------------------------------
0 23 Bra
2 19 CBra 1
@@ -555,7 +555,7 @@
------------------------------------------------------------------
/( (?(1)0|)* )/xBM
-Memory allocation (code space): 84
+Memory allocation (code space): 88
------------------------------------------------------------------
0 18 Bra
2 14 CBra 1
Modified: code/trunk/testdata/testoutput11-8
===================================================================
--- code/trunk/testdata/testoutput11-8 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/testdata/testoutput11-8 2013-10-06 18:33:56 UTC (rev 1365)
@@ -537,7 +537,7 @@
------------------------------------------------------------------
/( ( (?(1)0|) )* )/xBM
-Memory allocation (code space): 38
+Memory allocation (code space): 40
------------------------------------------------------------------
0 34 Bra
3 28 CBra 1
@@ -555,7 +555,7 @@
------------------------------------------------------------------
/( (?(1)0|)* )/xBM
-Memory allocation (code space): 30
+Memory allocation (code space): 32
------------------------------------------------------------------
0 26 Bra
3 20 CBra 1
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-10-05 15:45:11 UTC (rev 1364)
+++ code/trunk/testdata/testoutput2 2013-10-06 18:33:56 UTC (rev 1365)
@@ -7693,7 +7693,7 @@
^
CBra 1
Cond
- 2 Cond nref
+ 2 Cond ref
y
Ket
[()]
@@ -10268,7 +10268,7 @@
Ket
Ket
Cond
- 4 Cond nref
+ Cond ref <D>2
X
Alt
Y
@@ -10314,7 +10314,7 @@
CBra 4
d
Cond
- Cond nrecurse 1
+ Cond recurse <A>2
$
Alt
Recurse