Revision: 677
http://www.exim.org/viewvc/pcre2?view=rev&revision=677
Author: ph10
Date: 2017-03-11 17:59:23 +0000 (Sat, 11 Mar 2017)
Log Message:
-----------
Remove obsolete OP_ONCE_NC internal opcode.
Modified Paths:
--------------
code/trunk/HACKING
code/trunk/src/pcre2_auto_possess.c
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_dfa_match.c
code/trunk/src/pcre2_internal.h
code/trunk/src/pcre2_jit_compile.c
code/trunk/src/pcre2_match.c
code/trunk/src/pcre2_printint.c
code/trunk/src/pcre2_study.c
code/trunk/testdata/testoutput2
Modified: code/trunk/HACKING
===================================================================
--- code/trunk/HACKING 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/HACKING 2017-03-11 17:59:23 UTC (rev 677)
@@ -677,17 +677,9 @@
Once-only (atomic) groups
-------------------------
-These are just like other subpatterns, but they start with the opcode
-OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
-within the atomic group; the latter when there are. The distinction is needed
-for when there is a backtrack to before the group - any captures within the
-group must be reset, so it is necessary to retain backtracking points inside
-the group, even after it is complete, in order to do this. When there are no
-captures in an atomic group, all the backtracking can be discarded when it is
-complete. This is more efficient, and also uses less stack.
-
+These are just like other subpatterns, but they start with the opcode OP_ONCE.
The check for matching an empty string in an unbounded repeat is handled
-entirely at runtime, so there are just these two opcodes for atomic groups.
+entirely at runtime, so there is just this one opcode for atomic groups.
Assertions
@@ -795,4 +787,4 @@
correct length, in order to catch updating errors.
Philip Hazel
-November 2016
+March 2017
Modified: code/trunk/src/pcre2_auto_possess.c
===================================================================
--- code/trunk/src/pcre2_auto_possess.c 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_auto_possess.c 2017-03-11 17:59:23 UTC (rev 677)
@@ -7,7 +7,7 @@
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -588,7 +588,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
/* Atomic sub-patterns and assertions can always auto-possessify their
last iterator. However, if the group was entered as a result of checking
@@ -601,7 +600,6 @@
continue;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
next_code = code + GET(code, 1);
@@ -625,8 +623,8 @@
case OP_BRAMINZERO:
next_code = code + 1;
- if (*next_code != OP_BRA && *next_code != OP_CBRA
- && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
+ if (*next_code != OP_BRA && *next_code != OP_CBRA &&
+ *next_code != OP_ONCE) return FALSE;
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_compile.c 2017-03-11 17:59:23 UTC (rev 677)
@@ -4746,7 +4746,6 @@
int class_has_8bitchar;
int i;
uint32_t mclength;
- uint32_t templastcapture;
uint32_t skipunits;
uint32_t subreqcu, subfirstcu;
uint32_t groupnumber;
@@ -5753,7 +5752,6 @@
pptr++;
tempcode = code;
tempreqvary = cb->req_varyopt; /* Save value before group */
- templastcapture = cb->lastcapture; /* Save value before group */
length_prevgroup = 0; /* Initialize for pre-compile phase */
if ((group_return =
@@ -5783,12 +5781,6 @@
if (note_group_empty && bravalue != OP_COND && group_return > 0)
matched_char = TRUE;
- /* If that was an atomic group and there are no capturing groups within it,
- generate OP_ONCE_NC instead of OP_ONCE. */
-
- if (bravalue == OP_ONCE && cb->lastcapture <= templastcapture)
- *code = OP_ONCE_NC;
-
/* If we've just compiled an assertion, pop the assert depth. */
if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
@@ -6376,7 +6368,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@@ -6620,14 +6611,12 @@
/* Convert possessive ONCE brackets to non-capturing */
- if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
- possessive_quantifier) *bracode = OP_BRA;
+ if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;
/* For non-possessive ONCE brackets, all we need to do is to
set the KET. */
- if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
- *ketcode = OP_KETRMAX + repeat_type;
+ if (*bracode == OP_ONCE) *ketcode = OP_KETRMAX + repeat_type;
/* Handle non-ONCE brackets and possessive ONCEs (which have been
converted to non-capturing above). */
@@ -7621,7 +7610,7 @@
/* Atomic groups */
- else if (op == OP_ONCE || op == OP_ONCE_NC)
+ else if (op == OP_ONCE)
{
if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert))
return FALSE;
@@ -7751,7 +7740,7 @@
/* Atomic brackets */
- else if (op == OP_ONCE || op == OP_ONCE_NC)
+ else if (op == OP_ONCE)
{
if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert))
return FALSE;
@@ -7773,9 +7762,8 @@
}
/* Check for explicit circumflex; anything else gives a FALSE result. Note
- in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC
- because the number of characters matched by .* cannot be adjusted inside
- them. */
+ in particular that this includes atomic brackets OP_ONCE because the number
+ of characters matched by .* cannot be adjusted inside them. */
else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
@@ -7986,7 +7974,6 @@
case OP_SCBRAPOS:
case OP_ASSERT:
case OP_ONCE:
- case OP_ONCE_NC:
d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT);
if (dflags < 0)
return 0;
Modified: code/trunk/src/pcre2_dfa_match.c
===================================================================
--- code/trunk/src/pcre2_dfa_match.c 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_dfa_match.c 2017-03-11 17:59:23 UTC (rev 677)
@@ -7,7 +7,7 @@
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -172,7 +172,7 @@
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
- 0, 0, /* ONCE, ONCE_NC */
+ 0, /* ONCE */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@@ -245,7 +245,7 @@
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
- 0, 0, /* ONCE, ONCE_NC */
+ 0, /* ONCE */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@@ -2889,7 +2889,6 @@
/*-----------------------------------------------------------------*/
case OP_ONCE:
- case OP_ONCE_NC:
{
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_internal.h 2017-03-11 17:59:23 UTC (rev 677)
@@ -1510,68 +1510,67 @@
OP_ASSERTBACK, /* 128 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
- /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
- after the assertions, with ONCE first, as there's a test for >= ONCE for a
- subpattern that isn't an assertion. The POS versions must immediately follow
- the non-POS versions in each case. */
+ /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the
+ assertions, with ONCE first, as there's a test for >= ONCE for a subpattern
+ that isn't an assertion. The POS versions must immediately follow the non-POS
+ versions in each case. */
OP_ONCE, /* 130 Atomic group, contains captures */
- OP_ONCE_NC, /* 131 Atomic group containing no captures */
- OP_BRA, /* 132 Start of non-capturing bracket */
- OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 134 Start of capturing bracket */
- OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 136 Conditional group */
+ OP_BRA, /* 131 Start of non-capturing bracket */
+ OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 133 Start of capturing bracket */
+ OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 139 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 141 Conditional group, check empty */
+ OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 138 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 140 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 142 Used to hold a capture number as condition */
- OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
- OP_RREF, /* 144 Used to hold a recursion number as condition */
- OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
- OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
- OP_TRUE, /* 147 Always true (used by VERSION) */
+ OP_CREF, /* 141 Used to hold a capture number as condition */
+ OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
+ OP_RREF, /* 143 Used to hold a recursion number as condition */
+ OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
+ OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
+ OP_TRUE, /* 146 Always true (used by VERSION) */
- OP_BRAZERO, /* 148 These two must remain together and in this */
- OP_BRAMINZERO, /* 149 order. */
- OP_BRAPOSZERO, /* 150 */
+ OP_BRAZERO, /* 147 These two must remain together and in this */
+ OP_BRAMINZERO, /* 148 order. */
+ OP_BRAPOSZERO, /* 149 */
/* These are backtracking control verbs */
- OP_MARK, /* 151 always has an argument */
- OP_PRUNE, /* 152 */
- OP_PRUNE_ARG, /* 153 same, but with argument */
- OP_SKIP, /* 154 */
- OP_SKIP_ARG, /* 155 same, but with argument */
- OP_THEN, /* 156 */
- OP_THEN_ARG, /* 157 same, but with argument */
- OP_COMMIT, /* 158 */
+ OP_MARK, /* 150 always has an argument */
+ OP_PRUNE, /* 151 */
+ OP_PRUNE_ARG, /* 152 same, but with argument */
+ OP_SKIP, /* 153 */
+ OP_SKIP_ARG, /* 154 same, but with argument */
+ OP_THEN, /* 155 */
+ OP_THEN_ARG, /* 156 same, but with argument */
+ OP_COMMIT, /* 157 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 159 */
- OP_ACCEPT, /* 160 */
- OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
- OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 158 */
+ OP_ACCEPT, /* 159 */
+ OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
+ OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 163 */
+ OP_SKIPZERO, /* 162 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
- OP_DEFINE, /* 164 */
+ OP_DEFINE, /* 163 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1618,7 +1617,7 @@
"Recurse", "Callout", "CalloutStr", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
- "Once", "Once_NC", \
+ "Once", \
"Bra", "BraPos", "CBra", "CBraPos", \
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
@@ -1702,7 +1701,6 @@
1+LINK_SIZE, /* Assert behind */ \
1+LINK_SIZE, /* Assert behind not */ \
1+LINK_SIZE, /* ONCE */ \
- 1+LINK_SIZE, /* ONCE_NC */ \
1+LINK_SIZE, /* BRA */ \
1+LINK_SIZE, /* BRAPOS */ \
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_jit_compile.c 2017-03-11 17:59:23 UTC (rev 677)
@@ -7,7 +7,7 @@
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -675,7 +675,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_BRAPOS:
case OP_CBRA:
@@ -1304,7 +1303,7 @@
if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
break;
- if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
+ if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
{
if (detect_repeat(common, cc))
{
@@ -1333,7 +1332,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@@ -1802,7 +1800,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@@ -1982,7 +1979,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@@ -3583,7 +3579,6 @@
continue;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_BRAPOS:
case OP_CBRA:
@@ -7826,7 +7821,6 @@
(|) OP_*BRA | OP_ALT ... M A
(?()|) OP_*COND | OP_ALT M A
(?>|) OP_ONCE | OP_ALT ... [stack trace] M A
- (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
Or nothing, if trace is unnecessary
*/
@@ -7894,8 +7888,6 @@
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
opcode = OP_SCOND;
-if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
- opcode = OP_ONCE;
if (opcode == OP_CBRA || opcode == OP_SCBRA)
{
@@ -9546,7 +9538,6 @@
break;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@@ -9953,8 +9944,6 @@
offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
opcode = OP_SCOND;
-if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
- opcode = OP_ONCE;
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
@@ -10627,7 +10616,6 @@
break;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_match.c 2017-03-11 17:59:23 UTC (rev 677)
@@ -5021,7 +5021,6 @@
/* Atomic groups and non-capturing brackets that can match an empty string
must record a backtracking point and also set up a chained frame. */
- case OP_ONCE_NC: /* Obsolete */
case OP_ONCE:
case OP_SBRA:
Lframe_type = GF_NOCAPTURE | Fop;
@@ -5518,7 +5517,6 @@
frame so that it points to the final branch. */
case OP_ONCE:
- case OP_ONCE_NC: /* Obsolete */
Fback_frame = ((char *)F - (char *)P) + frame_size;
for (;;)
{
Modified: code/trunk/src/pcre2_printint.c
===================================================================
--- code/trunk/src/pcre2_printint.c 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_printint.c 2017-03-11 17:59:23 UTC (rev 677)
@@ -7,7 +7,7 @@
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -393,7 +393,6 @@
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/src/pcre2_study.c 2017-03-11 17:59:23 UTC (rev 677)
@@ -171,7 +171,6 @@
/* Fall through */
case OP_ONCE:
- case OP_ONCE_NC:
case OP_SBRA:
case OP_BRAPOS:
case OP_SBRAPOS:
@@ -1068,7 +1067,6 @@
case OP_CBRAPOS:
case OP_SCBRAPOS:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_ASSERT:
rc = set_start_bits(re, tcode, utf);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2017-03-11 17:39:02 UTC (rev 676)
+++ code/trunk/testdata/testoutput2 2017-03-11 17:59:23 UTC (rev 677)
@@ -11042,7 +11042,7 @@
------------------------------------------------------------------
Bra
^
- Once_NC
+ Once
a++
Ket
Once
@@ -12510,7 +12510,7 @@
cc
Ket
a++
- Once_NC
+ Once
bb
Alt
cc
@@ -12859,7 +12859,7 @@
------------------------------------------------------------------
Bra
[a-f]*+
- Once_NC
+ Once
gg
Alt
hh
@@ -12867,7 +12867,7 @@
#
[a-f]*+
Brazero
- Once_NC
+ Once
gg
Alt
hh
@@ -12875,7 +12875,7 @@
#
[a-f]*
Brazero
- Once_NC
+ Once
gg
Alt
hh
@@ -12883,7 +12883,7 @@
a#
[a-f]*+
Brazero
- Once_NC
+ Once
gg
Alt
hh
@@ -13173,7 +13173,7 @@
Bra
^
\w+
- Once_NC
+ Once
\s*+
Ket
AssertB