Revision: 621
http://vcs.pcre.org/viewvc?view=rev&revision=621
Author: ph10
Date: 2011-07-18 11:14:09 +0100 (Mon, 18 Jul 2011)
Log Message:
-----------
Remove atomic from single repeats; convert possessive atomic to possessive
non-atomic (because they are the same).
Modified Paths:
--------------
code/trunk/pcre_compile.c
code/trunk/testdata/testinput1
code/trunk/testdata/testinput11
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput10
code/trunk/testdata/testoutput11
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
code/trunk/testdata/testoutput7
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/pcre_compile.c 2011-07-18 10:14:09 UTC (rev 621)
@@ -4208,6 +4208,35 @@
ptr++;
}
else repeat_type = greedy_default;
+
+ /* If previous was a recursion call, wrap it in atomic brackets so that
+ previous becomes the atomic group. All recursions were so wrapped in the
+ past, but it no longer happens for non-repeated recursions. In fact, the
+ repeated ones could be re-implemented independently so as not to need this,
+ but for the moment we rely on the code for repeating groups. */
+
+ if (*previous == OP_RECURSE)
+ {
+ memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
+ *previous = OP_ONCE;
+ PUT(previous, 1, 2 + 2*LINK_SIZE);
+ previous[2 + 2*LINK_SIZE] = OP_KET;
+ PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
+ code += 2 + 2 * LINK_SIZE;
+ length_prevgroup = 3 + 3*LINK_SIZE;
+
+ /* When actually compiling, we need to check whether this was a forward
+ reference, and if so, adjust the offset. */
+
+ if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE)
+ {
+ int offset = GET(cd->hwm, -LINK_SIZE);
+ if (offset == previous + 1 - cd->start_code)
+ PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE);
+ }
+ }
+
+ /* Now handle repetition for the different types of item. */
/* If previous was a character match, abolish the item and generate a
repeat item instead. If a char item has a minimum of more than one, ensure
@@ -4727,14 +4756,10 @@
}
/* If the maximum is unlimited, set a repeater in the final copy. For
- ONCE brackets, that's all we need to do.
-
- (To be done next, after recursion adjusted)
- However, possessively repeated
- ONCE brackets can be converted into non-capturing brackets, as the
- behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
+ ONCE brackets, that's all we need to do. However, possessively repeated
+ ONCE brackets can be converted into non-capturing brackets, as the
+ behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
deal with possessive ONCEs specially.
- (....)
Otherwise, if the quantifier was possessive, we convert the BRA code to
the POS form, and the KET code to KETRPOS. (It turns out to be convenient
@@ -4755,12 +4780,8 @@
{
uschar *ketcode = code - 1 - LINK_SIZE;
uschar *bracode = ketcode - GET(ketcode, 1);
-
-/****
- if (*bracode == OP_ONCE && possessive_quantifier)
- *bracode = OP_BRA;
-****/
-
+
+ if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;
if (*bracode == OP_ONCE)
*ketcode = OP_KETRMAX + repeat_type;
else
@@ -5677,10 +5698,10 @@
/* Fudge the value of "called" so that when it is inserted as an
offset below, what is actually inserted is the reference number
- of the group. */
+ of the group. Then remember the forward reference. */
called = cd->start_code + recno;
- PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));
+ PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
}
/* If not a forward reference, and the subpattern is still open,
@@ -5695,23 +5716,11 @@
}
}
- /* Insert the recursion/subroutine item, automatically wrapped inside
- "once" brackets. Set up a "previous group" length so that a
- subsequent quantifier will work. */
+ /* Insert the recursion/subroutine item. */
- *code = OP_ONCE;
- PUT(code, 1, 2 + 2*LINK_SIZE);
- code += 1 + LINK_SIZE;
-
*code = OP_RECURSE;
PUT(code, 1, (int)(called - cd->start_code));
code += 1 + LINK_SIZE;
-
- *code = OP_KET;
- PUT(code, 1, 2 + 2*LINK_SIZE);
- code += 1 + LINK_SIZE;
-
- length_prevgroup = 3 + 3*LINK_SIZE;
}
/* Can't determine a first byte now */
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testinput1 2011-07-18 10:14:09 UTC (rev 621)
@@ -4181,4 +4181,13 @@
/(?(?=(a))a)(b)/
ab
+/^(?:a|ab)++c/
+ aaaabc
+
+/^(?>a|ab)++c/
+ aaaabc
+
+/^(?:a|ab)+c/
+ aaaabc
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testinput11 2011-07-18 10:14:09 UTC (rev 621)
@@ -633,4 +633,10 @@
/(?(?=(a(*ACCEPT)z))a)/
a
+/^(a)(?1)+ab/
+ aaaab
+
+/^(a)(?1)++ab/
+ aaaab
+
/-- End of testinput11 --/
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testinput2 2011-07-18 10:14:09 UTC (rev 621)
@@ -3771,4 +3771,12 @@
/(abc)(?1)/SI
+/^(?>a)++/
+ aa\M
+ aaaaaaaaa\M
+
+/(a)(?1)++/
+ aa\M
+ aaaaaaaaa\M
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput1 2011-07-18 10:14:09 UTC (rev 621)
@@ -6843,4 +6843,16 @@
1: a
2: b
+/^(?:a|ab)++c/
+ aaaabc
+No match
+
+/^(?>a|ab)++c/
+ aaaabc
+No match
+
+/^(?:a|ab)+c/
+ aaaabc
+ 0: aaaabc
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput10 2011-07-18 10:14:09 UTC (rev 621)
@@ -166,18 +166,16 @@
------------------------------------------------------------------
/(a(?1)b)/BM
-Memory allocation (code space): 28
+Memory allocation (code space): 22
------------------------------------------------------------------
- 0 24 Bra
- 3 18 CBra 1
+ 0 18 Bra
+ 3 12 CBra 1
8 a
- 10 6 Once
- 13 3 Recurse
- 16 6 Ket
- 19 b
- 21 18 Ket
- 24 24 Ket
- 27 End
+ 10 3 Recurse
+ 13 b
+ 15 12 Ket
+ 18 18 Ket
+ 21 End
------------------------------------------------------------------
/(a(?1)+b)/BM
@@ -234,9 +232,9 @@
------------------------------------------------------------------
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
-Memory allocation (code space): 43
+Memory allocation (code space): 37
------------------------------------------------------------------
- 0 36 Bra
+ 0 30 Bra
3 7 CBra 1
8 a
10 7 Ket
@@ -245,12 +243,10 @@
15 Any
16 \1
19 bbb
- 25 6 Once
- 28 3 Recurse
- 31 6 Ket
- 34 d
- 36 36 Ket
- 39 End
+ 25 3 Recurse
+ 28 d
+ 30 30 Ket
+ 33 End
------------------------------------------------------------------
/abc(?C255)de(?C)f/BM
Modified: code/trunk/testdata/testoutput11
===================================================================
--- code/trunk/testdata/testoutput11 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput11 2011-07-18 10:14:09 UTC (rev 621)
@@ -1203,4 +1203,13 @@
0: a
1: a
+/^(a)(?1)+ab/
+ aaaab
+ 0: aaaab
+ 1: a
+
+/^(a)(?1)++ab/
+ aaaab
+No match
+
/-- End of testinput11 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput2 2011-07-18 10:14:09 UTC (rev 621)
@@ -4004,9 +4004,7 @@
Bra
CBra 1
a
- Once
Recurse
- Ket
b
Ket
Ket
@@ -4227,9 +4225,7 @@
Any
\1
bbb
- Once
Recurse
- Ket
d
Ket
End
@@ -4601,9 +4597,7 @@
a
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -4623,9 +4617,7 @@
a
CBra 2
b
- Once
Recurse
- Ket
c
Ket
KetRmax
@@ -4646,9 +4638,7 @@
a
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -4657,9 +4647,7 @@
a
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -7765,9 +7753,7 @@
------------------------------------------------------------------
Bra
^
- Once
Recurse
- Ket
[()]
CBra 1
Ket
@@ -7801,9 +7787,7 @@
------------------------------------------------------------------
Bra
^
- Once
Recurse
- Ket
()
CBra 1
Ket
@@ -7815,9 +7799,7 @@
------------------------------------------------------------------
Bra
^
- Once
Recurse
- Ket
[(\]a]
CBra 1
Ket
@@ -7830,9 +7812,7 @@
------------------------------------------------------------------
Bra
^
- Once
Recurse
- Ket
CBra 1
Ket
Ket
@@ -8284,10 +8264,8 @@
Alt
c
Ket
- Once
Recurse
Ket
- Ket
End
------------------------------------------------------------------
abc
@@ -8298,9 +8276,7 @@
------------------------------------------------------------------
Bra
xy
- Once
Recurse
- Ket
CBra 1
abc
Ket
@@ -9928,12 +9904,8 @@
/(?&word)(?&element)(?(DEFINE)(?<element><[^m][^>]>[^<])(?<word>\w*+))/BZ
------------------------------------------------------------------
Bra
- Once
Recurse
- Ket
- Once
Recurse
- Ket
Cond
Cond def
CBra 1
@@ -9954,12 +9926,8 @@
/(?&word)(?&element)(?(DEFINE)(?<element><[^\d][^>]>[^<])(?<word>\w*+))/BZ
------------------------------------------------------------------
Bra
- Once
Recurse
- Ket
- Once
Recurse
- Ket
Cond
Cond def
CBra 1
@@ -10700,12 +10668,10 @@
Cond nrecurse 1
$
Alt
- Once
Recurse
Ket
Ket
Ket
- Ket
End
------------------------------------------------------------------
Capturing subpattern count = 4
@@ -10923,12 +10889,10 @@
------------------------------------------------------------------
Bra
^
- Once
- Brazero
- Once
+ Braposzero
+ SBraPos
Recurse
- KetRmax
- Ket
+ KetRpos
Cond
Cond def
CBra 1
@@ -11940,4 +11904,26 @@
Subject length lower bound = 6
No set of starting bytes
+/^(?>a)++/
+ aa\M
+Minimum match() limit = 5
+Minimum match() recursion limit = 3
+ 0: aa
+ aaaaaaaaa\M
+Minimum match() limit = 12
+Minimum match() recursion limit = 3
+ 0: aaaaaaaaa
+
+/(a)(?1)++/
+ aa\M
+Minimum match() limit = 7
+Minimum match() recursion limit = 5
+ 0: aa
+ 1: a
+ aaaaaaaaa\M
+Minimum match() limit = 21
+Minimum match() recursion limit = 5
+ 0: aaaaaaaaa
+ 1: a
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput5 2011-07-18 10:14:09 UTC (rev 621)
@@ -599,10 +599,8 @@
Bra
\x{100}*+
\d
- Once
Recurse
Ket
- Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
@@ -906,11 +904,9 @@
\x{100}abc
CBra 1
xyz
- Once
Recurse
Ket
Ket
- Ket
End
------------------------------------------------------------------
Capturing subpattern count = 1
@@ -925,11 +921,9 @@
abc
CBra 1
xyz
- Once
Recurse
Ket
Ket
- Ket
End
------------------------------------------------------------------
Capturing subpattern count = 1
@@ -944,11 +938,9 @@
abc
CBra 1
xyz
- Once
Recurse
Ket
Ket
- Ket
End
------------------------------------------------------------------
Capturing subpattern count = 1
@@ -964,9 +956,7 @@
\x{100}
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -987,9 +977,7 @@
\x{100}
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -998,9 +986,7 @@
\x{100}
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -1021,9 +1007,7 @@
\x{100}
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -1044,9 +1028,7 @@
\x{100}
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
@@ -1055,9 +1037,7 @@
\x{100}
CBra 2
b
- Once
Recurse
- Ket
c
Ket
Ket
Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7 2011-07-17 13:53:14 UTC (rev 620)
+++ code/trunk/testdata/testoutput7 2011-07-18 10:14:09 UTC (rev 621)
@@ -676,6 +676,7 @@
/^(a*\w|ab)=(?1)/
ab=ab
0: ab=ab
+ 1: ab=a
/^([^()]|\((?1)*\))*$/
abc