Revision: 1348
http://vcs.pcre.org/viewvc?view=rev&revision=1348
Author: ph10
Date: 2013-07-05 11:38:37 +0100 (Fri, 05 Jul 2013)
Log Message:
-----------
Implement PCRE_INFO_MATCH_EMPTY and fix 2 bugs concerned with scanning for
empty string matching.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcreapi.3
code/trunk/pcre.h.in
code/trunk/pcre_compile.c
code/trunk/pcre_fullinfo.c
code/trunk/pcre_internal.h
code/trunk/pcretest.c
code/trunk/testdata/testinput14
code/trunk/testdata/testoutput11-16
code/trunk/testdata/testoutput11-32
code/trunk/testdata/testoutput11-8
code/trunk/testdata/testoutput12
code/trunk/testdata/testoutput14
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/ChangeLog 2013-07-05 10:38:37 UTC (rev 1348)
@@ -24,7 +24,21 @@
5. Cleaned up a "may be uninitialized" compiler warning in pcre_exec.c.
+6. In UTF mode, the code for checking whether a group could match an empty
+ string (which is used for indefinitely repeated groups to allow for
+ breaking an infinite loop) was broken when the group contained a negated
+ single-character class that had a minimum repetition of zero and whose
+ character occupied more than one data item (for example, [^\x{100}]* in
+ UTF-8 mode). The effect was undefined: the group might or might not be
+ deemed to match an empty string, or the program might have crashed.
+
+7. The code for checking whether a group could match an empty string was not
+ recognizing that \h, \H, \v, \V, and \R must match a character.
+
+8. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
+ an empty string. If it can, pcretest shows this in its information output.
+
Version 8.33 28-May-2013
--------------------------
Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/doc/pcreapi.3 2013-07-05 10:38:37 UTC (rev 1348)
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "12 June 2013" "PCRE 8.33"
+.TH PCREAPI 3 "05 July 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@@ -1286,10 +1286,15 @@
is -1.
.P
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
-to return the full 32-bit range of the character, this value is deprecated;
+to return the full 32-bit range of characters, this value is deprecated;
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
be used.
.sp
+ PCRE_INFO_MATCH_EMPTY
+.sp
+Return 1 if the pattern can match an empty string, otherwise 0. The fourth
+argument should point to an \fBint\fP variable.
+.sp
PCRE_INFO_MATCHLIMIT
.sp
If the pattern set a match limit by including an item of the form
@@ -2842,6 +2847,6 @@
.rs
.sp
.nf
-Last updated: 02 July 2013
+Last updated: 05 July 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre.h.in 2013-07-05 10:38:37 UTC (rev 1348)
@@ -277,6 +277,7 @@
#define PCRE_INFO_REQUIREDCHARFLAGS 22
#define PCRE_INFO_MATCHLIMIT 23
#define PCRE_INFO_RECURSIONLIMIT 24
+#define PCRE_INFO_MATCH_EMPTY 25
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre_compile.c 2013-07-05 10:38:37 UTC (rev 1348)
@@ -2353,15 +2353,23 @@
endcode points to where to stop
utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
cd contains pointers to tables etc.
+ recurses chain of recurse_check to catch mutual recursion
Returns: TRUE if what is matched could be empty
*/
+typedef struct recurse_check { /* one link in the on-stack chain of groups being scanned through OP_RECURSE */
+ struct recurse_check *prev; /* link to the caller's entry; NULL ends the chain */
+ const pcre_uchar *group; /* start of the referenced group this entry is scanning */
+} recurse_check;
+
static BOOL
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
- BOOL utf, compile_data *cd)
+ BOOL utf, compile_data *cd, recurse_check *recurses)
{
register pcre_uchar c;
+recurse_check this_recurse;
+
for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
code < endcode;
code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
@@ -2369,7 +2377,7 @@
const pcre_uchar *ccode;
c = *code;
-
+
/* Skip over forward assertions; the other assertions are skipped by
first_significant_code() with a TRUE final argument. */
@@ -2389,25 +2397,50 @@
if (c == OP_RECURSE)
{
- const pcre_uchar *scode;
+ const pcre_uchar *scode = cd->start_code + GET(code, 1);
BOOL empty_branch;
- /* Test for forward reference */
+ /* Test for forward reference or uncompleted reference. This test is skipped
+ when scanning a completed pattern, which is indicated by cd->start_workspace
+ being NULL. */
- for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
- if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+ if (cd->start_workspace != NULL)
+ {
+ const pcre_uchar *tcode;
+ for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
+ if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+ if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
+ }
+
+ /* If we are scanning a completed pattern, there are no forward references
+ and all groups are complete. We need to detect whether this is a recursive
+ call, as otherwise there will be an infinite loop. If it is a recursion,
+ just skip over it. Simple recursions are easily detected. For mutual
+ recursions we keep a chain on the stack. */
+
+ else
+ {
+ recurse_check *r = recurses;
+ const pcre_uchar *endgroup = scode;
+
+ do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
+ if (code >= scode && code <= endgroup) continue; /* Simple recursion */
+
+ for (r = recurses; r != NULL; r = r->prev)
+ if (r->group == scode) break;
+ if (r != NULL) continue; /* Mutual recursion */
+ }
- /* Not a forward reference, test for completed backward reference */
+ /* Completed reference; scan the referenced group, remembering it on the
+ stack chain to detect mutual recursions. */
empty_branch = FALSE;
- scode = cd->start_code + GET(code, 1);
- if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
-
- /* Completed backwards reference */
-
+ this_recurse.prev = recurses;
+ this_recurse.group = scode;
+
do
{
- if (could_be_empty_branch(scode, endcode, utf, cd))
+ if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
{
empty_branch = TRUE;
break;
@@ -2463,7 +2496,7 @@
empty_branch = FALSE;
do
{
- if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
+ if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
empty_branch = TRUE;
code += GET(code, 1);
}
@@ -2521,34 +2554,57 @@
/* Opcodes that must match a character */
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYBYTE:
+
case OP_PROP:
case OP_NOTPROP:
+ case OP_ANYNL:
+
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
case OP_EXTUNI:
+
case OP_NOT_DIGIT:
case OP_DIGIT:
case OP_NOT_WHITESPACE:
case OP_WHITESPACE:
case OP_NOT_WORDCHAR:
case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
+
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
+
case OP_PLUS:
+ case OP_PLUSI:
case OP_MINPLUS:
- case OP_POSPLUS:
- case OP_EXACT:
+ case OP_MINPLUSI:
+
case OP_NOTPLUS:
+ case OP_NOTPLUSI:
case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
+
+ case OP_POSPLUS:
+ case OP_POSPLUSI:
case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
+
+ case OP_EXACT:
+ case OP_EXACTI:
case OP_NOTEXACT:
+ case OP_NOTEXACTI:
+
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
case OP_TYPEEXACT:
+
return FALSE;
/* These are going to continue, as they may be empty, but we have to
@@ -2582,30 +2638,58 @@
return TRUE;
/* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
- MINUPTO, and POSUPTO may be followed by a multibyte character */
+ MINUPTO, and POSUPTO, together with their caseless and negated versions, may
+ be followed by a multibyte character. */
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
case OP_STAR:
case OP_STARI:
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
+
case OP_MINSTAR:
case OP_MINSTARI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
+
case OP_POSSTAR:
case OP_POSSTARI:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
+
case OP_QUERY:
case OP_QUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
+
case OP_MINQUERY:
case OP_MINQUERYI:
+ case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
+
case OP_POSQUERY:
case OP_POSQUERYI:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
+
if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
break;
case OP_UPTO:
case OP_UPTOI:
+ case OP_NOTUPTO:
+ case OP_NOTUPTOI:
+
case OP_MINUPTO:
case OP_MINUPTOI:
+ case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
+
case OP_POSUPTO:
case OP_POSUPTOI:
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
+
if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
break;
#endif
@@ -2659,7 +2743,7 @@
{
while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
return FALSE;
bcptr = bcptr->outer;
}
@@ -5391,7 +5475,7 @@
pcre_uchar *scode = bracode;
do
{
- if (could_be_empty_branch(scode, ketcode, utf, cd))
+ if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
{
*bracode += OP_SBRA - OP_BRA;
break;
@@ -8213,10 +8297,12 @@
}
}
-/* If the workspace had to be expanded, free the new memory. */
+/* If the workspace had to be expanded, free the new memory. Set the pointer to
+NULL to indicate that forward references have been filled in. */
if (cd->workspace_size > COMPILE_WORK_SIZE)
(PUBL(free))((void *)cd->start_workspace);
+cd->start_workspace = NULL;
/* Give an error if there's back reference to a non-existent capturing
subpattern. */
@@ -8420,6 +8506,20 @@
}
#endif /* PCRE_DEBUG */
+/* Check for a pattern that can match an empty string, so that this information
+can be provided to applications. */
+
+do
+ {
+ if (could_be_empty_branch(codestart, code, utf, cd, NULL))
+ {
+ re->flags |= PCRE_MATCH_EMPTY;
+ break;
+ }
+ codestart += GET(codestart, 1);
+ }
+while (*codestart == OP_ALT);
+
#if defined COMPILE_PCRE8
return (pcre *)re;
#elif defined COMPILE_PCRE16
Modified: code/trunk/pcre_fullinfo.c
===================================================================
--- code/trunk/pcre_fullinfo.c 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre_fullinfo.c 2013-07-05 10:38:37 UTC (rev 1348)
@@ -231,6 +231,10 @@
if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
*((pcre_uint32 *)where) = re->limit_recursion;
break;
+
+ case PCRE_INFO_MATCH_EMPTY:
+ *((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
+ break;
default: return PCRE_ERROR_BADOPTION;
}
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre_internal.h 2013-07-05 10:38:37 UTC (rev 1348)
@@ -1149,6 +1149,7 @@
#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
#define PCRE_MLSET 0x00002000 /* match limit set by regex */
#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
+#define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
#if defined COMPILE_PCRE8
#define PCRE_MODE PCRE_MODE8
Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcretest.c 2013-07-05 10:38:37 UTC (rev 1348)
@@ -4020,7 +4020,7 @@
pcre_uint32 first_char, need_char;
pcre_uint32 match_limit, recursion_limit;
int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
- hascrorlf, maxlookbehind;
+ hascrorlf, maxlookbehind, match_empty;
int nameentrysize, namecount;
const pcre_uint8 *nametable;
@@ -4037,6 +4037,7 @@
new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
+ new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
!= 0)
goto SKIP_DATA;
@@ -4085,8 +4086,9 @@
}
}
- if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
- if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
+ if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
+ if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
+ if (match_empty) fprintf(outfile, "May match empty string\n");
all_options = REAL_PCRE_OPTIONS(re);
if (do_flip) all_options = swap_uint32(all_options);
Modified: code/trunk/testdata/testinput14
===================================================================
--- code/trunk/testdata/testinput14 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testinput14 2013-07-05 10:38:37 UTC (rev 1348)
@@ -294,8 +294,12 @@
/\h/SI
+/\H/SI
+
/\v/SI
+/\V/SI
+
/\R/SI
/[\h]/BZ
Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput11-16 2013-07-05 10:38:37 UTC (rev 1348)
@@ -63,6 +63,7 @@
4 End
------------------------------------------------------------------
Capturing subpattern count = 0
+May match empty string
Options: extended
No first char
No need char
Modified: code/trunk/testdata/testoutput11-32
===================================================================
--- code/trunk/testdata/testoutput11-32 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput11-32 2013-07-05 10:38:37 UTC (rev 1348)
@@ -63,6 +63,7 @@
4 End
------------------------------------------------------------------
Capturing subpattern count = 0
+May match empty string
Options: extended
No first char
No need char
Modified: code/trunk/testdata/testoutput11-8
===================================================================
--- code/trunk/testdata/testoutput11-8 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput11-8 2013-07-05 10:38:37 UTC (rev 1348)
@@ -63,6 +63,7 @@
6 End
------------------------------------------------------------------
Capturing subpattern count = 0
+May match empty string
Options: extended
No first char
No need char
Modified: code/trunk/testdata/testoutput12
===================================================================
--- code/trunk/testdata/testoutput12 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput12 2013-07-05 10:38:37 UTC (rev 1348)
@@ -13,6 +13,7 @@
/(?(?C1)(?=a)a)/S+I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -21,6 +22,7 @@
/(?(?C1)(?=a)a)/S!+I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -47,6 +49,7 @@
/a*/SI
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
Modified: code/trunk/testdata/testoutput14
===================================================================
--- code/trunk/testdata/testoutput14 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput14 2013-07-05 10:38:37 UTC (rev 1348)
@@ -380,6 +380,14 @@
Subject length lower bound = 1
Starting byte set: \x09 \x20 \xa0
+/\H/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
/\v/SI
Capturing subpattern count = 0
No options
@@ -388,6 +396,14 @@
Subject length lower bound = 1
Starting byte set: \x0a \x0b \x0c \x0d \x85
+/\V/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
/\R/SI
Capturing subpattern count = 0
No options
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput2 2013-07-05 10:38:37 UTC (rev 1348)
@@ -10,6 +10,7 @@
/(a)b|/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -498,6 +499,7 @@
/(?s).*/I
Capturing subpattern count = 0
+May match empty string
Options: anchored dotall
No first char
No need char
@@ -762,6 +764,7 @@
/(?<=ab(?i)x|y|z)/I
Capturing subpattern count = 0
Max lookbehind = 3
+May match empty string
No options
No first char
No need char
@@ -769,6 +772,7 @@
/(?>.*)(?<=(abcd)|(xyz))/I
Capturing subpattern count = 2
Max lookbehind = 4
+May match empty string
No options
No first char
No need char
@@ -1377,6 +1381,7 @@
/a*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -1395,6 +1400,7 @@
/a{0,3}/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -1594,6 +1600,7 @@
/a?b?/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -1612,6 +1619,7 @@
/|-/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -2625,6 +2633,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
+May match empty string
Options: extended
No first char
No need char
@@ -2767,6 +2776,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -2866,6 +2876,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -2908,12 +2919,14 @@
End
------------------------------------------------------------------
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
/^(\w++|\s++)*$/I
Capturing subpattern count = 1
+May match empty string
Options: anchored
No first char
No need char
@@ -3289,6 +3302,7 @@
/(?=a).*/I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -3307,6 +3321,7 @@
/(?=a)(?=b)/I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -3373,6 +3388,7 @@
/(a)*/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -3601,6 +3617,7 @@
/(?C0)(abc(?C1))*/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -3634,6 +3651,7 @@
/(\d{3}(?C))*/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -3880,6 +3898,7 @@
/^([^()]|\((?1)*\))*$/I
Capturing subpattern count = 1
+May match empty string
Options: anchored
No first char
No need char
@@ -4159,6 +4178,7 @@
one 1
three 3
two 2
+May match empty string
Options: anchored caseless
No first char
No need char
@@ -4258,6 +4278,7 @@
/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a/Is
Capturing subpattern count = 31
+May match empty string
Options: anchored dotall
No first char
No need char
@@ -4265,6 +4286,7 @@
/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\31/Is
Capturing subpattern count = 31
Max back reference = 31
+May match empty string
Options: dotall
No first char
No need char
@@ -4272,6 +4294,7 @@
/(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\32/Is
Capturing subpattern count = 32
Max back reference = 32
+May match empty string
Options: dotall
No first char
No need char
@@ -4423,6 +4446,7 @@
Named capturing subpatterns:
Tes 1
Test 2
+May match empty string
No options
No first char
No need char
@@ -4441,6 +4465,7 @@
Named capturing subpatterns:
Tes 2
Test 1
+May match empty string
No options
No first char
No need char
@@ -4518,6 +4543,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 2
+May match empty string
No options
No first char
No need char
@@ -4538,6 +4564,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 2
+May match empty string
No options
No first char
No need char
@@ -4569,6 +4596,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 2
+May match empty string
No options
No first char
No need char
@@ -5397,6 +5425,7 @@
/\b.*/I
Capturing subpattern count = 0
Max lookbehind = 1
+May match empty string
No options
No first char
No need char
@@ -5406,6 +5435,7 @@
/\b.*/Is
Capturing subpattern count = 0
Max lookbehind = 1
+May match empty string
Options: dotall
No first char
No need char
@@ -5414,6 +5444,7 @@
/(?!.bcd).*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6002,6 +6033,7 @@
/[^()]*(?:\((?R)\)[^()]*)*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6014,6 +6046,7 @@
/[^()]*(?:\((?>(?R))\)[^()]*)*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6024,6 +6057,7 @@
/[^()]*(?:\((?R)\))*[^()]*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6034,6 +6068,7 @@
/(?:\((?R)\))*[^()]*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6046,6 +6081,7 @@
/(?:\((?R)\))|[^()]*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6503,6 +6539,7 @@
/.*/I<lf>
Capturing subpattern count = 0
+May match empty string
Options:
Forced newline sequence: LF
First char at start or follows newline
@@ -6544,6 +6581,7 @@
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -7710,6 +7748,7 @@
one 1
three 3
two 2
+May match empty string
Options: anchored caseless
No first char
No need char
@@ -9249,6 +9288,7 @@
/(?(?=.*b)b|^)/CI
Capturing subpattern count = 0
+May match empty string
Options:
No first char
No need char
@@ -11036,12 +11076,14 @@
/(^ab|^)+/I
Capturing subpattern count = 1
+May match empty string
Options: anchored
No first char
No need char
/(^ab|^)++/I
Capturing subpattern count = 1
+May match empty string
Options: anchored
No first char
No need char
@@ -11060,12 +11102,14 @@
/(?:^ab|^)+/I
Capturing subpattern count = 0
+May match empty string
Options: anchored
No first char
No need char
/(?:^ab|^)++/I
Capturing subpattern count = 0
+May match empty string
Options: anchored
No first char
No need char
@@ -11084,12 +11128,14 @@
/(.*ab|.*)+/I
Capturing subpattern count = 1
+May match empty string
No options
First char at start or follows newline
No need char
/(.*ab|.*)++/I
Capturing subpattern count = 1
+May match empty string
No options
First char at start or follows newline
No need char
@@ -11108,12 +11154,14 @@
/(?:.*ab|.*)+/I
Capturing subpattern count = 0
+May match empty string
No options
First char at start or follows newline
No need char
/(?:.*ab|.*)++/I
Capturing subpattern count = 0
+May match empty string
No options
First char at start or follows newline
No need char
@@ -11645,6 +11693,7 @@
/a(*SKIP)c|b(*ACCEPT)|/+S!I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -12293,6 +12342,7 @@
/(?>.*?)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
Max lookbehind = 4
+May match empty string
No options
No first char
No need char
@@ -12300,6 +12350,7 @@
/(?>.*)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
Max lookbehind = 4
+May match empty string
No options
No first char
No need char
@@ -12338,6 +12389,7 @@
/.?/S-I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -12345,6 +12397,7 @@
/.?/S!I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput5 2013-07-05 10:38:37 UTC (rev 1348)
@@ -151,6 +151,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
+May match empty string
Options: utf
No first char
No need char
@@ -373,6 +374,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 2
+May match empty string
Options: utf
No first char
No need char
@@ -404,6 +406,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 2
+May match empty string
Options: utf
No first char
No need char
@@ -424,6 +427,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 2
+May match empty string
Options: utf
No first char
No need char
@@ -455,6 +459,7 @@
End
------------------------------------------------------------------
Capturing subpattern count = 2
+May match empty string
Options: utf
No first char
No need char