[Pcre-svn] [1348] code/trunk: Implement PCRE_INFO_MATCH_EMPT…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [1348] code/trunk: Implement PCRE_INFO_MATCH_EMPTY and fix 2 bugs concerned with scanning for
Revision: 1348
          http://vcs.pcre.org/viewvc?view=rev&revision=1348
Author:   ph10
Date:     2013-07-05 11:38:37 +0100 (Fri, 05 Jul 2013)


Log Message:
-----------
Implement PCRE_INFO_MATCH_EMPTY and fix 2 bugs concerned with scanning for
empty string matching.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcreapi.3
    code/trunk/pcre.h.in
    code/trunk/pcre_compile.c
    code/trunk/pcre_fullinfo.c
    code/trunk/pcre_internal.h
    code/trunk/pcretest.c
    code/trunk/testdata/testinput14
    code/trunk/testdata/testoutput11-16
    code/trunk/testdata/testoutput11-32
    code/trunk/testdata/testoutput11-8
    code/trunk/testdata/testoutput12
    code/trunk/testdata/testoutput14
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput5


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/ChangeLog    2013-07-05 10:38:37 UTC (rev 1348)
@@ -24,7 +24,21 @@


5. Cleaned up a "may be uninitialized" compiler warning in pcre_exec.c.

+6.  In UTF mode, the code for checking whether a group could match an empty
+    string (which is used for indefinitely repeated groups to allow for
+    breaking an infinite loop) was broken when the group contained a repeated
+    negated single-character class whose minimum repetition was zero and whose
+    character occupied more than one data item (for example, [^\x{100}]* in
+    UTF-8 mode). The effect was undefined: the group might or might not be
+    deemed as matching an empty string, or the program might have crashed.
+    
+7.  The code for checking whether a group could match an empty string was not
+    recognizing that \h, \H, \v, \V, and \R must match a character.
+    
+8.  Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
+    an empty string. If it can, pcretest shows this in its information output. 


+
Version 8.33 28-May-2013
--------------------------


Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/doc/pcreapi.3    2013-07-05 10:38:37 UTC (rev 1348)
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "12 June 2013" "PCRE 8.33"
+.TH PCREAPI 3 "05 July 2013" "PCRE 8.34"
 .SH NAME
 PCRE - Perl-compatible regular expressions
 .sp
@@ -1286,10 +1286,15 @@
 is -1.
 .P
 Since for the 32-bit library using the non-UTF-32 mode, this function is unable
-to return the full 32-bit range of the character, this value is deprecated;
+to return the full 32-bit range of characters, this value is deprecated;
 instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
 be used.
 .sp
+  PCRE_INFO_MATCH_EMPTY
+.sp
+Return 1 if the pattern can match an empty string, otherwise 0. The fourth 
+argument should point to an \fBint\fP variable.   
+.sp
   PCRE_INFO_MATCHLIMIT
 .sp
 If the pattern set a match limit by including an item of the form
@@ -2842,6 +2847,6 @@
 .rs
 .sp
 .nf
-Last updated: 02 July 2013
+Last updated: 05 July 2013
 Copyright (c) 1997-2013 University of Cambridge.
 .fi


Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre.h.in    2013-07-05 10:38:37 UTC (rev 1348)
@@ -277,6 +277,7 @@
 #define PCRE_INFO_REQUIREDCHARFLAGS 22
 #define PCRE_INFO_MATCHLIMIT        23
 #define PCRE_INFO_RECURSIONLIMIT    24
+#define PCRE_INFO_MATCH_EMPTY       25


/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre_compile.c    2013-07-05 10:38:37 UTC (rev 1348)
@@ -2353,15 +2353,23 @@
   endcode     points to where to stop
   utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
   cd          contains pointers to tables etc.
+  recurses    chain of recurse_check to catch mutual recursion


 Returns:      TRUE if what is matched could be empty
 */


+typedef struct recurse_check {
+  struct recurse_check *prev;
+  const pcre_uchar *group;
+} recurse_check;    
+
 static BOOL
 could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
-  BOOL utf, compile_data *cd)
+  BOOL utf, compile_data *cd, recurse_check *recurses)
 {
 register pcre_uchar c;
+recurse_check this_recurse;
+
 for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
      code < endcode;
      code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
@@ -2369,7 +2377,7 @@
   const pcre_uchar *ccode;


c = *code;
-
+
/* Skip over forward assertions; the other assertions are skipped by
first_significant_code() with a TRUE final argument. */

@@ -2389,25 +2397,50 @@

   if (c == OP_RECURSE)
     {
-    const pcre_uchar *scode;
+    const pcre_uchar *scode = cd->start_code + GET(code, 1);
     BOOL empty_branch;


-    /* Test for forward reference */
+    /* Test for forward reference or uncompleted reference. This is disabled
+    when called to scan a completed pattern by setting cd->start_workspace to
+    NULL. */


-    for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
-      if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+    if (cd->start_workspace != NULL)
+      { 
+      const pcre_uchar *tcode; 
+      for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
+        if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
+      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
+      }
+    
+    /* If we are scanning a completed pattern, there are no forward references 
+    and all groups are complete. We need to detect whether this is a recursive 
+    call, as otherwise there will be an infinite loop. If it is a recursion,
+    just skip over it. Simple recursions are easily detected. For mutual 
+    recursions we keep a chain on the stack. */ 
+     
+    else
+      {  
+      recurse_check *r = recurses;
+      const pcre_uchar *endgroup = scode;
+       
+      do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
+      if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
+      
+      for (r = recurses; r != NULL; r = r->prev)
+        if (r->group == scode) break;
+      if (r != NULL) continue;   /* Mutual recursion */
+      }


-    /* Not a forward reference, test for completed backward reference */
+    /* Completed reference; scan the referenced group, remembering it on the
+    stack chain to detect mutual recursions. */


     empty_branch = FALSE;
-    scode = cd->start_code + GET(code, 1);
-    if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
-
-    /* Completed backwards reference */
-
+    this_recurse.prev = recurses;
+    this_recurse.group = scode; 
+     
     do
       {
-      if (could_be_empty_branch(scode, endcode, utf, cd))
+      if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
         {
         empty_branch = TRUE;
         break;
@@ -2463,7 +2496,7 @@
       empty_branch = FALSE;
       do
         {
-        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
+        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
           empty_branch = TRUE;
         code += GET(code, 1);
         }
@@ -2521,34 +2554,57 @@


     /* Opcodes that must match a character */


+    case OP_ANY:
+    case OP_ALLANY:
+    case OP_ANYBYTE:
+     
     case OP_PROP:
     case OP_NOTPROP:
+    case OP_ANYNL:
+     
+    case OP_NOT_HSPACE:
+    case OP_HSPACE:
+    case OP_NOT_VSPACE:
+    case OP_VSPACE:    
     case OP_EXTUNI:
+ 
     case OP_NOT_DIGIT:
     case OP_DIGIT:
     case OP_NOT_WHITESPACE:
     case OP_WHITESPACE:
     case OP_NOT_WORDCHAR:
     case OP_WORDCHAR:
-    case OP_ANY:
-    case OP_ALLANY:
-    case OP_ANYBYTE:
+     
     case OP_CHAR:
     case OP_CHARI:
     case OP_NOT:
     case OP_NOTI:
+     
     case OP_PLUS:
+    case OP_PLUSI:
     case OP_MINPLUS:
-    case OP_POSPLUS:
-    case OP_EXACT:
+    case OP_MINPLUSI:
+
     case OP_NOTPLUS:
+    case OP_NOTPLUSI:
     case OP_NOTMINPLUS:
+    case OP_NOTMINPLUSI:
+     
+    case OP_POSPLUS:
+    case OP_POSPLUSI:
     case OP_NOTPOSPLUS:
+    case OP_NOTPOSPLUSI:
+     
+    case OP_EXACT:
+    case OP_EXACTI:
     case OP_NOTEXACT:
+    case OP_NOTEXACTI:  
+     
     case OP_TYPEPLUS:
     case OP_TYPEMINPLUS:
     case OP_TYPEPOSPLUS:
     case OP_TYPEEXACT:
+     
     return FALSE;


     /* These are going to continue, as they may be empty, but we have to
@@ -2582,30 +2638,58 @@
     return TRUE;


     /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
-    MINUPTO, and POSUPTO may be followed by a multibyte character */
+    MINUPTO, and POSUPTO and their caseless and negative versions may be
+    followed by a multibyte character. */


 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
     case OP_STAR:
     case OP_STARI:
+    case OP_NOTSTAR:    
+    case OP_NOTSTARI:   
+     
     case OP_MINSTAR:
     case OP_MINSTARI:
+    case OP_NOTMINSTAR: 
+    case OP_NOTMINSTARI:
+     
     case OP_POSSTAR:
     case OP_POSSTARI:
+    case OP_NOTPOSSTAR: 
+    case OP_NOTPOSSTARI:
+     
     case OP_QUERY:
     case OP_QUERYI:
+    case OP_NOTQUERY:   
+    case OP_NOTQUERYI:  
+     
     case OP_MINQUERY:
     case OP_MINQUERYI:
+    case OP_NOTMINQUERY:
+    case OP_NOTMINQUERYI:
+     
     case OP_POSQUERY:
     case OP_POSQUERYI:
+    case OP_NOTPOSQUERY:
+    case OP_NOTPOSQUERYI:
+     
     if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
     break;


     case OP_UPTO:
     case OP_UPTOI:
+    case OP_NOTUPTO:    
+    case OP_NOTUPTOI:   
+     
     case OP_MINUPTO:
     case OP_MINUPTOI:
+    case OP_NOTMINUPTO: 
+    case OP_NOTMINUPTOI:
+     
     case OP_POSUPTO:
     case OP_POSUPTOI:
+    case OP_NOTPOSUPTO: 
+    case OP_NOTPOSUPTOI:
+     
     if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
     break;
 #endif
@@ -2659,7 +2743,7 @@
 {
 while (bcptr != NULL && bcptr->current_branch >= code)
   {
-  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
+  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
     return FALSE;
   bcptr = bcptr->outer;
   }
@@ -5391,7 +5475,7 @@
             pcre_uchar *scode = bracode;
             do
               {
-              if (could_be_empty_branch(scode, ketcode, utf, cd))
+              if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
                 {
                 *bracode += OP_SBRA - OP_BRA;
                 break;
@@ -8213,10 +8297,12 @@
     }
   }


-/* If the workspace had to be expanded, free the new memory. */
+/* If the workspace had to be expanded, free the new memory. Set the pointer to
+NULL to indicate that forward references have been filled in. */

if (cd->workspace_size > COMPILE_WORK_SIZE)
(PUBL(free))((void *)cd->start_workspace);
+cd->start_workspace = NULL;

/* Give an error if there's back reference to a non-existent capturing
subpattern. */
@@ -8420,6 +8506,20 @@
}
#endif /* PCRE_DEBUG */

+/* Check for a pattern that can match an empty string, so that this information
+can be provided to applications. */
+
+do
+  {
+  if (could_be_empty_branch(codestart, code, utf, cd, NULL))
+    {
+    re->flags |= PCRE_MATCH_EMPTY;
+    break;
+    }  
+  codestart += GET(codestart, 1);
+  }
+while (*codestart == OP_ALT);
+
 #if defined COMPILE_PCRE8
 return (pcre *)re;
 #elif defined COMPILE_PCRE16


Modified: code/trunk/pcre_fullinfo.c
===================================================================
--- code/trunk/pcre_fullinfo.c    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre_fullinfo.c    2013-07-05 10:38:37 UTC (rev 1348)
@@ -231,6 +231,10 @@
   if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
   *((pcre_uint32 *)where) = re->limit_recursion;
   break;
+  
+  case PCRE_INFO_MATCH_EMPTY: 
+  *((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
+  break; 


default: return PCRE_ERROR_BADOPTION;
}

Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcre_internal.h    2013-07-05 10:38:37 UTC (rev 1348)
@@ -1149,6 +1149,7 @@
 #define PCRE_HASTHEN       0x00001000  /* pattern contains (*THEN) */
 #define PCRE_MLSET         0x00002000  /* match limit set by regex */
 #define PCRE_RLSET         0x00004000  /* recursion limit set by regex */
+#define PCRE_MATCH_EMPTY   0x00008000  /* pattern can match empty string */


 #if defined COMPILE_PCRE8
 #define PCRE_MODE          PCRE_MODE8


Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/pcretest.c    2013-07-05 10:38:37 UTC (rev 1348)
@@ -4020,7 +4020,7 @@
       pcre_uint32 first_char, need_char;
       pcre_uint32 match_limit, recursion_limit;
       int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
-        hascrorlf, maxlookbehind;
+        hascrorlf, maxlookbehind, match_empty;
       int nameentrysize, namecount;
       const pcre_uint8 *nametable;


@@ -4037,6 +4037,7 @@
           new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
           new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
           new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
+          new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
           new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
           != 0)
         goto SKIP_DATA;
@@ -4085,8 +4086,9 @@
           }
         }


-      if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
-      if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
+      if (!okpartial)  fprintf(outfile, "Partial matching not supported\n");
+      if (hascrorlf)   fprintf(outfile, "Contains explicit CR or LF match\n");
+      if (match_empty) fprintf(outfile, "May match empty string\n");


       all_options = REAL_PCRE_OPTIONS(re);
       if (do_flip) all_options = swap_uint32(all_options);


Modified: code/trunk/testdata/testinput14
===================================================================
--- code/trunk/testdata/testinput14    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testinput14    2013-07-05 10:38:37 UTC (rev 1348)
@@ -294,8 +294,12 @@


/\h/SI

+/\H/SI
+
/\v/SI

+/\V/SI
+
/\R/SI

/[\h]/BZ

Modified: code/trunk/testdata/testoutput11-16
===================================================================
--- code/trunk/testdata/testoutput11-16    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput11-16    2013-07-05 10:38:37 UTC (rev 1348)
@@ -63,6 +63,7 @@
   4     End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
+May match empty string
 Options: extended
 No first char
 No need char


Modified: code/trunk/testdata/testoutput11-32
===================================================================
--- code/trunk/testdata/testoutput11-32    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput11-32    2013-07-05 10:38:37 UTC (rev 1348)
@@ -63,6 +63,7 @@
   4     End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
+May match empty string
 Options: extended
 No first char
 No need char


Modified: code/trunk/testdata/testoutput11-8
===================================================================
--- code/trunk/testdata/testoutput11-8    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput11-8    2013-07-05 10:38:37 UTC (rev 1348)
@@ -63,6 +63,7 @@
   6     End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
+May match empty string
 Options: extended
 No first char
 No need char


Modified: code/trunk/testdata/testoutput12
===================================================================
--- code/trunk/testdata/testoutput12    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput12    2013-07-05 10:38:37 UTC (rev 1348)
@@ -13,6 +13,7 @@


/(?(?C1)(?=a)a)/S+I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -21,6 +22,7 @@

/(?(?C1)(?=a)a)/S!+I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -47,6 +49,7 @@

/a*/SI
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char

Modified: code/trunk/testdata/testoutput14
===================================================================
--- code/trunk/testdata/testoutput14    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput14    2013-07-05 10:38:37 UTC (rev 1348)
@@ -380,6 +380,14 @@
 Subject length lower bound = 1
 Starting byte set: \x09 \x20 \xa0 


+/\H/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
/\v/SI
Capturing subpattern count = 0
No options
@@ -388,6 +396,14 @@
Subject length lower bound = 1
Starting byte set: \x0a \x0b \x0c \x0d \x85

+/\V/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+No set of starting bytes
+
/\R/SI
Capturing subpattern count = 0
No options

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput2    2013-07-05 10:38:37 UTC (rev 1348)
@@ -10,6 +10,7 @@


/(a)b|/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -498,6 +499,7 @@

/(?s).*/I
Capturing subpattern count = 0
+May match empty string
Options: anchored dotall
No first char
No need char
@@ -762,6 +764,7 @@
/(?<=ab(?i)x|y|z)/I
Capturing subpattern count = 0
Max lookbehind = 3
+May match empty string
No options
No first char
No need char
@@ -769,6 +772,7 @@
/(?>.*)(?<=(abcd)|(xyz))/I
Capturing subpattern count = 2
Max lookbehind = 4
+May match empty string
No options
No first char
No need char
@@ -1377,6 +1381,7 @@

/a*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -1395,6 +1400,7 @@

/a{0,3}/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -1594,6 +1600,7 @@

/a?b?/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -1612,6 +1619,7 @@

 /|-/I
 Capturing subpattern count = 0
+May match empty string
 No options
 No first char
 No need char
@@ -2625,6 +2633,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
+May match empty string
 Options: extended
 No first char
 No need char
@@ -2767,6 +2776,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
+May match empty string
 No options
 No first char
 No need char
@@ -2866,6 +2876,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
+May match empty string
 No options
 No first char
 No need char
@@ -2908,12 +2919,14 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
+May match empty string
 No options
 No first char
 No need char


/^(\w++|\s++)*$/I
Capturing subpattern count = 1
+May match empty string
Options: anchored
No first char
No need char
@@ -3289,6 +3302,7 @@

/(?=a).*/I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -3307,6 +3321,7 @@

/(?=a)(?=b)/I
Capturing subpattern count = 0
+May match empty string
No options
First char = 'a'
No need char
@@ -3373,6 +3388,7 @@

/(a)*/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -3601,6 +3617,7 @@

/(?C0)(abc(?C1))*/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -3634,6 +3651,7 @@

/(\d{3}(?C))*/I
Capturing subpattern count = 1
+May match empty string
No options
No first char
No need char
@@ -3880,6 +3898,7 @@

 /^([^()]|\((?1)*\))*$/I
 Capturing subpattern count = 1
+May match empty string
 Options: anchored
 No first char
 No need char
@@ -4159,6 +4178,7 @@
   one     1
   three   3
   two     2
+May match empty string
 Options: anchored caseless
 No first char
 No need char
@@ -4258,6 +4278,7 @@


 /(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a/Is
 Capturing subpattern count = 31
+May match empty string
 Options: anchored dotall
 No first char
 No need char
@@ -4265,6 +4286,7 @@
 /(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\31/Is
 Capturing subpattern count = 31
 Max back reference = 31
+May match empty string
 Options: dotall
 No first char
 No need char
@@ -4272,6 +4294,7 @@
 /(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)|(.*)a\32/Is
 Capturing subpattern count = 32
 Max back reference = 32
+May match empty string
 Options: dotall
 No first char
 No need char
@@ -4423,6 +4446,7 @@
 Named capturing subpatterns:
   Tes    1
   Test   2
+May match empty string
 No options
 No first char
 No need char
@@ -4441,6 +4465,7 @@
 Named capturing subpatterns:
   Tes    2
   Test   1
+May match empty string
 No options
 No first char
 No need char
@@ -4518,6 +4543,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
+May match empty string
 No options
 No first char
 No need char
@@ -4538,6 +4564,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
+May match empty string
 No options
 No first char
 No need char
@@ -4569,6 +4596,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
+May match empty string
 No options
 No first char
 No need char
@@ -5397,6 +5425,7 @@
 /\b.*/I
 Capturing subpattern count = 0
 Max lookbehind = 1
+May match empty string
 No options
 No first char
 No need char
@@ -5406,6 +5435,7 @@
 /\b.*/Is
 Capturing subpattern count = 0
 Max lookbehind = 1
+May match empty string
 Options: dotall
 No first char
 No need char
@@ -5414,6 +5444,7 @@


/(?!.bcd).*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6002,6 +6033,7 @@

/[^()]*(?:\((?R)\)[^()]*)*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6014,6 +6046,7 @@

/[^()]*(?:\((?>(?R))\)[^()]*)*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6024,6 +6057,7 @@

/[^()]*(?:\((?R)\))*[^()]*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6034,6 +6068,7 @@

/(?:\((?R)\))*[^()]*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6046,6 +6081,7 @@

/(?:\((?R)\))|[^()]*/I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -6503,6 +6539,7 @@

/.*/I<lf>
Capturing subpattern count = 0
+May match empty string
Options:
Forced newline sequence: LF
First char at start or follows newline
@@ -6544,6 +6581,7 @@

 +((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+I
 Capturing subpattern count = 1
+May match empty string
 No options
 No first char
 No need char
@@ -7710,6 +7748,7 @@
   one     1
   three   3
   two     2
+May match empty string
 Options: anchored caseless
 No first char
 No need char
@@ -9249,6 +9288,7 @@


/(?(?=.*b)b|^)/CI
Capturing subpattern count = 0
+May match empty string
Options:
No first char
No need char
@@ -11036,12 +11076,14 @@

/(^ab|^)+/I
Capturing subpattern count = 1
+May match empty string
Options: anchored
No first char
No need char

/(^ab|^)++/I
Capturing subpattern count = 1
+May match empty string
Options: anchored
No first char
No need char
@@ -11060,12 +11102,14 @@

/(?:^ab|^)+/I
Capturing subpattern count = 0
+May match empty string
Options: anchored
No first char
No need char

/(?:^ab|^)++/I
Capturing subpattern count = 0
+May match empty string
Options: anchored
No first char
No need char
@@ -11084,12 +11128,14 @@

/(.*ab|.*)+/I
Capturing subpattern count = 1
+May match empty string
No options
First char at start or follows newline
No need char

/(.*ab|.*)++/I
Capturing subpattern count = 1
+May match empty string
No options
First char at start or follows newline
No need char
@@ -11108,12 +11154,14 @@

/(?:.*ab|.*)+/I
Capturing subpattern count = 0
+May match empty string
No options
First char at start or follows newline
No need char

/(?:.*ab|.*)++/I
Capturing subpattern count = 0
+May match empty string
No options
First char at start or follows newline
No need char
@@ -11645,6 +11693,7 @@

/a(*SKIP)c|b(*ACCEPT)|/+S!I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -12293,6 +12342,7 @@
/(?>.*?)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
Max lookbehind = 4
+May match empty string
No options
No first char
No need char
@@ -12300,6 +12350,7 @@
/(?>.*)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
Max lookbehind = 4
+May match empty string
No options
No first char
No need char
@@ -12338,6 +12389,7 @@

/.?/S-I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char
@@ -12345,6 +12397,7 @@

/.?/S!I
Capturing subpattern count = 0
+May match empty string
No options
No first char
No need char

Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2013-07-02 18:52:42 UTC (rev 1347)
+++ code/trunk/testdata/testoutput5    2013-07-05 10:38:37 UTC (rev 1348)
@@ -151,6 +151,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
+May match empty string
 Options: utf
 No first char
 No need char
@@ -373,6 +374,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
+May match empty string
 Options: utf
 No first char
 No need char
@@ -404,6 +406,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
+May match empty string
 Options: utf
 No first char
 No need char
@@ -424,6 +427,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
+May match empty string
 Options: utf
 No first char
 No need char
@@ -455,6 +459,7 @@
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
+May match empty string
 Options: utf
 No first char
 No need char