[Pcre-svn] [626] code/trunk: Add the /= modifier to pcretest…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [626] code/trunk: Add the /= modifier to pcretest so as to be able to check unset capturing parentheses at the ends of patterns.
Revision: 626
          http://vcs.pcre.org/viewvc?view=rev&revision=626
Author:   ph10
Date:     2011-07-20 18:51:54 +0100 (Wed, 20 Jul 2011)


Log Message:
-----------
Add the /= modifier to pcretest so as to be able to check unset capturing
parentheses at the ends of patterns.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcretest.1
    code/trunk/pcre_exec.c
    code/trunk/pcretest.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-20 16:46:19 UTC (rev 625)
+++ code/trunk/ChangeLog    2011-07-20 17:51:54 UTC (rev 626)
@@ -154,7 +154,10 @@


 28. Some minor code refactoring concerning Unicode properties and scripts 
     should reduce the stack requirement of match() slightly. 
- 
+    
+29. Added the '=' option to pcretest to check the setting of unused capturing
+    slots at the end of the pattern, which are documented as being -1, but are
+    not included in the return count.  



Version 8.12 15-Jan-2011

Modified: code/trunk/doc/pcretest.1
===================================================================
--- code/trunk/doc/pcretest.1    2011-07-20 16:46:19 UTC (rev 625)
+++ code/trunk/doc/pcretest.1    2011-07-20 17:51:54 UTC (rev 626)
@@ -261,6 +261,14 @@
 remainder is output on the following line with a plus character following the 
 capture number.
 .P
+The \fB/=\fP modifier requests that the values of all potential captured 
+parentheses be output after a match by \fBpcre_exec()\fP. By default, only
+those up to the highest one actually used in the match are output
+(corresponding to the return code from \fBpcre_exec()\fP). Values in the 
+offsets vector corresponding to higher numbers should be set to -1, and these 
+are output as "<unset>". This modifier gives a way of checking that this is
+happening.
+.P
 The \fB/B\fP modifier is a debugging feature. It requests that \fBpcretest\fP
 output a representation of the compiled byte code after compilation. Normally
 this information contains length and offset values; however, if \fB/Z\fP is
@@ -815,6 +823,6 @@
 .rs
 .sp
 .nf
-Last updated: 11 July 2011
+Last updated: 20 July 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2011-07-20 16:46:19 UTC (rev 625)
+++ code/trunk/pcre_exec.c    2011-07-20 17:51:54 UTC (rev 626)
@@ -6370,7 +6370,7 @@
   those at the end that need unsetting here. We can't just unset them all at
   the start of the whole thing because they may get set in one branch that is
   not the final matching branch. */
-  
+
   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
     {
     register int *iptr, *iend;


Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2011-07-20 16:46:19 UTC (rev 625)
+++ code/trunk/pcretest.c    2011-07-20 17:51:54 UTC (rev 626)
@@ -1434,6 +1434,7 @@
   const unsigned char *tables = NULL;
   unsigned long int true_size, true_study_size = 0;
   size_t size, regex_gotten_store;
+  int do_allcaps = 0; 
   int do_mark = 0;
   int do_study = 0;
   int no_force_study = 0; 
@@ -1611,7 +1612,8 @@
       case '+':
       if (do_showrest) do_showcaprest = 1; else do_showrest = 1; 
       break;
-       
+      
+      case '=': do_allcaps = 1; break; 
       case 'A': options |= PCRE_ANCHORED; break;
       case 'B': do_debug = 1; break;
       case 'C': options |= PCRE_AUTO_CALLOUT; break;
@@ -2734,11 +2736,31 @@
             do_g = do_G = FALSE;        /* Break g/G loop */
             }
           }
+        
+        /* do_allcaps requests showing of all captures in the pattern, to check
+        unset ones at the end. */
+          
+        if (do_allcaps)
+          {
+          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
+          count++;   /* Allow for full match */ 
+          if (count * 2 > use_size_offsets) count = use_size_offsets/2;  
+          }  


+        /* Output the captured substrings */
+         
         for (i = 0; i < count * 2; i += 2)
           {
           if (use_offsets[i] < 0)
+            { 
+            if (use_offsets[i] != -1)
+              fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
+                use_offsets[i], i);   
+            if (use_offsets[i+1] != -1)
+              fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
+                use_offsets[i+1], i+1);   
             fprintf(outfile, "%2d: <unset>\n", i/2);
+            } 
           else
             {
             fprintf(outfile, "%2d: ", i/2);


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2011-07-20 16:46:19 UTC (rev 625)
+++ code/trunk/testdata/testinput2    2011-07-20 17:51:54 UTC (rev 626)
@@ -3779,4 +3779,17 @@
     aa\M
     aaaaaaaaa\M  


+/(?:(foo)|(bar)|(baz))X/=
+    bazfooX
+    foobazbarX
+    barfooX
+    bazX
+    foobarbazX    
+    bazfooX\O0
+    bazfooX\O2
+    bazfooX\O4
+    bazfooX\O6
+    bazfooX\O8
+    bazfooX\O10
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2011-07-20 16:46:19 UTC (rev 625)
+++ code/trunk/testdata/testoutput2    2011-07-20 17:51:54 UTC (rev 626)
@@ -11926,4 +11926,57 @@
  0: aaaaaaaaa
  1: a


+/(?:(foo)|(bar)|(baz))X/=
+    bazfooX
+ 0: fooX
+ 1: foo
+ 2: <unset>
+ 3: <unset>
+    foobazbarX
+ 0: barX
+ 1: <unset>
+ 2: bar
+ 3: <unset>
+    barfooX
+ 0: fooX
+ 1: foo
+ 2: <unset>
+ 3: <unset>
+    bazX
+ 0: bazX
+ 1: <unset>
+ 2: <unset>
+ 3: baz
+    foobarbazX    
+ 0: bazX
+ 1: <unset>
+ 2: <unset>
+ 3: baz
+    bazfooX\O0
+Matched, but too many substrings
+    bazfooX\O2
+Matched, but too many substrings
+ 0: fooX
+    bazfooX\O4
+Matched, but too many substrings
+ 0: fooX
+ 1: <unset>
+    bazfooX\O6
+Matched, but too many substrings
+ 0: fooX
+ 1: foo
+ 2: <unset>
+    bazfooX\O8
+Matched, but too many substrings
+ 0: fooX
+ 1: foo
+ 2: <unset>
+ 3: <unset>
+    bazfooX\O10
+Matched, but too many substrings
+ 0: fooX
+ 1: foo
+ 2: <unset>
+ 3: <unset>
+
 /-- End of testinput2 --/