[Pcre-svn] [960] code/trunk: Fix long-standing DFA testing r…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [960] code/trunk: Fix long-standing DFA testing restart bug in pcretest, and add some
Revision: 960
          http://vcs.pcre.org/viewvc?view=rev&revision=960
Author:   ph10
Date:     2012-04-19 18:30:38 +0100 (Thu, 19 Apr 2012)


Log Message:
-----------
Fix long-standing DFA testing restart bug in pcretest, and add some
plausibility checks when restarting in pcre_dfa_exec().

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcreapi.3
    code/trunk/pcre.h.in
    code/trunk/pcre_dfa_exec.c
    code/trunk/pcretest.c
    code/trunk/testdata/testinput8
    code/trunk/testdata/testoutput8


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/ChangeLog    2012-04-19 17:30:38 UTC (rev 960)
@@ -82,7 +82,12 @@


23. Support PCRE_NO_START_OPTIMIZE in JIT as (*MARK) support requires it.

+24. Fixed a very old bug in pcretest that caused errors with restarted DFA
+    matches in certain environments (the workspace was not being correctly
+    retained). Also added to pcre_dfa_exec() a simple plausibility check on
+    some of the workspace data at the beginning of a restart.


+
Version 8.30 04-February-2012
-----------------------------


Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3    2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/doc/pcreapi.3    2012-04-19 17:30:38 UTC (rev 960)
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "14 April 2012" "PCRE 8.31"
+.TH PCREAPI 3 "19 April 2012" "PCRE 8.31"
 .SH NAME
 PCRE - Perl-compatible regular expressions
 .sp
@@ -2104,19 +2104,19 @@
 .\"
 documentation for more details.
 .sp
-  PCRE_ERROR_BADMODE (-28)
+  PCRE_ERROR_BADMODE        (-28)
 .sp
 This error is given if a pattern that was compiled by the 8-bit library is
 passed to a 16-bit library function, or vice versa.
 .sp
-  PCRE_ERROR_BADENDIANNESS (-29)
+  PCRE_ERROR_BADENDIANNESS  (-29)
 .sp
 This error is given if a pattern that was compiled and saved is reloaded on a
 host with different endianness. The utility function
 \fBpcre_pattern_to_host_byte_order()\fP can be used to convert such a pattern
 so that it runs on the new host.
 .P
-Error numbers -16 to -20 and -22 are not used by \fBpcre_exec()\fP.
+Error numbers -16 to -20, -22, and -30 are not used by \fBpcre_exec()\fP.
 .
 .
 .\" HTML <a name="badutf8reasons"></a>
@@ -2634,6 +2634,13 @@
 recursively, using private vectors for \fIovector\fP and \fIworkspace\fP. This
 error is given if the output vector is not large enough. This should be
 extremely rare, as a vector of size 1000 is used.
+.sp
+  PCRE_ERROR_DFA_BADRESTART (-30)
+.sp
+When \fBpcre_dfa_exec()\fP is called with the \fBPCRE_DFA_RESTART\fP option,
+some plausibility checks are made on the contents of the workspace, which 
+should contain data about the previous partial match. If any of these checks 
+fail, this error is given.   
 .
 .
 .SH "SEE ALSO"
@@ -2658,6 +2665,6 @@
 .rs
 .sp
 .nf
-Last updated: 14 April 2012
+Last updated: 19 April 2012
 Copyright (c) 1997-2012 University of Cambridge.
 .fi


Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in    2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/pcre.h.in    2012-04-19 17:30:38 UTC (rev 960)
@@ -179,6 +179,7 @@
 #define PCRE_ERROR_JIT_STACKLIMIT  (-27)
 #define PCRE_ERROR_BADMODE         (-28)
 #define PCRE_ERROR_BADENDIANNESS   (-29)
+#define PCRE_ERROR_DFA_BADRESTART  (-30)


/* Specific error codes for UTF-8 validity checks */


Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c    2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/pcre_dfa_exec.c    2012-04-19 17:30:38 UTC (rev 960)
@@ -41,7 +41,7 @@


/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
-FSM). This is NOT Perl- compatible, but it has advantages in certain
+FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */


@@ -282,7 +282,7 @@
   int data;                       /* Some use extra data */
 } stateblock;


-#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
+#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))


#ifdef PCRE_DEBUG
@@ -3162,11 +3162,28 @@
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;

-/* We need to find the pointer to any study data before we test for byte
-flipping, so we scan the extra_data block first. This may set two fields in the
-match block, so we must initialize them beforehand. However, the other fields
-in the match block must not be set until after the byte flipping. */
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */

+if (re->magic_number != MAGIC_NUMBER)
+  return re->magic_number == REVERSED_MAGIC_NUMBER?
+    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
+/* If restarting after a partial match, do some sanity checks on the contents 
+of the workspace. */
+
+if ((options & PCRE_DFA_RESTART) != 0)
+  {
+  if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 || 
+    workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
+      return PCRE_ERROR_DFA_BADRESTART; 
+  } 
+
+/* Set up study, callout, and table data */
+
 md->tables = re->tables;
 md->callout_data = NULL;


@@ -3184,16 +3201,6 @@
     md->tables = extra_data->tables;
   }


-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
-
-if (re->magic_number != MAGIC_NUMBER)
-  return re->magic_number == REVERSED_MAGIC_NUMBER?
-    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-
 /* Set some local values */


current_subject = (const pcre_uchar *)subject + start_offset;

Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/pcretest.c    2012-04-19 17:30:38 UTC (rev 960)
@@ -46,7 +46,6 @@
 however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
 only supported library functions. */


-
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -60,8 +59,8 @@
#include <errno.h>

/* Both libreadline and libedit are optionally supported. The user-supplied
-original patch uses readline/readline.h for libedit, but in at least one system
-it is installed as editline/readline.h, so the configuration code now looks for
+original patch uses readline/readline.h for libedit, but in at least one system
+it is installed as editline/readline.h, so the configuration code now looks for
that first, falling back to readline/readline.h. */

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
@@ -611,6 +610,10 @@
#endif
#endif

+#if !defined NODFA
+#define DFA_WS_DIMENSION 1000
+#endif
+
/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
@@ -625,7 +628,7 @@
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
-static int jit_was_used;
+static int jit_was_used;
static int locale_set = 0;
static int show_malloc;
static int use_utf;
@@ -690,16 +693,16 @@
/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */

 static int jit_study_bits[] =
-  { 
-  PCRE_STUDY_JIT_COMPILE, 
+  {
+  PCRE_STUDY_JIT_COMPILE,
   PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
   PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
-  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 
-  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 
-  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE, 
-  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + 
-    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 
-};   
+  PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+  PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+  PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
+    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
+};


/* Textual explanations for runtime error codes */

@@ -732,7 +735,9 @@
NULL, /* SHORTUTF8/16 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
- "pattern compiled in wrong mode: 8-bit/16-bit error"
+ "pattern compiled in wrong mode: 8-bit/16-bit error",
+ "pattern compiled with other endianness",
+ "invalid data in workspace for DFA restart"
};


@@ -2160,10 +2165,10 @@
 printf("  -S <n>   set stack size to <n> megabytes\n");
 printf("  -s       force each pattern to be studied at basic level\n"
        "  -s+      force each pattern to be studied, using JIT if available\n"
-       "  -s++     ditto, verifying when JIT was actually used\n" 
+       "  -s++     ditto, verifying when JIT was actually used\n"
        "  -s+n     force each pattern to be studied, using JIT if available,\n"
-       "             where 1 <= n <= 7 selects JIT options\n"  
-       "  -s++n    ditto, verifying when JIT was actually used\n" 
+       "             where 1 <= n <= 7 selects JIT options\n"
+       "  -s++n    ditto, verifying when JIT was actually used\n"
        "  -t       time compilation and execution\n");
 printf("  -t <n>   time compilation and execution, repeating <n> times\n");
 printf("  -tm      time execution (matching) only\n");
@@ -2198,9 +2203,6 @@
 int size_offsets = 45;
 int size_offsets_max;
 int *offsets = NULL;
-#if !defined NOPOSIX
-int posix = 0;
-#endif
 int debug = 0;
 int done = 0;
 int all_use_dfa = 0;
@@ -2208,6 +2210,13 @@
 int yield = 0;
 int stack_size;


+#if !defined NOPOSIX
+int posix = 0;
+#endif
+#if !defined NODFA
+int *dfa_workspace = NULL;
+#endif
+
pcre_jit_stack *jit_stack = NULL;

/* These vectors store, end-to-end, a list of zero-terminated captured
@@ -2266,20 +2275,20 @@
while (argc > 1 && argv[op][0] == '-')
{
pcre_uint8 *endptr;
- char *arg = argv[op];
+ char *arg = argv[op];

   if (strcmp(arg, "-m") == 0) showstore = 1;
   else if (strcmp(arg, "-s") == 0) force_study = 0;
-   
+
   else if (strncmp(arg, "-s+", 3) == 0)
     {
     arg += 3;
     if (*arg == '+') { arg++; verify_jit = TRUE; }
     force_study = 1;
     if (*arg == 0)
-      force_study_options = jit_study_bits[6]; 
+      force_study_options = jit_study_bits[6];
     else if (*arg >= '1' && *arg <= '7')
-      force_study_options = jit_study_bits[*arg - '1']; 
+      force_study_options = jit_study_bits[*arg - '1'];
     else goto BAD_ARG;
     }
   else if (strcmp(arg, "-16") == 0)
@@ -2493,7 +2502,7 @@
     }
   else
     {
-    BAD_ARG: 
+    BAD_ARG:
     printf("** Unknown or malformed option %s\n", arg);
     usage();
     yield = 1;
@@ -2591,6 +2600,10 @@
   int do_showcaprest = 0;
   int do_flip = 0;
   int erroroffset, len, delimiter, poffset;
+  
+#if !defined NODFA 
+  int dfa_matched = 0;
+#endif   


   use_utf = 0;
   debug_lengths = 1;
@@ -2813,12 +2826,12 @@
           if (*(++pp) == '+')
             {
             verify_jit = TRUE;
-            pp++;  
-            }  
+            pp++;
+            }
           if (*pp >= '1' && *pp <= '7')
             study_options |= jit_study_bits[*pp++ - '1'];
-          else 
-            study_options |= jit_study_bits[6];     
+          else
+            study_options |= jit_study_bits[6];
           }
         }
       else
@@ -3126,7 +3139,7 @@
           new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
           new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
           new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
-          new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind) 
+          new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
           != 0)
         goto SKIP_DATA;


@@ -3265,10 +3278,10 @@
           fprintf(outfile, "%s\n", caseless);
           }
         }
-        
-      if (maxlookbehind > 0) 
-        fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); 


+      if (maxlookbehind > 0)
+        fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+
       /* Don't output study size; at present it is in any case a fixed
       value, but it varies, depending on the computer architecture, and
       so messes up the test suite. (And with the /F option, it might be
@@ -3908,7 +3921,7 @@
       }
 #endif


-    /* Ensure that there is a JIT callback if we want to verify that JIT was 
+    /* Ensure that there is a JIT callback if we want to verify that JIT was
     actually used. If jit_stack == NULL, no stack has yet been assigned. */


     if (verify_jit && jit_stack == NULL && extra != NULL)
@@ -3917,7 +3930,7 @@
     for (;; gmatched++)    /* Loop for /g or /G */
       {
       markptr = NULL;
-      jit_was_used = FALSE; 
+      jit_was_used = FALSE;


       if (timeitm > 0)
         {
@@ -3928,12 +3941,18 @@
 #if !defined NODFA
         if (all_use_dfa || use_dfa)
           {
-          int workspace[1000];
+          if ((options & PCRE_DFA_RESTART) != 0)
+            {
+            fprintf(outfile, "Timing DFA restarts is not supported\n");
+            break;
+            }    
+          if (dfa_workspace == NULL) 
+            dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));  
           for (i = 0; i < timeitm; i++)
             {
             PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
-              (options | g_notempty), use_offsets, use_size_offsets, workspace,
-              (sizeof(workspace)/sizeof(int)));
+              (options | g_notempty), use_offsets, use_size_offsets,
+              dfa_workspace, DFA_WS_DIMENSION);
             }
           }
         else
@@ -3999,10 +4018,13 @@
 #if !defined NODFA
       else if (all_use_dfa || use_dfa)
         {
-        int workspace[1000];
+        if (dfa_workspace == NULL) 
+          dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));  
+        if (dfa_matched++ == 0)  
+          dfa_workspace[0] = -1;  /* To catch bad restart */
         PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
-          (options | g_notempty), use_offsets, use_size_offsets, workspace,
-          (sizeof(workspace)/sizeof(int)));
+          (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
+          DFA_WS_DIMENSION);
         if (count == 0)
           {
           fprintf(outfile, "Matched, but too many subsidiary matches\n");
@@ -4021,7 +4043,7 @@
           count = use_size_offsets/3;
           }
         }
-        
+
       /* Matched */


       if (count >= 0)
@@ -4079,7 +4101,7 @@
             fprintf(outfile, "%2d: ", i/2);
             PCHARSV(bptr, use_offsets[i],
               use_offsets[i+1] - use_offsets[i], outfile);
-            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");  
+            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
             fprintf(outfile, "\n");
             if (do_showcaprest || (i == 0 && do_showrest))
               {
@@ -4246,7 +4268,7 @@
           PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
             outfile);
           }
-        if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");  
+        if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
         fprintf(outfile, "\n");
         break;  /* Out of the /g loop */
         }
@@ -4333,7 +4355,7 @@
                 fprintf(outfile, "No match, mark = ");
                 PCHARSV(markptr, 0, -1, outfile);
                 }
-              if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");  
+              if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
               putc('\n', outfile);
               }
             break;


Modified: code/trunk/testdata/testinput8
===================================================================
--- code/trunk/testdata/testinput8    2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/testdata/testinput8    2012-04-19 17:30:38 UTC (rev 960)
@@ -4782,4 +4782,9 @@
     \r\r\r\P
     \r\r\r\P\P     


+/-- Test simple validity check for restarts --/
+
+/abcdef/
+ abc\R
+
/-- End of testinput8 --/

Modified: code/trunk/testdata/testoutput8
===================================================================
--- code/trunk/testdata/testoutput8    2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/testdata/testoutput8    2012-04-19 17:30:38 UTC (rev 960)
@@ -7990,4 +7990,10 @@
     \r\r\r\P\P     
 Partial match: \x0d\x0d\x0d


+/-- Test simple validity check for restarts --/
+
+/abcdef/
+ abc\R
+Error -30 (invalid data in workspace for DFA restart)
+
/-- End of testinput8 --/