Revision: 960
http://vcs.pcre.org/viewvc?view=rev&revision=960
Author: ph10
Date: 2012-04-19 18:30:38 +0100 (Thu, 19 Apr 2012)
Log Message:
-----------
Fix long-standing DFA testing restart bug in pcretest, and add some
plausibility checks when restarting in pcre_dfa_exec().
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcreapi.3
code/trunk/pcre.h.in
code/trunk/pcre_dfa_exec.c
code/trunk/pcretest.c
code/trunk/testdata/testinput8
code/trunk/testdata/testoutput8
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/ChangeLog 2012-04-19 17:30:38 UTC (rev 960)
@@ -82,7 +82,12 @@
23. Support PCRE_NO_START_OPTIMIZE in JIT as (*MARK) support requires it.
+24. Fixed a very old bug in pcretest that caused errors with restarted DFA
+ matches in certain environments (the workspace was not being correctly
+ retained). Also added to pcre_dfa_exec() a simple plausibility check on
+ some of the workspace data at the beginning of a restart.
+
Version 8.30 04-February-2012
-----------------------------
Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3 2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/doc/pcreapi.3 2012-04-19 17:30:38 UTC (rev 960)
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "14 April 2012" "PCRE 8.31"
+.TH PCREAPI 3 "19 April 2012" "PCRE 8.31"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@@ -2104,19 +2104,19 @@
.\"
documentation for more details.
.sp
- PCRE_ERROR_BADMODE (-28)
+ PCRE_ERROR_BADMODE (-28)
.sp
This error is given if a pattern that was compiled by the 8-bit library is
passed to a 16-bit library function, or vice versa.
.sp
- PCRE_ERROR_BADENDIANNESS (-29)
+ PCRE_ERROR_BADENDIANNESS (-29)
.sp
This error is given if a pattern that was compiled and saved is reloaded on a
host with different endianness. The utility function
\fBpcre_pattern_to_host_byte_order()\fP can be used to convert such a pattern
so that it runs on the new host.
.P
-Error numbers -16 to -20 and -22 are not used by \fBpcre_exec()\fP.
+Error numbers -16 to -20, -22, and -30 are not used by \fBpcre_exec()\fP.
.
.
.\" HTML <a name="badutf8reasons"></a>
@@ -2634,6 +2634,13 @@
recursively, using private vectors for \fIovector\fP and \fIworkspace\fP. This
error is given if the output vector is not large enough. This should be
extremely rare, as a vector of size 1000 is used.
+.sp
+ PCRE_ERROR_DFA_BADRESTART (-30)
+.sp
+When \fBpcre_dfa_exec()\fP is called with the \fBPCRE_DFA_RESTART\fP option,
+some plausibility checks are made on the contents of the workspace, which
+should contain data about the previous partial match. If any of these checks
+fail, this error is given.
.
.
.SH "SEE ALSO"
@@ -2658,6 +2665,6 @@
.rs
.sp
.nf
-Last updated: 14 April 2012
+Last updated: 19 April 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in 2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/pcre.h.in 2012-04-19 17:30:38 UTC (rev 960)
@@ -179,6 +179,7 @@
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
#define PCRE_ERROR_BADMODE (-28)
#define PCRE_ERROR_BADENDIANNESS (-29)
+#define PCRE_ERROR_DFA_BADRESTART (-30)
/* Specific error codes for UTF-8 validity checks */
Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c 2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/pcre_dfa_exec.c 2012-04-19 17:30:38 UTC (rev 960)
@@ -41,7 +41,7 @@
/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
-FSM). This is NOT Perl- compatible, but it has advantages in certain
+FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */
@@ -282,7 +282,7 @@
int data; /* Some use extra data */
} stateblock;
-#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
+#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
#ifdef PCRE_DEBUG
@@ -3162,11 +3162,28 @@
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
-/* We need to find the pointer to any study data before we test for byte
-flipping, so we scan the extra_data block first. This may set two fields in the
-match block, so we must initialize them beforehand. However, the other fields
-in the match block must not be set until after the byte flipping. */
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+if (re->magic_number != MAGIC_NUMBER)
+ return re->magic_number == REVERSED_MAGIC_NUMBER?
+ PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
+/* If restarting after a partial match, do some sanity checks on the contents
+of the workspace. */
+
+if ((options & PCRE_DFA_RESTART) != 0)
+ {
+ if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
+ workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
+ return PCRE_ERROR_DFA_BADRESTART;
+ }
+
+/* Set up study, callout, and table data */
+
md->tables = re->tables;
md->callout_data = NULL;
@@ -3184,16 +3201,6 @@
md->tables = extra_data->tables;
}
-/* Check that the first field in the block is the magic number. If it is not,
-return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
-REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
-means that the pattern is likely compiled with different endianness. */
-
-if (re->magic_number != MAGIC_NUMBER)
- return re->magic_number == REVERSED_MAGIC_NUMBER?
- PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
-if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
-
/* Set some local values */
current_subject = (const pcre_uchar *)subject + start_offset;
Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c 2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/pcretest.c 2012-04-19 17:30:38 UTC (rev 960)
@@ -46,7 +46,6 @@
however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
only supported library functions. */
-
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -60,8 +59,8 @@
#include <errno.h>
/* Both libreadline and libedit are optionally supported. The user-supplied
-original patch uses readline/readline.h for libedit, but in at least one system
-it is installed as editline/readline.h, so the configuration code now looks for
+original patch uses readline/readline.h for libedit, but in at least one system
+it is installed as editline/readline.h, so the configuration code now looks for
that first, falling back to readline/readline.h. */
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
@@ -611,6 +610,10 @@
#endif
#endif
+#if !defined NODFA
+#define DFA_WS_DIMENSION 1000
+#endif
+
/* This is the default loop count for timing. */
#define LOOPREPEAT 500000
@@ -625,7 +628,7 @@
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
-static int jit_was_used;
+static int jit_was_used;
static int locale_set = 0;
static int show_malloc;
static int use_utf;
@@ -690,16 +693,16 @@
/* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
static int jit_study_bits[] =
- {
- PCRE_STUDY_JIT_COMPILE,
+ {
+ PCRE_STUDY_JIT_COMPILE,
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
- PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
- PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
- PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
- PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
- PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
-};
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+ PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+ PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
+ PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
+ PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
+};
/* Textual explanations for runtime error codes */
@@ -732,7 +735,9 @@
NULL, /* SHORTUTF8/16 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
- "pattern compiled in wrong mode: 8-bit/16-bit error"
+ "pattern compiled in wrong mode: 8-bit/16-bit error",
+ "pattern compiled with other endianness",
+ "invalid data in workspace for DFA restart"
};
@@ -2160,10 +2165,10 @@
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s force each pattern to be studied at basic level\n"
" -s+ force each pattern to be studied, using JIT if available\n"
- " -s++ ditto, verifying when JIT was actually used\n"
+ " -s++ ditto, verifying when JIT was actually used\n"
" -s+n force each pattern to be studied, using JIT if available,\n"
- " where 1 <= n <= 7 selects JIT options\n"
- " -s++n ditto, verifying when JIT was actually used\n"
+ " where 1 <= n <= 7 selects JIT options\n"
+ " -s++n ditto, verifying when JIT was actually used\n"
" -t time compilation and execution\n");
printf(" -t <n> time compilation and execution, repeating <n> times\n");
printf(" -tm time execution (matching) only\n");
@@ -2198,9 +2203,6 @@
int size_offsets = 45;
int size_offsets_max;
int *offsets = NULL;
-#if !defined NOPOSIX
-int posix = 0;
-#endif
int debug = 0;
int done = 0;
int all_use_dfa = 0;
@@ -2208,6 +2210,13 @@
int yield = 0;
int stack_size;
+#if !defined NOPOSIX
+int posix = 0;
+#endif
+#if !defined NODFA
+int *dfa_workspace = NULL;
+#endif
+
pcre_jit_stack *jit_stack = NULL;
/* These vectors store, end-to-end, a list of zero-terminated captured
@@ -2266,20 +2275,20 @@
while (argc > 1 && argv[op][0] == '-')
{
pcre_uint8 *endptr;
- char *arg = argv[op];
+ char *arg = argv[op];
if (strcmp(arg, "-m") == 0) showstore = 1;
else if (strcmp(arg, "-s") == 0) force_study = 0;
-
+
else if (strncmp(arg, "-s+", 3) == 0)
{
arg += 3;
if (*arg == '+') { arg++; verify_jit = TRUE; }
force_study = 1;
if (*arg == 0)
- force_study_options = jit_study_bits[6];
+ force_study_options = jit_study_bits[6];
else if (*arg >= '1' && *arg <= '7')
- force_study_options = jit_study_bits[*arg - '1'];
+ force_study_options = jit_study_bits[*arg - '1'];
else goto BAD_ARG;
}
else if (strcmp(arg, "-16") == 0)
@@ -2493,7 +2502,7 @@
}
else
{
- BAD_ARG:
+ BAD_ARG:
printf("** Unknown or malformed option %s\n", arg);
usage();
yield = 1;
@@ -2591,6 +2600,10 @@
int do_showcaprest = 0;
int do_flip = 0;
int erroroffset, len, delimiter, poffset;
+
+#if !defined NODFA
+ int dfa_matched = 0;
+#endif
use_utf = 0;
debug_lengths = 1;
@@ -2813,12 +2826,12 @@
if (*(++pp) == '+')
{
verify_jit = TRUE;
- pp++;
- }
+ pp++;
+ }
if (*pp >= '1' && *pp <= '7')
study_options |= jit_study_bits[*pp++ - '1'];
- else
- study_options |= jit_study_bits[6];
+ else
+ study_options |= jit_study_bits[6];
}
}
else
@@ -3126,7 +3139,7 @@
new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
- new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
+ new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
!= 0)
goto SKIP_DATA;
@@ -3265,10 +3278,10 @@
fprintf(outfile, "%s\n", caseless);
}
}
-
- if (maxlookbehind > 0)
- fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+ if (maxlookbehind > 0)
+ fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+
/* Don't output study size; at present it is in any case a fixed
value, but it varies, depending on the computer architecture, and
so messes up the test suite. (And with the /F option, it might be
@@ -3908,7 +3921,7 @@
}
#endif
- /* Ensure that there is a JIT callback if we want to verify that JIT was
+ /* Ensure that there is a JIT callback if we want to verify that JIT was
actually used. If jit_stack == NULL, no stack has yet been assigned. */
if (verify_jit && jit_stack == NULL && extra != NULL)
@@ -3917,7 +3930,7 @@
for (;; gmatched++) /* Loop for /g or /G */
{
markptr = NULL;
- jit_was_used = FALSE;
+ jit_was_used = FALSE;
if (timeitm > 0)
{
@@ -3928,12 +3941,18 @@
#if !defined NODFA
if (all_use_dfa || use_dfa)
{
- int workspace[1000];
+ if ((options & PCRE_DFA_RESTART) != 0)
+ {
+ fprintf(outfile, "Timing DFA restarts is not supported\n");
+ break;
+ }
+ if (dfa_workspace == NULL)
+ dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
for (i = 0; i < timeitm; i++)
{
PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
- (options | g_notempty), use_offsets, use_size_offsets, workspace,
- (sizeof(workspace)/sizeof(int)));
+ (options | g_notempty), use_offsets, use_size_offsets,
+ dfa_workspace, DFA_WS_DIMENSION);
}
}
else
@@ -3999,10 +4018,13 @@
#if !defined NODFA
else if (all_use_dfa || use_dfa)
{
- int workspace[1000];
+ if (dfa_workspace == NULL)
+ dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
+ if (dfa_matched++ == 0)
+ dfa_workspace[0] = -1; /* To catch bad restart */
PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
- (options | g_notempty), use_offsets, use_size_offsets, workspace,
- (sizeof(workspace)/sizeof(int)));
+ (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
+ DFA_WS_DIMENSION);
if (count == 0)
{
fprintf(outfile, "Matched, but too many subsidiary matches\n");
@@ -4021,7 +4043,7 @@
count = use_size_offsets/3;
}
}
-
+
/* Matched */
if (count >= 0)
@@ -4079,7 +4101,7 @@
fprintf(outfile, "%2d: ", i/2);
PCHARSV(bptr, use_offsets[i],
use_offsets[i+1] - use_offsets[i], outfile);
- if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
+ if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
if (do_showcaprest || (i == 0 && do_showrest))
{
@@ -4246,7 +4268,7 @@
PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
outfile);
}
- if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
+ if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
break; /* Out of the /g loop */
}
@@ -4333,7 +4355,7 @@
fprintf(outfile, "No match, mark = ");
PCHARSV(markptr, 0, -1, outfile);
}
- if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
+ if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
putc('\n', outfile);
}
break;
Modified: code/trunk/testdata/testinput8
===================================================================
--- code/trunk/testdata/testinput8 2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/testdata/testinput8 2012-04-19 17:30:38 UTC (rev 960)
@@ -4782,4 +4782,9 @@
\r\r\r\P
\r\r\r\P\P
+/-- Test simple validity check for restarts --/
+
+/abcdef/
+ abc\R
+
/-- End of testinput8 --/
Modified: code/trunk/testdata/testoutput8
===================================================================
--- code/trunk/testdata/testoutput8 2012-04-14 16:16:58 UTC (rev 959)
+++ code/trunk/testdata/testoutput8 2012-04-19 17:30:38 UTC (rev 960)
@@ -7990,4 +7990,10 @@
\r\r\r\P\P
Partial match: \x0d\x0d\x0d
+/-- Test simple validity check for restarts --/
+
+/abcdef/
+ abc\R
+Error -30 (invalid data in workspace for DFA restart)
+
/-- End of testinput8 --/