Revision: 1313
http://vcs.pcre.org/viewvc?view=rev&revision=1313
Author: ph10
Date: 2013-04-24 13:07:09 +0100 (Wed, 24 Apr 2013)
Log Message:
-----------
Code (but not yet documentation) for *LIMIT_MATCH and *LIMIT_RECURSION.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcreapi.3
code/trunk/pcre.h.in
code/trunk/pcre_byte_order.c
code/trunk/pcre_compile.c
code/trunk/pcre_exec.c
code/trunk/pcre_fullinfo.c
code/trunk/pcre_internal.h
code/trunk/pcretest.c
code/trunk/testdata/saved16
code/trunk/testdata/saved16BE-1
code/trunk/testdata/saved16BE-2
code/trunk/testdata/saved16LE-1
code/trunk/testdata/saved16LE-2
code/trunk/testdata/saved32
code/trunk/testdata/saved32BE-1
code/trunk/testdata/saved32BE-2
code/trunk/testdata/saved32LE-1
code/trunk/testdata/saved32LE-2
code/trunk/testdata/saved8
code/trunk/testdata/testinput2
code/trunk/testdata/testinput21
code/trunk/testdata/testinput22
code/trunk/testdata/testoutput18-16
code/trunk/testdata/testoutput18-32
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput21-16
code/trunk/testdata/testoutput21-32
code/trunk/testdata/testoutput22-16
code/trunk/testdata/testoutput22-32
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/ChangeLog 2013-04-24 12:07:09 UTC (rev 1313)
@@ -137,6 +137,11 @@
36. In the interpreter, maximizing pattern repetitions for characters and
character types now use tail recursion, which reduces stack usage.
+
+37. The value of the max lookbehind was not correctly preserved if a compiled
+ and saved regex was reloaded on a host of different endianness.
+
+38. Implemented (*LIMIT_MATCH) and (*LIMIT_RECURSION).
Version 8.32 30-November-2012
Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/doc/pcreapi.3 2013-04-24 12:07:09 UTC (rev 1313)
@@ -962,7 +962,7 @@
name/number or by a plain number
58 a numbered reference must not be zero
59 an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
- 60 (*VERB) not recognized
+ 60 (*VERB) not recognized or malformed
61 number is too big
62 subpattern name expected
63 digit expected after (?+
Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/pcre.h.in 2013-04-24 12:07:09 UTC (rev 1313)
@@ -206,6 +206,7 @@
#define PCRE_ERROR_DFA_BADRESTART (-30)
#define PCRE_ERROR_JIT_BADOPTION (-31)
#define PCRE_ERROR_BADLENGTH (-32)
+#define PCRE_ERROR_UNSET (-33)
/* Specific error codes for UTF-8 validity checks */
@@ -270,10 +271,12 @@
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
-#define PCRE_INFO_FIRSTCHARACTER 19
-#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
+#define PCRE_INFO_FIRSTCHARACTER 19
+#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
#define PCRE_INFO_REQUIREDCHAR 21
-#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_MATCHLIMIT 23
+#define PCRE_INFO_RECURSIONLIMIT 24
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
Modified: code/trunk/pcre_byte_order.c
===================================================================
--- code/trunk/pcre_byte_order.c 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/pcre_byte_order.c 2013-04-24 12:07:09 UTC (rev 1313)
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -126,14 +126,15 @@
}
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
-if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+if ((swap_uint32(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->magic_number = MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
-re->flags = swap_uint16(re->flags);
-re->top_bracket = swap_uint16(re->top_bracket);
-re->top_backref = swap_uint16(re->top_backref);
+re->flags = swap_uint32(re->flags);
+re->limit_match = swap_uint32(re->limit_match);
+re->limit_recursion = swap_uint32(re->limit_recursion);
+
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
@@ -141,15 +142,15 @@
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
#endif
+
+re->max_lookbehind = swap_uint16(re->max_lookbehind);
+re->top_bracket = swap_uint16(re->top_bracket);
+re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);
re->tables = tables;
-#ifdef COMPILE_PCRE32
-re->dummy1 = swap_uint16(re->dummy1);
-re->dummy2 = swap_uint16(re->dummy2);
-#endif
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
{
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/pcre_compile.c 2013-04-24 12:07:09 UTC (rev 1313)
@@ -487,7 +487,7 @@
"a numbered reference must not be zero\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* 60 */
- "(*VERB) not recognized\0"
+ "(*VERB) not recognized or malformed\0"
"number is too big\0"
"subpattern name expected\0"
"digit expected after (?+\0"
@@ -798,7 +798,7 @@
#ifndef EBCDIC /* ASCII/UTF-8 coding */
/* Not alphanumeric */
else if (c < CHAR_0 || c > CHAR_z) {}
-else if ((i = escapes[c - CHAR_0]) != 0)
+else if ((i = escapes[c - CHAR_0]) != 0)
{ if (i > 0) c = (pcre_uint32)i; else escape = -i; }
#else /* EBCDIC coding */
@@ -1410,11 +1410,11 @@
{
/* Handle specials such as (*SKIP) or (*UTF8) etc. */
- if (ptr[1] == CHAR_ASTERISK)
+ if (ptr[1] == CHAR_ASTERISK)
{
ptr += 2;
while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
- }
+ }
/* Handle a normal, unnamed capturing parenthesis. */
@@ -3091,7 +3091,7 @@
if (*ptr == CHAR_BACKSLASH)
{
int temperrorcode = 0;
- escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
+ escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
FALSE);
if (temperrorcode != 0) return FALSE;
ptr++; /* Point after the escape sequence */
@@ -4275,7 +4275,7 @@
if (c == CHAR_BACKSLASH)
{
- escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
+ escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
TRUE);
if (*errorcodeptr != 0) goto FAILED;
if (escape == 0) c = ec;
@@ -5725,7 +5725,7 @@
/* ------------------------------------------------------------ */
case CHAR_LEFT_PARENTHESIS:
bravalue = OP_COND; /* Conditional group */
- tempptr = ptr;
+ tempptr = ptr;
/* A condition can be an assertion, a number (referring to a numbered
group), a name (referring to a named group), or 'R', referring to
@@ -5739,26 +5739,26 @@
by digits), and (b) a number could be a name that consists of digits.
In both cases, we look for a name first; if not found, we try the other
cases.
-
- For compatibility with auto-callouts, we allow a callout to be
- specified before a condition that is an assertion. First, check for the
- syntax of a callout; if found, adjust the temporary pointer that is
+
+ For compatibility with auto-callouts, we allow a callout to be
+ specified before a condition that is an assertion. First, check for the
+ syntax of a callout; if found, adjust the temporary pointer that is
used to check for an assertion condition. That's all that is needed! */
-
+
if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
{
for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
- tempptr += i + 1;
- }
+ tempptr += i + 1;
+ }
/* For conditions that are assertions, check the syntax, and then exit
the switch. This will take control down to where bracketed groups,
including assertions, are processed. */
- if (tempptr[1] == CHAR_QUESTION_MARK &&
+ if (tempptr[1] == CHAR_QUESTION_MARK &&
(tempptr[2] == CHAR_EQUALS_SIGN ||
- tempptr[2] == CHAR_EXCLAMATION_MARK ||
+ tempptr[2] == CHAR_EXCLAMATION_MARK ||
tempptr[2] == CHAR_LESS_THAN_SIGN))
break;
@@ -6901,7 +6901,7 @@
else
{
- if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
+ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
#ifdef SUPPORT_UCP
@@ -7766,8 +7766,10 @@
{
REAL_PCRE *re;
int length = 1; /* For final END opcode */
+pcre_int32 firstcharflags, reqcharflags;
pcre_uint32 firstchar, reqchar;
-pcre_int32 firstcharflags, reqcharflags;
+pcre_uint32 limit_match = PCRE_UINT32_MAX;
+pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
int newline;
int errorcode = 0;
int skipatstart = 0;
@@ -7831,19 +7833,16 @@
errorcode = ERR17;
goto PCRE_EARLY_ERROR_RETURN;
}
-
-/* If PCRE_NEVER_UTF is set, remember it. As this option steals a bit that is
-also used for execution options, flatten it just in case. */
-if ((options & PCRE_NEVER_UTF) != 0)
- {
- never_utf = TRUE;
- options &= ~PCRE_NEVER_UTF;
- }
+/* If PCRE_NEVER_UTF is set, remember it. */
+if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
+
/* Check for global one-time settings at the start of the pattern, and remember
the offset for later. */
+cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */
+
while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
ptr[skipatstart+1] == CHAR_ASTERISK)
{
@@ -7874,6 +7873,44 @@
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
{ skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 14;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_match)
+ {
+ limit_match = c;
+ cd->external_flags |= PCRE_MLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 18;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow check */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_recursion)
+ {
+ limit_recursion = c;
+ cd->external_flags |= PCRE_RLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0)
@@ -7896,14 +7933,14 @@
options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
else break;
}
-
+
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
if (utf && never_utf)
{
errorcode = ERR78;
goto PCRE_EARLY_ERROR_RETURN2;
- }
+ }
/* Can't support UTF unless PCRE has been compiled to include the code. The
return of an error code from PRIV(valid_utf)() is a new feature, introduced in
@@ -8026,7 +8063,6 @@
cd->assert_depth = 0;
cd->max_lookbehind = 0;
cd->external_options = options;
-cd->external_flags = 0;
cd->open_caps = NULL;
/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
@@ -8076,6 +8112,8 @@
re->size = (int)size;
re->options = cd->external_options;
re->flags = cd->external_flags;
+re->limit_match = limit_match;
+re->limit_recursion = limit_recursion;
re->first_char = 0;
re->req_char = 0;
re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@@ -8085,7 +8123,9 @@
re->tables = (tables == PRIV(default_tables))? NULL : tables;
re->nullpad = NULL;
#ifdef COMPILE_PCRE32
-re->dummy1 = re->dummy2 = 0;
+re->dummy = 0;
+#else
+re->dummy1 = re->dummy2 = re->dummy3 = 0;
#endif
/* The starting points of the name/number translation table and of the code are
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/pcre_exec.c 2013-04-24 12:07:09 UTC (rev 1313)
@@ -6511,6 +6511,30 @@
&& extra_data->executable_jit != NULL
&& (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
{
+ /* A facility for setting the match limit in the regex was added; this puts
+ a value in the compiled block. (Similarly for recursion limit, but the JIT
+ does not make use of that.) Because the regex is not passed to jit_exec, we
+ fudge up an alternative extra block, because we must not modify the extra
+ block that the user has passed. */
+
+#if defined COMPILE_PCRE8
+ pcre_extra extra_data_copy;
+#elif defined COMPILE_PCRE16
+ pcre16_extra extra_data_copy;
+#elif defined COMPILE_PCRE32
+ pcre32_extra extra_data_copy;
+#endif
+
+ if ((re->flags & PCRE_MLSET) != 0 &&
+ ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0 ||
+ re->limit_match < extra_data->match_limit))
+ {
+ extra_data_copy = *extra_data;
+ extra_data_copy.match_limit = re->limit_match;
+ extra_data_copy.flags |= PCRE_EXTRA_MATCH_LIMIT;
+ extra_data = &extra_data_copy;
+ }
+
rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
start_offset, options, offsets, offsetcount);
@@ -6540,6 +6564,8 @@
tables = re->tables;
+/* The two limit values override the defaults, whatever their value. */
+
if (extra_data != NULL)
{
register unsigned int flags = extra_data->flags;
@@ -6554,6 +6580,15 @@
if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
}
+/* Limits in the regex override only if they are smaller. */
+
+if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
+ md->match_limit = re->limit_match;
+
+if ((re->flags & PCRE_RLSET) != 0 &&
+ re->limit_recursion < md->match_limit_recursion)
+ md->match_limit_recursion = re->limit_recursion;
+
/* If the exec call supplied NULL for tables, use the inbuilt ones. This
is a feature that makes it possible to save compiled regex and re-use them
in other programs later. */
Modified: code/trunk/pcre_fullinfo.c
===================================================================
--- code/trunk/pcre_fullinfo.c 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/pcre_fullinfo.c 2013-04-24 12:07:09 UTC (rev 1313)
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -221,7 +221,17 @@
case PCRE_INFO_MAXLOOKBEHIND:
*((int *)where) = re->max_lookbehind;
break;
+
+ case PCRE_INFO_MATCHLIMIT:
+ if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
+ *((unsigned long int *)where) = re->limit_match;
+ break;
+ case PCRE_INFO_RECURSIONLIMIT:
+ if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
+ *((unsigned long int *)where) = re->limit_recursion;
+ break;
+
default: return PCRE_ERROR_BADOPTION;
}
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/pcre_internal.h 2013-04-24 12:07:09 UTC (rev 1313)
@@ -194,23 +194,31 @@
typedef unsigned char pcre_uint8;
#if USHRT_MAX == 65535
- typedef unsigned short pcre_uint16;
- typedef short pcre_int16;
+typedef unsigned short pcre_uint16;
+typedef short pcre_int16;
+#define PCRE_UINT16_MAX USHRT_MAX
+#define PCRE_INT16_MAX SHRT_MAX
#elif UINT_MAX == 65535
- typedef unsigned int pcre_uint16;
- typedef int pcre_int16;
+typedef unsigned int pcre_uint16;
+typedef int pcre_int16;
+#define PCRE_UINT16_MAX UINT_MAX
+#define PCRE_INT16_MAX INT_MAX
#else
-# error Cannot determine a type for 16-bit unsigned integers
+#error Cannot determine a type for 16-bit integers
#endif
-#if UINT_MAX == 4294967295
- typedef unsigned int pcre_uint32;
- typedef int pcre_int32;
-#elif ULONG_MAX == 4294967295
- typedef unsigned long int pcre_uint32;
- typedef long int pcre_int32;
+#if UINT_MAX == 4294967295U
+typedef unsigned int pcre_uint32;
+typedef int pcre_int32;
+#define PCRE_UINT32_MAX UINT_MAX
+#define PCRE_INT32_MAX INT_MAX
+#elif ULONG_MAX == 4294967295UL
+typedef unsigned long int pcre_uint32;
+typedef long int pcre_int32;
+#define PCRE_UINT32_MAX ULONG_MAX
+#define PCRE_INT32_MAX LONG_MAX
#else
-# error Cannot determine a type for 32-bit unsigned integers
+#error Cannot determine a type for 32-bit integers
#endif
/* When checking for integer overflow in pcre_compile(), we need to handle
@@ -1121,23 +1129,26 @@
/* Private flags containing information about the compiled regex. They used to
-live at the top end of the options word, but that got almost full, so now they
-are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
-the restrictions on partial matching have been lifted. It remains for backwards
+live at the top end of the options word, but that got almost full, so they were
+moved to a 16-bit flags word - which got almost full, so now they are in a
+32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the
+restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
-#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */
-#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */
-#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */
-#define PCRE_FIRSTSET 0x0010 /* first_char is set */
-#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */
-#define PCRE_REQCHSET 0x0040 /* req_byte is set */
-#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */
-#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */
-#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */
-#define PCRE_JCHANGED 0x0400 /* j option used in regex */
-#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */
+#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */
+#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */
+#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */
+#define PCRE_FIRSTSET 0x00000010 /* first_char is set */
+#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */
+#define PCRE_REQCHSET 0x00000040 /* req_byte is set */
+#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */
+#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */
+#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */
+#define PCRE_JCHANGED 0x00000400 /* j option used in regex */
+#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
+#define PCRE_MLSET 0x00002000 /* match limit set by regex */
+#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
#if defined COMPILE_PCRE8
#define PCRE_MODE PCRE_MODE8
@@ -1534,6 +1545,8 @@
#define STRING_UTF_RIGHTPAR "UTF)"
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
+#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
+#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#else /* SUPPORT_UTF */
@@ -1795,6 +1808,8 @@
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
+#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
+#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#endif /* SUPPORT_UTF */
@@ -2281,48 +2296,49 @@
code vector run on as long as necessary after the end. We store an explicit
offset to the name table so that if a regex is compiled on one host, saved, and
then run on another where the size of pointers is different, all might still
-be well. For the case of compiled-on-4 and run-on-8, we include an extra
-pointer that is always NULL. For future-proofing, a few dummy fields were
-originally included - even though you can never get this planning right - but
-there is only one left now.
+be well.
-NOTE NOTE NOTE:
-Because people can now save and re-use compiled patterns, any additions to this
-structure should be made at the end, and something earlier (e.g. a new
-flag in the options or one of the dummy fields) should indicate that the new
-fields are present. Currently PCRE always sets the dummy fields to zero.
-NOTE NOTE NOTE
-*/
+The size of the structure must be a multiple of 8 bytes. For the case of
+compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so
+that there is an even number of pointers, which therefore occupy a multiple of
+8 bytes.
-#if defined COMPILE_PCRE8
-#define REAL_PCRE real_pcre
-#elif defined COMPILE_PCRE16
-#define REAL_PCRE real_pcre16
-#elif defined COMPILE_PCRE32
-#define REAL_PCRE real_pcre32
-#endif
+It is necessary to fork the struct for the 32 bit library, since it needs to
+use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the
+typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit
+variants.
-/* It is necessary to fork the struct for 32 bit, since it needs to use
- * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
- * typedef since pcretest needs access to the struct of the 8-, 16-
- * and 32-bit variants. */
+*** WARNING ***
+When new fields are added to these structures, remember to adjust the code in
+pcre_byte_order.c that is concerned with swapping the byte order of the fields
+when a compiled regex is reloaded on a host with different endianness.
+*** WARNING ***
+There is also similar byte-flipping code in pcretest.c, which is used for
+testing the byte-flipping features. It must also be kept in step.
+*** WARNING ***
+*/
typedef struct real_pcre8_or_16 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint16 first_char; /* Starting character */
+ pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint16 first_char; /* Starting character */
- pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
+ pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- const pcre_uint8 *nullpad; /* NULL padding */
+ void *nullpad; /* NULL padding */
} real_pcre8_or_16;
typedef struct real_pcre8_or_16 real_pcre;
@@ -2332,22 +2348,31 @@
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint32 first_char; /* Starting character */
+ pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint32 first_char; /* Starting character */
- pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
- pcre_uint16 dummy1; /* for later expansion */
- pcre_uint16 dummy2; /* for later expansion */
+ pcre_uint16 dummy; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- void *nullpad; /* for later expansion */
+ void *nullpad; /* NULL padding */
} real_pcre32;
+#if defined COMPILE_PCRE8
+#define REAL_PCRE real_pcre
+#elif defined COMPILE_PCRE16
+#define REAL_PCRE real_pcre16
+#elif defined COMPILE_PCRE32
+#define REAL_PCRE real_pcre32
+#endif
+
/* Assert that the size of REAL_PCRE is divisible by 8 */
typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
@@ -2399,14 +2424,14 @@
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
int workspace_size; /* Size of workspace */
- unsigned int bracount; /* Count of capturing parens as we compile */
+ unsigned int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
int assert_depth; /* Depth of nested assertions */
- int external_options; /* External (initial) options */
- int external_flags; /* External flag bits to be set */
+ pcre_uint32 external_options; /* External (initial) options */
+ pcre_uint32 external_flags; /* External flag bits to be set */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/pcretest.c 2013-04-24 12:07:09 UTC (rev 1313)
@@ -2395,7 +2395,7 @@
rc = PCRE_ERROR_BADMODE;
#endif
-if (rc < 0)
+if (rc < 0 && rc != PCRE_ERROR_UNSET)
{
fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
@@ -2471,14 +2471,18 @@
re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
-re->flags = swap_uint16(re->flags);
+re->flags = swap_uint32(re->flags);
+re->limit_match = swap_uint32(re->limit_match);
+re->limit_recursion = swap_uint32(re->limit_recursion);
+re->first_char = swap_uint16(re->first_char);
+re->req_char = swap_uint16(re->req_char);
+re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
-re->first_char = swap_uint16(re->first_char);
-re->req_char = swap_uint16(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
+re->ref_count = swap_uint16(re->ref_count);
if (extra != NULL)
{
@@ -2648,14 +2652,18 @@
re->magic_number = REVERSED_MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
-re->flags = swap_uint16(re->flags);
+re->flags = swap_uint32(re->flags);
+re->limit_match = swap_uint32(re->limit_match);
+re->limit_recursion = swap_uint32(re->limit_recursion);
+re->first_char = swap_uint32(re->first_char);
+re->req_char = swap_uint32(re->req_char);
+re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
-re->first_char = swap_uint32(re->first_char);
-re->req_char = swap_uint32(re->req_char);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
+re->ref_count = swap_uint16(re->ref_count);
if (extra != NULL)
{
@@ -3525,11 +3533,11 @@
PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
if (rc == PCRE_ERROR_BADMODE)
{
- pcre_uint16 flags_in_host_byte_order;
+ pcre_uint32 flags_in_host_byte_order;
if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
else
- flags_in_host_byte_order = swap_uint16(REAL_PCRE_FLAGS(re));
+ flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
/* Simulate the result of the function call below. */
fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
@@ -4010,6 +4018,7 @@
{
unsigned long int all_options;
pcre_uint32 first_char, need_char;
+ pcre_uint32 match_limit, recursion_limit;
int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
hascrorlf, maxlookbehind;
int nameentrysize, namecount;
@@ -4037,9 +4046,19 @@
(int)size, (int)regex_gotten_store);
fprintf(outfile, "Capturing subpattern count = %d\n", count);
+
if (backrefmax > 0)
fprintf(outfile, "Max back reference = %d\n", backrefmax);
+ if (maxlookbehind > 0)
+ fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+
+ if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
+ fprintf(outfile, "Match limit = %u\n", match_limit);
+
+ if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
+ fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
+
if (namecount > 0)
{
fprintf(outfile, "Named capturing subpatterns:\n");
@@ -4073,7 +4092,7 @@
if (do_flip) all_options = swap_uint32(all_options);
if (get_options == 0) fprintf(outfile, "No options\n");
- else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
@@ -4090,7 +4109,8 @@
((get_options & PCRE_UCP) != 0)? " ucp" : "",
((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
- ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
+ ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
+ ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
@@ -4164,9 +4184,6 @@
}
}
- if (maxlookbehind > 0)
- fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
-
/* Don't output study size; at present it is in any case a fixed
value, but it varies, depending on the computer architecture, and
so messes up the test suite. (And with the /F option, it might be
Modified: code/trunk/testdata/saved16
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16BE-1
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16BE-2
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16LE-1
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16LE-2
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved32
===================================================================
--- code/trunk/testdata/saved32 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/saved32 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1 +1 @@
-???\????ERCP\???????T???????a???c???????????????????????}???????????a???????b???????c???r???????????
\ No newline at end of file
+???d????ERCPd???????T???????????a???c???????????????????????????}???????????a???????b???????c???r???????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32BE-1
===================================================================
--- code/trunk/testdata/saved32BE-1 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/saved32BE-1 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1 +1 @@
-???\xEC???,PCRE???\xEC???????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????}???^???????j??????????????????????????????????????????????}???????j???????????????????????????????????d???r???????k\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???c???r???????\x80???????????l??? ???????????P???P???????????????q???????l??????????????????8???8???????????????????????????????\xD8???\xDF\xFF???????u???????l???????????????????????????????????????????????????????????????h???????????????r???^???????,????????????????????????????????????????
\ No newline at end of file
+???\xF4???,PCRE???\xF4???????????????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????}???^???????j??????????????????????????????????????????????}???????j???????????????????????????????????d???r???????k\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???c???r???????\x80???????????l??? ???????????P???P???????????????q???????l??????????????????8???8???????????????????????????????\xD8???\xDF\xFF???????u???????l???????????????????????????????????????????????????????????????h???????????????r???^???????,????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32BE-2
===================================================================
--- code/trunk/testdata/saved32BE-2 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/saved32BE-2 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1 +1 @@
-???\x8C???,PCRE???\x8C???????????????????????????????????????????????c???b???2???????1???????????c???b???r???a???1???????}???F??????!???????l??????????????????????????????????????????????????\xFF\xFF???????h???????????l??????????????\xF1#???????9???????????4???????f???q???????l???????????????????????????????????????????????????\xA7???????4???????r???4??????????????k\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???r???????r???F???????,????????????????????????????????????????
\ No newline at end of file
+???\x94???,PCRE???\x94???????????????????????????????????????????????????????c???b???2???????1???????????c???b???r???a???1???????}???F??????!???????l??????????????????????????????????????????????????\xFF\xFF???????h???????????l??????????????\xF1#???????9???????????4???????f???q???????l???????????????????????????????????????????????????\xA7???????4???????r???4??????????????k\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???r???????r???F???????,????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32LE-1
===================================================================
--- code/trunk/testdata/saved32LE-1 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/saved32LE-1 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1 +1 @@
-???\xEC???,ERCP\xEC???????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????}???^???????j??????????????????????????????????????????????}???????j???????????????????????????????????d???r???????k???\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFc???r???????\x80???????????l??? ???????????P???P???????????????q???????l?????????????????????8???8??????????????????????????????\xD8??\xFF\xDF??????u???????l???????????????????????????????????????????????????????????????h???????????????r???^???????,???????????????????????????????????????????
\ No newline at end of file
+???\xF4???,ERCP\xF4???????????????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????}???^???????j??????????????????????????????????????????????}???????j???????????????????????????????????d???r???????k???\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFc???r???????\x80???????????l??? ???????????P???P???????????????q???????l?????????????????????8???8??????????????????????????????\xD8??\xFF\xDF??????u???????l???????????????????????????????????????????????????????????????h???????????????r???^???????,???????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32LE-2
===================================================================
--- code/trunk/testdata/saved32LE-2 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/saved32LE-2 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1 +1 @@
-???\x8C???,ERCP\x8C???????????????????????????????????????????????c???b???2???????1???????????c???b???r???a???1???????}???F??????!???????l???????????????????????????????????????????????????\xFF\xFF??????h???????????l???????????????#\xF1??????9???????????4???????f???q???????l???????????????????????????????????????????????????\xA7???????4???????r???4??????????????k???\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFr???????r???F???????,???????????????????????????????????????????
\ No newline at end of file
+???\x94???,ERCP\x94???????????????????????????????????????????????????????c???b???2???????1???????????c???b???r???a???1???????}???F??????!???????l???????????????????????????????????????????????????\xFF\xFF??????h???????????l???????????????#\xF1??????9???????????4???????f???q???????l???????????????????????????????????????????????????\xA7???????4???????r???4??????????????k???\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xFF\xFB\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFr???????r???F???????,???????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved8
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testinput2 2013-04-24 12:07:09 UTC (rev 1313)
@@ -3802,4 +3802,39 @@
/-------------------------/
+/(*LIMIT_MATCH=12bc)abc/
+
+/(*LIMIT_MATCH=4294967290)abc/
+
+/(*LIMIT_RECURSION=4294967280)abc/I
+
+/(a+)*zz/
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\q3000
+
+/(a+)*zz/S-
+ aaaaaaaaaaaaaz\Q10
+
+/(*LIMIT_MATCH=3000)(a+)*zz/I
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\q60000
+
+/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
+ aaaaaaaaaaaaaz
+
+/(*LIMIT_MATCH=60000)(a+)*zz/I
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\q3000
+
+/(*LIMIT_RECURSION=10)(a+)*zz/IS-
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\Q1000
+
+/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+ aaaaaaaaaaaaaz
+
+/(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+ aaaaaaaaaaaaaz
+ aaaaaaaaaaaaaz\Q10
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testinput21
===================================================================
--- code/trunk/testdata/testinput21 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testinput21 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1,9 +1,15 @@
-/-- Tests for reloading pre-compile patterns. The first one gives an error
-right away. The others require the linke size to be 2. */
+/-- Tests for reloading pre-compiled patterns. The first one gives an error
+right away, and can be any old pattern compiled in 8-bit mode ("abc" is
+typical). The others require the link size to be 2. */x
<!testsaved8
-/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/
+%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ In 16-bit mode with options: S>testdata/saved16LE-1
+ FS>testdata/saved16BE-1
+ In 32-bit mode with options: S>testdata/saved32LE-1
+ FS>testdata/saved32BE-1
+--%x
<!testsaved16LE-1
Modified: code/trunk/testdata/testinput22
===================================================================
--- code/trunk/testdata/testinput22 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testinput22 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1,6 +1,11 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
+%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
+ In 16-bit mode with options: S8>testdata/saved16LE-1
+ FS8>testdata/saved16BE-1
+ In 32-bit mode with options: S8>testdata/saved32LE-1
+ FS8testdata/saved32BE-1
+--%8x
<!testsaved16LE-2
Modified: code/trunk/testdata/testoutput18-16
===================================================================
--- code/trunk/testdata/testoutput18-16 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput18-16 2013-04-24 12:07:09 UTC (rev 1313)
@@ -646,7 +646,7 @@
0: \x{11234}
/(*UTF-32)\x{11234}/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
Capturing subpattern count = 0
@@ -656,7 +656,7 @@
Need char = 'b'
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
-Failed: (*VERB) not recognized at offset 12
+Failed: (*VERB) not recognized or malformed at offset 12
/\h/SI8
Capturing subpattern count = 0
Modified: code/trunk/testdata/testoutput18-32
===================================================================
--- code/trunk/testdata/testoutput18-32 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput18-32 2013-04-24 12:07:09 UTC (rev 1313)
@@ -632,7 +632,7 @@
Error -10 (bad UTF-32 string) offset=0 reason=1
/(*UTF16)\x{11234}/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*UTF)\x{11234}/I
Capturing subpattern count = 0
@@ -643,10 +643,10 @@
0: \x{11234}
/(*UTF-32)\x{11234}/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
-Failed: (*VERB) not recognized at offset 12
+Failed: (*VERB) not recognized or malformed at offset 12
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
Capturing subpattern count = 0
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput2 2013-04-24 12:07:09 UTC (rev 1313)
@@ -448,10 +448,10 @@
/(?<!bar|cattle)foo/I
Capturing subpattern count = 0
+Max lookbehind = 6
No options
First char = 'f'
Need char = 'o'
-Max lookbehind = 6
foo
0: foo
catfoo
@@ -631,10 +631,10 @@
/\Aabc/Im
Capturing subpattern count = 0
+Max lookbehind = 1
Options: anchored multiline
No first char
No need char
-Max lookbehind = 1
/^abc/Im
Capturing subpattern count = 0
@@ -657,19 +657,19 @@
/(?<=foo)[ab]/IS
Capturing subpattern count = 0
+Max lookbehind = 3
No options
No first char
No need char
-Max lookbehind = 3
Subject length lower bound = 1
Starting byte set: a b
/(?<!foo)(alpha|omega)/IS
Capturing subpattern count = 1
+Max lookbehind = 3
No options
No first char
Need char = 'a'
-Max lookbehind = 3
Subject length lower bound = 5
Starting byte set: a o
@@ -683,11 +683,11 @@
/(?<=foo\n)^bar/Im
Capturing subpattern count = 0
+Max lookbehind = 4
Contains explicit CR or LF match
Options: multiline
No first char
Need char = 'r'
-Max lookbehind = 4
foo\nbarbar
0: bar
***Failers
@@ -701,11 +701,11 @@
/^(?<=foo\n)bar/Im
Capturing subpattern count = 0
+Max lookbehind = 4
Contains explicit CR or LF match
Options: multiline
First char at start or follows newline
Need char = 'r'
-Max lookbehind = 4
foo\nbarbar
0: bar
***Failers
@@ -744,10 +744,10 @@
/(?<=bullock|donkey)-cart/I
Capturing subpattern count = 0
+Max lookbehind = 7
No options
First char = '-'
Need char = 't'
-Max lookbehind = 7
the bullock-cart
0: -cart
a donkey-cart race
@@ -761,17 +761,17 @@
/(?<=ab(?i)x|y|z)/I
Capturing subpattern count = 0
+Max lookbehind = 3
No options
No first char
No need char
-Max lookbehind = 3
/(?>.*)(?<=(abcd)|(xyz))/I
Capturing subpattern count = 2
+Max lookbehind = 4
No options
No first char
No need char
-Max lookbehind = 4
alphabetabcd
0: alphabetabcd
1: abcd
@@ -782,10 +782,10 @@
/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/I
Capturing subpattern count = 0
+Max lookbehind = 4
No options
First char = 'Z'
Need char = 'Z'
-Max lookbehind = 4
abxyZZ
0: ZZ
abXyZZ
@@ -811,10 +811,10 @@
/(?<!(foo)a)bar/I
Capturing subpattern count = 1
+Max lookbehind = 4
No options
First char = 'b'
Need char = 'r'
-Max lookbehind = 4
bar
0: bar
foobbar
@@ -1197,10 +1197,10 @@
/\Biss\B/I+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1218,20 +1218,20 @@
/\Biss\B/IG+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
/\Biss\B/Ig+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1244,10 +1244,10 @@
/(?<=[Ms])iss/Ig+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1256,10 +1256,10 @@
/(?<=[Ms])iss/IG+
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 'i'
Need char = 's'
-Max lookbehind = 1
Mississippi
0: iss
0+ issippi
@@ -1437,10 +1437,10 @@
/...(?<=abc)/I
Capturing subpattern count = 0
+Max lookbehind = 3
No options
No first char
No need char
-Max lookbehind = 3
/abc(?!pqr)/I
Capturing subpattern count = 0
@@ -3218,10 +3218,10 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = '8'
Need char = 'X'
-Max lookbehind = 1
|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|IDZ
------------------------------------------------------------------
@@ -3232,10 +3232,10 @@
End
------------------------------------------------------------------
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = '$'
Need char = 'X'
-Max lookbehind = 1
/(.*)\d+\1/I
Capturing subpattern count = 1
@@ -3748,10 +3748,10 @@
/(?<=(abc)(?C))xyz/I
Capturing subpattern count = 1
+Max lookbehind = 3
No options
First char = 'x'
Need char = 'z'
-Max lookbehind = 3
abcxyz\C+
Callout 0: last capture = 1
0: <unset>
@@ -5396,19 +5396,19 @@
/\b.*/I
Capturing subpattern count = 0
+Max lookbehind = 1
No options
No first char
No need char
-Max lookbehind = 1
ab cd\>1
0: cd
/\b.*/Is
Capturing subpattern count = 0
+Max lookbehind = 1
Options: dotall
No first char
No need char
-Max lookbehind = 1
ab cd\>1
0: cd
@@ -8788,7 +8788,7 @@
1: \x0a
/a(*CR)b/
-Failed: (*VERB) not recognized at offset 5
+Failed: (*VERB) not recognized or malformed at offset 5
/(*CR)a.b/
a\nb
@@ -11627,19 +11627,19 @@
/\btype\b\W*?\btext\b\W*?\bjavascript\b/IS
Capturing subpattern count = 0
+Max lookbehind = 1
No options
First char = 't'
Need char = 't'
-Max lookbehind = 1
Subject length lower bound = 18
No set of starting bytes
/\btype\b\W*?\btext\b\W*?\bjavascript\b|\burl\b\W*?\bshell:|<input\b.*?\btype\b\W*?\bimage\b|\bonkeyup\b\W*?\=/IS
Capturing subpattern count = 0
+Max lookbehind = 1
No options
No first char
No need char
-Max lookbehind = 1
Subject length lower bound = 8
Starting byte set: < o t u
@@ -12236,17 +12236,17 @@
/(?>.*?a)(?<=ba)/I
Capturing subpattern count = 0
+Max lookbehind = 2
No options
No first char
Need char = 'a'
-Max lookbehind = 2
/(?:.*?a)(?<=ba)/I
Capturing subpattern count = 0
+Max lookbehind = 2
No options
First char at start or follows newline
Need char = 'a'
-Max lookbehind = 2
/.*?a(*PRUNE)b/I
Capturing subpattern count = 0
@@ -12292,17 +12292,17 @@
/(?>.*?)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
+Max lookbehind = 4
No options
No first char
No need char
-Max lookbehind = 4
/(?>.*)(?<=(abcd)|(wxyz))/I
Capturing subpattern count = 2
+Max lookbehind = 4
No options
No first char
No need char
-Max lookbehind = 4
"(?>.*)foo"I
Capturing subpattern count = 0
@@ -12574,4 +12574,95 @@
/-------------------------/
+/(*LIMIT_MATCH=12bc)abc/
+Failed: (*VERB) not recognized or malformed at offset 7
+
+/(*LIMIT_MATCH=4294967290)abc/
+Failed: (*VERB) not recognized or malformed at offset 7
+
+/(*LIMIT_RECURSION=4294967280)abc/I
+Capturing subpattern count = 0
+Recursion limit = 4294967280
+No options
+First char = 'a'
+Need char = 'c'
+
+/(a+)*zz/
+ aaaaaaaaaaaaaz
+No match
+ aaaaaaaaaaaaaz\q3000
+Error -8 (match limit exceeded)
+
+/(a+)*zz/S-
+ aaaaaaaaaaaaaz\Q10
+Error -21 (recursion limit exceeded)
+
+/(*LIMIT_MATCH=3000)(a+)*zz/I
+Capturing subpattern count = 1
+Match limit = 3000
+No options
+No first char
+Need char = 'z'
+ aaaaaaaaaaaaaz
+Error -8 (match limit exceeded)
+ aaaaaaaaaaaaaz\q60000
+Error -8 (match limit exceeded)
+
+/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
+Capturing subpattern count = 1
+Match limit = 3000
+No options
+No first char
+Need char = 'z'
+ aaaaaaaaaaaaaz
+Error -8 (match limit exceeded)
+
+/(*LIMIT_MATCH=60000)(a+)*zz/I
+Capturing subpattern count = 1
+Match limit = 60000
+No options
+No first char
+Need char = 'z'
+ aaaaaaaaaaaaaz
+No match
+ aaaaaaaaaaaaaz\q3000
+Error -8 (match limit exceeded)
+
+/(*LIMIT_RECURSION=10)(a+)*zz/IS-
+Capturing subpattern count = 1
+Recursion limit = 10
+No options
+No first char
+Need char = 'z'
+Subject length lower bound = 2
+Starting byte set: a z
+ aaaaaaaaaaaaaz
+Error -21 (recursion limit exceeded)
+ aaaaaaaaaaaaaz\Q1000
+Error -21 (recursion limit exceeded)
+
+/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+Capturing subpattern count = 1
+Recursion limit = 10
+No options
+No first char
+Need char = 'z'
+Subject length lower bound = 2
+Starting byte set: a z
+ aaaaaaaaaaaaaz
+Error -21 (recursion limit exceeded)
+
+/(*LIMIT_RECURSION=1000)(a+)*zz/IS-
+Capturing subpattern count = 1
+Recursion limit = 1000
+No options
+No first char
+Need char = 'z'
+Subject length lower bound = 2
+Starting byte set: a z
+ aaaaaaaaaaaaaz
+No match
+ aaaaaaaaaaaaaz\Q10
+Error -21 (recursion limit exceeded)
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput21-16
===================================================================
--- code/trunk/testdata/testoutput21-16 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput21-16 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1,5 +1,6 @@
-/-- Tests for reloading pre-compile patterns. The first one gives an error
-right away. The others require the linke size to be 2. */
+/-- Tests for reloading pre-compiled patterns. The first one gives an error
+right away, and can be any old pattern compiled in 8-bit mode ("abc" is
+typical). The others require the link size to be 2. */x
<!testsaved8
Compiled pattern loaded from testsaved8
@@ -7,7 +8,12 @@
Error -28 from pcre16_fullinfo(0)
Running in 16-bit mode but pattern was compiled in 8-bit mode
-/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/
+%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ In 16-bit mode with options: S>testdata/saved16LE-1
+ FS>testdata/saved16BE-1
+ In 32-bit mode with options: S>testdata/saved32LE-1
+ FS>testdata/saved32BE-1
+--%x
<!testsaved16LE-1
Compiled pattern loaded from testsaved16LE-1
Modified: code/trunk/testdata/testoutput21-32
===================================================================
--- code/trunk/testdata/testoutput21-32 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput21-32 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1,5 +1,6 @@
-/-- Tests for reloading pre-compile patterns. The first one gives an error
-right away. The others require the linke size to be 2. */
+/-- Tests for reloading pre-compiled patterns. The first one gives an error
+right away, and can be any old pattern compiled in 8-bit mode ("abc" is
+typical). The others require the link size to be 2. */x
<!testsaved8
Compiled pattern loaded from testsaved8
@@ -7,7 +8,12 @@
Error -28 from pcre32_fullinfo(0)
Running in 32-bit mode but pattern was compiled in 8-bit mode
-/-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$ --/
+%-- Generated from: ^[aL](?P<name>(?:[AaLl]+)[^xX-]*?)(?P<other>[\x{150}-\x{250}\x{300}]|[^\x{800}aAs-uS-U\x{d800}-\x{dfff}])++[^#\b\x{500}\x{1000}]{3,5}$
+ In 16-bit mode with options: S>testdata/saved16LE-1
+ FS>testdata/saved16BE-1
+ In 32-bit mode with options: S>testdata/saved32LE-1
+ FS>testdata/saved32BE-1
+--%x
<!testsaved16LE-1
Compiled pattern loaded from testsaved16LE-1
Modified: code/trunk/testdata/testoutput22-16
===================================================================
--- code/trunk/testdata/testoutput22-16 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput22-16 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1,6 +1,11 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
+%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
+ In 16-bit mode with options: S8>testdata/saved16LE-1
+ FS8>testdata/saved16BE-1
+ In 32-bit mode with options: S8>testdata/saved32LE-1
+ FS8testdata/saved32BE-1
+--%8x
<!testsaved16LE-2
Compiled pattern loaded from testsaved16LE-2
Modified: code/trunk/testdata/testoutput22-32
===================================================================
--- code/trunk/testdata/testoutput22-32 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput22-32 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1,6 +1,11 @@
/-- Tests for reloading pre-compile patterns with UTF-16 or UTF-32 support. */
-/-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az]) --/8
+%-- Generated from: (?P<cbra1>[aZ\x{400}-\x{10ffff}]{4,}[\x{f123}\x{10039}\x{20000}-\x{21234}]?|[A-Cx-z\x{100000}-\x{1000a7}\x{101234}])(?<cb2>[^az])
+ In 16-bit mode with options: S8>testdata/saved16LE-1
+ FS8>testdata/saved16BE-1
+ In 32-bit mode with options: S8>testdata/saved32LE-1
+ FS8testdata/saved32BE-1
+--%8x
<!testsaved16LE-2
Compiled pattern loaded from testsaved16LE-2
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2013-04-23 09:48:25 UTC (rev 1312)
+++ code/trunk/testdata/testoutput5 2013-04-24 12:07:09 UTC (rev 1313)
@@ -1820,10 +1820,10 @@
/(?<=\x{1234}\x{1234})\bxy/I8
Capturing subpattern count = 0
+Max lookbehind = 2
Options: utf
First char = 'x'
Need char = 'y'
-Max lookbehind = 2
/(?<!^)ETA/8
ETA