Revision: 799
http://www.exim.org/viewvc/pcre2?view=rev&revision=799
Author: ph10
Date: 2017-05-23 17:08:48 +0100 (Tue, 23 May 2017)
Log Message:
-----------
More experimental convert code evolution.
Modified Paths:
--------------
code/trunk/src/pcre2.h
code/trunk/src/pcre2.h.in
code/trunk/src/pcre2_context.c
code/trunk/src/pcre2_convert.c
code/trunk/src/pcre2_intmodedep.h
code/trunk/src/pcre2test.c
code/trunk/testdata/testinput24
code/trunk/testdata/testoutput24
Modified: code/trunk/src/pcre2.h
===================================================================
--- code/trunk/src/pcre2.h 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/src/pcre2.h 2017-05-23 16:08:48 UTC (rev 799)
@@ -187,7 +187,7 @@
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u
-#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u
+#define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u
#define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u
@@ -497,6 +497,8 @@
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
@@ -733,6 +735,7 @@
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
+#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/src/pcre2.h.in 2017-05-23 16:08:48 UTC (rev 799)
@@ -187,7 +187,7 @@
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u
-#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u
+#define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u
#define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u
@@ -497,6 +497,8 @@
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
@@ -733,6 +735,7 @@
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
+#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
Modified: code/trunk/src/pcre2_context.c
===================================================================
--- code/trunk/src/pcre2_context.c 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/src/pcre2_context.c 2017-05-23 16:08:48 UTC (rev 799)
@@ -189,15 +189,17 @@
}
-/* A default covert context is set up to save having to initialize at run time
+/* A default convert context is set up to save having to initialize at run time
when no context is supplied to the convert function. */
const pcre2_convert_context PRIV(default_convert_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
#ifdef _WIN32
- CHAR_BACKSLASH /* Default path separator */
-#else /* is OS dependent */
- CHAR_SLASH /* Not Windows */
+ CHAR_BACKSLASH, /* Default path separator */
+ CHAR_GRAVE_ACCENT /* Default escape character */
+#else /* Not Windows */
+ CHAR_SLASH, /* Default path separator */
+ CHAR_BACKSLASH /* Default escape character */
#endif
};
@@ -454,6 +456,14 @@
return 0;
}
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
+{ /* Store the glob escape character in ccontext; yields 0 on success. */
+if (escape > 255 || (escape != 0 && !ispunct(escape))) /* Accept 0, or a punctuation character no greater than 255 */
+ return PCRE2_ERROR_BADDATA;
+ccontext->glob_escape = escape; /* The glob converter tests glob_escape == 0 to mean "no escape character" */
+return 0;
+}
/* End of pcre2_context.c */
Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/src/pcre2_convert.c 2017-05-23 16:08:48 UTC (rev 799)
@@ -49,8 +49,10 @@
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
- PCRE2_CONVERT_GLOB_NO_BACKSLASH|PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
- PCRE2_CONVERT_GLOB_NO_STARSTAR|TYPE_OPTIONS)
+ PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL| \
+ PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
+ PCRE2_CONVERT_GLOB_NO_STARSTAR| \
+ TYPE_OPTIONS)
#define DUMMY_BUFFER_SIZE 100
@@ -76,7 +78,7 @@
/* States for range and POSIX processing */
enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED };
-enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
+enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
/* Macro to add a character string to the output buffer, checking for overflow. */
@@ -89,25 +91,25 @@
*p++ = *s; \
} \
}
-
+
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
static const char *pcre2_escaped_literals =
- STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
- STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
+ STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
+ STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
-
+
/* Recognized escapes in POSIX basic patterns. */
static const char *posix_basic_escapes =
STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
- STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
-
+ STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
+
/*************************************************
* Convert a POSIX pattern *
*************************************************/
@@ -186,46 +188,46 @@
if (posix_state >= POSIX_CLASS_NOT_STARTED)
{
- if (c == CHAR_RIGHT_SQUARE_BRACKET)
+ if (c == CHAR_RIGHT_SQUARE_BRACKET)
{
- PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
+ PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
posix_state = POSIX_NOT_BRACKET;
}
-
+
/* Not the end of the class */
-
- else
+
+ else
{
switch (posix_state)
{
case POSIX_CLASS_STARTED:
if (c <= 127 && islower(c)) break; /* Remain in started state */
- posix_state = POSIX_CLASS_NOT_STARTED;
- if (c == CHAR_COLON && plength > 0 &&
+ posix_state = POSIX_CLASS_NOT_STARTED;
+ if (c == CHAR_COLON && plength > 0 &&
*posix == CHAR_RIGHT_SQUARE_BRACKET)
{
PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
- plength--;
+ plength--;
posix++;
- continue; /* With next character after :] */
+ continue; /* With next character after :] */
}
- /* Fall through */
-
- case POSIX_CLASS_NOT_STARTED:
- if (c == CHAR_LEFT_SQUARE_BRACKET)
+ /* Fall through */
+
+ case POSIX_CLASS_NOT_STARTED:
+ if (c == CHAR_LEFT_SQUARE_BRACKET)
posix_state = POSIX_CLASS_STARTING;
break;
-
+
case POSIX_CLASS_STARTING:
if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
break;
- }
-
+ }
+
if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
memcpy(p, posix - clength, CU2BYTES(clength));
p += clength;
- }
+ }
}
/* Handle a character not within a class. */
@@ -234,17 +236,17 @@
{
case CHAR_LEFT_SQUARE_BRACKET:
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
-
+
/* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */
-
+
if (plength >= 6)
{
if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
posix[1] == CHAR_COLON &&
- (posix[2] == CHAR_LESS_THAN_SIGN ||
+ (posix[2] == CHAR_LESS_THAN_SIGN ||
posix[2] == CHAR_GREATER_THAN_SIGN) &&
posix[3] == CHAR_COLON &&
- posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
+ posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
{
if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
@@ -251,15 +253,15 @@
memcpy(p, posix, CU2BYTES(6));
p += 6;
posix += 6;
- plength -= 6;
- continue; /* With next character */
+ plength -= 6;
+ continue; /* With next character */
}
- }
-
+ }
+
/* Handle "normal" character classes */
-
- posix_state = POSIX_CLASS_NOT_STARTED;
+ posix_state = POSIX_CLASS_NOT_STARTED;
+
/* Handle ^ and ] as first characters */
if (plength > 0)
@@ -275,23 +277,23 @@
posix++;
plength--;
PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
- }
+ }
}
break;
case CHAR_BACKSLASH:
if (plength <= 0) return ERROR_END_BACKSLASH;
- if (!extended && *posix < 127 &&
- strchr(posix_basic_escapes, *posix) != NULL)
+ if (!extended && *posix < 127 &&
+ strchr(posix_basic_escapes, *posix) != NULL)
{
- if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
+ if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
lastspecial = *p++ = *posix++;
- plength--;
+ plength--;
}
else nextisliteral = TRUE;
break;
-
+
case CHAR_RIGHT_PARENTHESIS:
if (!extended || bracount == 0) goto ESCAPE_LITERAL;
bracount--;
@@ -299,60 +301,60 @@
case CHAR_LEFT_PARENTHESIS:
bracount++;
- /* Fall through */
+ /* Fall through */
case CHAR_QUESTION_MARK:
case CHAR_PLUS:
- case CHAR_LEFT_CURLY_BRACKET:
- case CHAR_RIGHT_CURLY_BRACKET:
+ case CHAR_LEFT_CURLY_BRACKET:
+ case CHAR_RIGHT_CURLY_BRACKET:
case CHAR_VERTICAL_LINE:
if (!extended) goto ESCAPE_LITERAL;
- /* Fall through */
-
+ /* Fall through */
+
case CHAR_DOT:
- case CHAR_DOLLAR_SIGN:
- posix_state = POSIX_NOT_BRACKET;
+ case CHAR_DOLLAR_SIGN:
+ posix_state = POSIX_NOT_BRACKET;
COPY_SPECIAL:
- lastspecial = c;
+ lastspecial = c;
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
*p++ = c;
- break;
+ break;
case CHAR_ASTERISK:
- if (lastspecial != CHAR_ASTERISK)
+ if (lastspecial != CHAR_ASTERISK)
{
if (!extended && posix_state < POSIX_NOT_BRACKET)
- goto ESCAPE_LITERAL;
+ goto ESCAPE_LITERAL;
goto COPY_SPECIAL;
- }
- break; /* Ignore second and subsequent asterisks */
+ }
+ break; /* Ignore second and subsequent asterisks */
case CHAR_CIRCUMFLEX_ACCENT:
if (extended) goto COPY_SPECIAL;
- if (posix_state == POSIX_START_REGEX ||
- lastspecial == CHAR_LEFT_PARENTHESIS)
+ if (posix_state == POSIX_START_REGEX ||
+ lastspecial == CHAR_LEFT_PARENTHESIS)
{
posix_state = POSIX_ANCHORED;
goto COPY_SPECIAL;
- }
- /* Fall through */
+ }
+ /* Fall through */
default:
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
{
- ESCAPE_LITERAL:
+ ESCAPE_LITERAL:
PUTCHARS(STR_BACKSLASH);
}
- lastspecial = 0xff; /* Indicates nothing special */
+ lastspecial = 0xff; /* Indicates nothing special */
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
memcpy(p, posix - clength, CU2BYTES(clength));
p += clength;
- posix_state = POSIX_NOT_BRACKET;
+ posix_state = POSIX_NOT_BRACKET;
break;
}
}
-if (posix_state >= POSIX_CLASS_NOT_STARTED)
+if (posix_state >= POSIX_CLASS_NOT_STARTED)
return ERROR_MISSING_SQUARE_BRACKET;
convlength += p - pp; /* Final segment */
*bufflenptr = convlength;
@@ -726,7 +728,7 @@
PCRE2_SPTR pattern_end = pattern + plength;
PCRE2_UCHAR separator = ccontext->glob_separator;
PCRE2_UCHAR c;
-BOOL no_backslash = (options & PCRE2_CONVERT_GLOB_NO_BACKSLASH) != 0;
+BOOL no_escape = ccontext->glob_escape == 0;
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
BOOL in_atomic = FALSE;
@@ -734,6 +736,8 @@
BOOL with_escape, is_start;
int result, len;
+(void)utf; /* Avoid compiler warning */
+
if (separator >= 128)
{
/* Currently only ASCII separators are supported. */
@@ -805,7 +809,7 @@
break;
}
- if (!no_backslash && *pattern == CHAR_BACKSLASH)
+ if (!no_escape && *pattern == ccontext->glob_escape)
{
pattern++;
if (pattern >= pattern_end)
@@ -925,11 +929,11 @@
continue;
}
- if (!no_backslash && c == CHAR_BACKSLASH)
+ if (!no_escape && c == ccontext->glob_escape)
{
if (pattern >= pattern_end)
{
- result = ERROR_END_BACKSLASH;
+ result = PCRE2_ERROR_CONVERT_SYNTAX;
break;
}
c = *pattern++;
Modified: code/trunk/src/pcre2_intmodedep.h
===================================================================
--- code/trunk/src/pcre2_intmodedep.h 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/src/pcre2_intmodedep.h 2017-05-23 16:08:48 UTC (rev 799)
@@ -572,7 +572,7 @@
uint16_t bsr_convention;
uint16_t newline_convention;
uint32_t parens_nest_limit;
- uint32_t extra_options;
+ uint32_t extra_options;
} pcre2_real_compile_context;
/* The real match context structure. */
@@ -586,7 +586,7 @@
int (*callout)(pcre2_callout_block *, void *);
void *callout_data;
PCRE2_SIZE offset_limit;
- uint32_t heap_limit;
+ uint32_t heap_limit;
uint32_t match_limit;
uint32_t depth_limit;
} pcre2_real_match_context;
@@ -595,7 +595,8 @@
typedef struct pcre2_real_convert_context {
pcre2_memctl memctl;
- uint32_t glob_separator;
+ uint32_t glob_separator;
+ uint32_t glob_escape;
} pcre2_real_convert_context;
/* The real compiled code structure. The type for the blocksize field is
@@ -623,7 +624,7 @@
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
uint32_t flags; /* Various state flags */
- uint32_t limit_heap; /* Limit set in the pattern */
+ uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */
uint32_t limit_depth; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */
@@ -638,9 +639,9 @@
uint16_t name_count; /* Number of name entries in the table */
} pcre2_real_code;
-/* The real match data structure. Define ovector large so that array bound
-checkers don't grumble. Memory for this structure is obtained by calling
-pcre2_match_data_create(), which sets the size as the offset of ovector plus
+/* The real match data structure. Define ovector large so that array bound
+checkers don't grumble. Memory for this structure is obtained by calling
+pcre2_match_data_create(), which sets the size as the offset of ovector plus
pairs of elements for each capturing group. (See also the heapframe structure
below.) */
@@ -781,7 +782,7 @@
PCRE2_SPTR ecode; /* The current position in the pattern */
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
PCRE2_SIZE length; /* Used for character, string, or code lengths */
- PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
+ PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
uint32_t rdepth; /* "Recursion" depth */
uint32_t group_frame_type; /* Type information for group frames */
@@ -798,15 +799,15 @@
#endif
/* The rest have to be copied from the previous frame whenever a new frame
- becomes current. The final field is specified as a large vector so that
- runtime array bound checks don't catch references to it. However, for any
- specific call to pcre2_match() the memory allocated for each frame structure
- allows for exactly the right size ovector for the number of capturing
+ becomes current. The final field is specified as a large vector so that
+ runtime array bound checks don't catch references to it. However, for any
+ specific call to pcre2_match() the memory allocated for each frame structure
+ allows for exactly the right size ovector for the number of capturing
parentheses. */
PCRE2_SPTR eptr; /* MUST BE FIRST */
PCRE2_SPTR start_match; /* Can be adjusted by \K */
- PCRE2_SPTR mark; /* Most recent mark on the success path */
+ PCRE2_SPTR mark; /* Most recent mark on the success path */
uint32_t current_recurse; /* Current (deepest) recursion number */
uint32_t capture_last; /* Most recent capture */
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
@@ -825,7 +826,7 @@
PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
heapframe *match_frames; /* Points to vector of frames */
heapframe *match_frames_top; /* Points after the end of the vector */
- heapframe *stack_frames; /* The original vector on the stack */
+ heapframe *stack_frames; /* The original vector on the stack */
PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */
@@ -852,7 +853,7 @@
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
- uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
+ uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/src/pcre2test.c 2017-05-23 16:08:48 UTC (rev 799)
@@ -400,14 +400,14 @@
} convertstruct;
static convertstruct convertlist[] = {
- { "glob", PCRE2_CONVERT_GLOB },
- { "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
- { "glob_no_backslash", PCRE2_CONVERT_GLOB_NO_BACKSLASH },
- { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
+ { "glob", PCRE2_CONVERT_GLOB },
+ { "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
+ { "glob_no_dot_special", PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL },
+ { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
{ "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
- { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
- { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
- { "unset", CONVERT_UNSET }};
+ { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
+ { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
+ { "unset", CONVERT_UNSET }};
#define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
@@ -524,6 +524,7 @@
uint32_t tables_id;
uint32_t convert_type;
uint32_t convert_length;
+ uint32_t convert_glob_escape;
uint32_t convert_glob_separator;
uint32_t regerror_buffsize;
uint8_t locale[LOCALESIZE];
@@ -599,6 +600,7 @@
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
{ "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
+ { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
{ "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
{ "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
@@ -1286,6 +1288,14 @@
else \
r = pcre2_set_glob_separator_32(G(a,32),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) /* Call the 8-, 16- or 32-bit pcre2_set_glob_escape selected by test_mode; result goes in r */ \
+ if (test_mode == PCRE8_MODE) \
+ r = pcre2_set_glob_escape_8(G(a,8),b); \
+ else if (test_mode == PCRE16_MODE) \
+ r = pcre2_set_glob_escape_16(G(a,16),b); \
+ else \
+ r = pcre2_set_glob_escape_32(G(a,32),b)
+
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_heap_limit_8(G(a,8),b); \
@@ -1753,6 +1763,12 @@
else \
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) /* Call the BITONE variant when test_mode matches it, otherwise the BITTWO variant; result goes in r */ \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
+ else \
+ r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)
+
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
@@ -1983,6 +1999,7 @@
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
@@ -2086,6 +2103,7 @@
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
@@ -2189,6 +2207,7 @@
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
@@ -2903,7 +2922,7 @@
*************************************************/
/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
-For printing *MARK strings, a negative length is given.If handed a NULL file,
+For printing *MARK strings, a negative length is given. If handed a NULL file,
just counts chars without printing. */
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
@@ -2910,6 +2929,7 @@
{
int yield = 0;
(void)(utf); /* Avoid compiler warning */
+
if (length < 0) length = p[-1];
while (length-- > 0)
{
@@ -5385,6 +5405,21 @@
convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
CONCTXCPY(con_context, default_con_context);
+
+ if (pat_patctl.convert_glob_escape != 0)
+ {
+ uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
+ pat_patctl.convert_glob_escape;
+ PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
+ if (rc != 0)
+ {
+ fprintf(outfile, "** Invalid glob escape '%c'\n",
+ pat_patctl.convert_glob_escape);
+ convert_return = PR_SKIP;
+ goto CONVERT_FINISH;
+ }
+ }
+
if (pat_patctl.convert_glob_separator != 0)
{
PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/testdata/testinput24 2017-05-23 16:08:48 UTC (rev 799)
@@ -281,12 +281,18 @@
/??a??/
#pattern convert=unset
-#pattern convert=glob:glob_no_backslash
+#pattern convert=glob,convert_glob_escape=0
/a\b\cd/
/**\/a/
+/a`*b/convert_glob_escape=`
+
+/a`*b/convert_glob_escape=0
+
+/a`*b/convert_glob_escape=x
+
#pattern convert=unset:posix_extended
/a[[:>:]z/
Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24 2017-05-23 15:17:44 UTC (rev 798)
+++ code/trunk/testdata/testoutput24 2017-05-23 16:08:48 UTC (rev 799)
@@ -429,7 +429,7 @@
(?s)\A..a..\z
#pattern convert=unset
-#pattern convert=glob:glob_no_backslash
+#pattern convert=glob,convert_glob_escape=0
/a\b\cd/
(?s)\Aa\\b\\cd\z
@@ -437,6 +437,15 @@
/**\/a/
** Pattern conversion error at offset 2: invalid syntax
+/a`*b/convert_glob_escape=`
+(?s)\Aa\*b\z
+
+/a`*b/convert_glob_escape=0
+(?s)\Aa`(*COMMIT)[^/]*?b\z
+
+/a`*b/convert_glob_escape=x
+** Invalid glob escape 'x'
+
#pattern convert=unset:posix_extended
/a[[:>:]z/