Revision: 803
http://www.exim.org/viewvc/pcre2?view=rev&revision=803
Author: zherczeg
Date: 2017-05-24 16:22:03 +0100 (Wed, 24 May 2017)
Log Message:
-----------
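Support the general case of starstar (`**`) in glob conversion: a `**` that is not delimited by path separators is now converted instead of being rejected as a syntax error.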
Modified Paths:
--------------
code/trunk/src/pcre2_convert.c
code/trunk/testdata/testinput24
code/trunk/testdata/testoutput24
Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c 2017-05-24 10:14:43 UTC (rev 802)
+++ code/trunk/src/pcre2_convert.c 2017-05-24 15:22:03 UTC (rev 803)
@@ -616,7 +616,9 @@
if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
{
- convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
+ out->out_str[0] = CHAR_BACKSLASH;
+ out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
+ convert_glob_write_str(out, 2);
has_prev_c = TRUE;
prev_c = CHAR_RIGHT_SQUARE_BRACKET;
pattern++;
@@ -777,8 +779,8 @@
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
BOOL in_atomic = FALSE;
BOOL after_starstar = FALSE;
-BOOL with_escape, is_start;
-int result, len;
+BOOL with_escape, is_start, after_separator;
+int result;
(void)utf; /* Avoid compiler warning. */
@@ -840,11 +842,7 @@
if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
{
- if (!is_start && pattern[-2] != separator)
- {
- result = PCRE2_ERROR_CONVERT_SYNTAX;
- break;
- }
+ after_separator = is_start || (pattern[-2] == separator);
do pattern++; while (pattern < pattern_end &&
*pattern == CHAR_ASTERISK);
@@ -855,27 +853,16 @@
break;
}
- if (escape != 0 && *pattern == escape)
- {
+ after_starstar = TRUE;
+
+ if (after_separator && escape != 0 && *pattern == escape &&
+ pattern + 1 < pattern_end && pattern[1] == separator)
pattern++;
- if (pattern >= pattern_end)
- {
- result = PCRE2_ERROR_CONVERT_SYNTAX;
- break;
- }
- }
- if (*pattern != separator)
+ if (is_start)
{
- result = PCRE2_ERROR_CONVERT_SYNTAX;
- break;
- }
+ if (*pattern != separator) continue;
- pattern++;
- after_starstar = TRUE;
-
- if (is_start)
- {
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
out.out_str[1] = CHAR_QUESTION_MARK;
out.out_str[2] = CHAR_COLON;
@@ -886,11 +873,22 @@
convert_glob_print_separator(&out, separator, with_escape);
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
+
+ pattern++;
continue;
}
convert_glob_print_commit(&out);
+ if (!after_separator || *pattern != separator)
+ {
+ out.out_str[0] = CHAR_DOT;
+ out.out_str[1] = CHAR_ASTERISK;
+ out.out_str[2] = CHAR_QUESTION_MARK;
+ convert_glob_write_str(&out, 3);
+ continue;
+ }
+
out.out_str[0] = CHAR_LEFT_PARENTHESIS;
out.out_str[1] = CHAR_QUESTION_MARK;
out.out_str[2] = CHAR_COLON;
@@ -897,21 +895,17 @@
out.out_str[3] = CHAR_DOT;
out.out_str[4] = CHAR_ASTERISK;
out.out_str[5] = CHAR_QUESTION_MARK;
- len = 6;
- if (with_escape)
- {
- out.out_str[6] = CHAR_BACKSLASH;
- len = 7;
- }
+ convert_glob_write_str(&out, 6);
- convert_glob_write_str(&out, len);
+ convert_glob_print_separator(&out, separator, with_escape);
- out.out_str[0] = (uint8_t) separator;
- out.out_str[1] = CHAR_RIGHT_PARENTHESIS;
+ out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
+ out.out_str[1] = CHAR_QUESTION_MARK;
out.out_str[2] = CHAR_QUESTION_MARK;
- out.out_str[3] = CHAR_QUESTION_MARK;
- convert_glob_write_str(&out, 4);
+ convert_glob_write_str(&out, 3);
+
+ pattern++;
continue;
}
Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24 2017-05-24 10:14:43 UTC (rev 802)
+++ code/trunk/testdata/testinput24 2017-05-24 15:22:03 UTC (rev 803)
@@ -252,6 +252,9 @@
/****/
/**\/abc/
+ abc
+ x/abc
+ xabc
/abc\/**/
@@ -271,6 +274,19 @@
xx/xx/xx/xax/xx/xb
xx/xx/xx/xax/xx/x
+"**a"convert=glob
+ a
+ c/b/a
+ c/b/aaa
+
+"a**/b"convert=glob
+ a/b
+ ab
+
+"a/**b"convert=glob
+ a/b
+ ab
+
#pattern convert=glob:glob_no_starstar
/***/
Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24 2017-05-24 10:14:43 UTC (rev 802)
+++ code/trunk/testdata/testoutput24 2017-05-24 15:22:03 UTC (rev 803)
@@ -41,7 +41,7 @@
# Now some actual tests
/a?b[]xy]*c/
-(?s)\Aa[^/]b[]xy](*COMMIT)[^/]*?c\z
+(?s)\Aa[^/]b[\]xy](*COMMIT)[^/]*?c\z
azb]1234c
0: azb]1234c
@@ -143,15 +143,15 @@
0: ten
/a[]]b/
-(?s)\Aa[]]b\z
+(?s)\Aa[\]]b\z
a]b
0: a]b
/a[]a-]b/
-(?s)\Aa[]a\-]b\z
+(?s)\Aa[\]a\-]b\z
/a[]-]b/
-(?s)\Aa[]\-]b\z
+(?s)\Aa[\]\-]b\z
a-b
0: a-b
a]b
@@ -161,7 +161,7 @@
No match
/a[]a-z]b/
-(?s)\Aa[]a-z]b\z
+(?s)\Aa[\]a-z]b\z
aab
0: aab
@@ -343,10 +343,10 @@
(?s)\A[^/]a[^/]/[^/]b[^/]\z
/[a\\b\c][]][-][\]\-]/
-(?s)\A[a\\bc][]][\-][\]\-]\z
+(?s)\A[a\\bc][\]][\-][\]\-]\z
/[^a\\b\c][!]][!-][^\]\-]/
-(?s)\A[^/a\\bc][^/]][^/\-][^/\]\-]\z
+(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z
/[[:alpha:][:xdigit:][:word:]]/
(?s)\A[[:alpha:][:xdigit:][:word:]](?<!/)\z
@@ -389,6 +389,12 @@
/**\/abc/
(?s)(?:\A|/)abc\z
+ abc
+ 0: abc
+ x/abc
+ 0: /abc
+ xabc
+No match
/abc\/**/
(?s)\Aabc/
@@ -420,6 +426,29 @@
xx/xx/xx/xax/xx/x
No match
+"**a"convert=glob
+(?s)a\z
+ a
+ 0: a
+ c/b/a
+ 0: a
+ c/b/aaa
+ 0: a
+
+"a**/b"convert=glob
+(?s)\Aa(*COMMIT).*?/b\z
+ a/b
+ 0: a/b
+ ab
+No match
+
+"a/**b"convert=glob
+(?s)\Aa/(*COMMIT).*?b\z
+ a/b
+ 0: a/b
+ ab
+No match
+
#pattern convert=glob:glob_no_starstar
/***/
@@ -438,7 +467,7 @@
(?s)a
/**a**/
-** Pattern conversion error at offset 2: invalid syntax
+(?s)a
/a*b/
(?s)\Aa(*COMMIT).*?b\z
@@ -456,7 +485,7 @@
(?s)\Aa\\b\\cd\z
/**\/a/
-** Pattern conversion error at offset 2: invalid syntax
+(?s)\\/a\z
/a`*b/convert_glob_escape=`
(?s)\Aa\*b\z