Revision: 809
http://www.exim.org/viewvc/pcre2?view=rev&revision=809
Author: ph10
Date: 2017-05-27 18:08:28 +0100 (Sat, 27 May 2017)
Log Message:
-----------
Update POSIX basic regex conversion code.
Modified Paths:
--------------
code/trunk/src/pcre2_convert.c
code/trunk/testdata/testinput24
code/trunk/testdata/testoutput24
Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c 2017-05-27 16:06:56 UTC (rev 808)
+++ code/trunk/src/pcre2_convert.c 2017-05-27 17:08:28 UTC (rev 809)
@@ -74,6 +74,7 @@
#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
+#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
/* States for range and POSIX processing */
@@ -101,12 +102,12 @@
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
-/* Recognized escapes in POSIX basic patterns. */
+/* Recognized escaped metacharacters in POSIX basic patterns. */
-static const char *posix_basic_escapes =
- STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE
+static const char *posix_meta_escapes =
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
- STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
+ STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
+ STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
@@ -155,6 +156,7 @@
/* Initialize default for error offset as end of input. */
*bufflenptr = plength;
+PUTCHARS(STR_STAR_NUL);
/* Now scan the input. */
@@ -237,7 +239,9 @@
case CHAR_LEFT_SQUARE_BRACKET:
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
- /* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */
+#ifdef NEVER
+ /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
+ support) but they are not part of POSIX 1003.1. */
if (plength >= 6)
{
@@ -257,8 +261,9 @@
continue; /* With next character */
}
}
+#endif
- /* Handle "normal" character classes */
+ /* Handle start of "normal" character classes */
posix_state = POSIX_CLASS_NOT_STARTED;
@@ -283,15 +288,17 @@
case CHAR_BACKSLASH:
if (plength <= 0) return ERROR_END_BACKSLASH;
- if (!extended && *posix < 127 &&
- strchr(posix_basic_escapes, *posix) != NULL)
- {
- if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
- if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
- lastspecial = *p++ = *posix++;
- plength--;
+ if (extended) nextisliteral = TRUE; else
+ {
+ if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
+ {
+ if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
+ if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
+ lastspecial = *p++ = *posix++;
+ plength--;
+ }
+ else nextisliteral = TRUE;
}
- else nextisliteral = TRUE;
break;
case CHAR_RIGHT_PARENTHESIS:
@@ -323,7 +330,8 @@
case CHAR_ASTERISK:
if (lastspecial != CHAR_ASTERISK)
{
- if (!extended && posix_state < POSIX_NOT_BRACKET)
+ if (!extended && (posix_state < POSIX_NOT_BRACKET ||
+ lastspecial == CHAR_LEFT_PARENTHESIS))
goto ESCAPE_LITERAL;
goto COPY_SPECIAL;
}
Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24 2017-05-27 16:06:56 UTC (rev 808)
+++ code/trunk/testdata/testinput24 2017-05-27 17:08:28 UTC (rev 809)
@@ -323,10 +323,10 @@
/a`*b/convert_glob_escape=x
+# -------- Tests of extended POSIX conversion --------
+
#pattern convert=unset:posix_extended
-/a[[:>:]z/
-
/<[[:a[:digit:]b]>/
<[>
<:>
@@ -338,8 +338,6 @@
/a+\1b\\c|d[ab\c]/
-/a[[:<:]]b[[:>:]]/
-
/<[]bc]>/
<]>
<b>
@@ -361,6 +359,8 @@
/a***b/
+# -------- Tests of basic POSIX conversion --------
+
#pattern convert=unset:posix_basic
/a*b+c\+[def](ab)\(cd\)/
@@ -371,6 +371,9 @@
a1b
/how.to how\.to/
+ how\nto how.to
+\= Expect no match
+ how\x{0}to how.to
/^how to \^how to/
@@ -383,13 +386,11 @@
XabcY
X*abcY
X**abcY
+
+/*ab\(*cd\)/
/^b\(c^d\)\(^e^f\)/
/a***b/
-#pattern convert=unset
-
-/abc/
-
# End of testinput24
Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24 2017-05-27 16:06:56 UTC (rev 808)
+++ code/trunk/testdata/testoutput24 2017-05-27 17:08:28 UTC (rev 809)
@@ -508,14 +508,12 @@
/a`*b/convert_glob_escape=x
** Invalid glob escape 'x'
+# -------- Tests of extended POSIX conversion --------
+
#pattern convert=unset:posix_extended
-/a[[:>:]z/
-a[[:>:]z
-Failed: error 130 at offset 4: unknown POSIX class name
-
/<[[:a[:digit:]b]>/
-<[[:a[:digit:]b]>
+(*NUL)<[[:a[:digit:]b]>
<[>
0: <[>
<:>
@@ -531,13 +529,10 @@
No match
/a+\1b\\c|d[ab\c]/
-a+1b\\c|d[ab\\c]
+(*NUL)a+1b\\c|d[ab\\c]
-/a[[:<:]]b[[:>:]]/
-a[[:<:]]b[[:>:]]
-
/<[]bc]>/
-<[]bc]>
+(*NUL)<[]bc]>
<]>
0: <]>
<b>
@@ -546,7 +541,7 @@
0: <c>
/<[^]bc]>/
-<[^]bc]>
+(*NUL)<[^]bc]>
<.>
0: <.>
\= Expect no match
@@ -556,7 +551,7 @@
No match
/(a)\1b/
-(a)1b
+(*NUL)(a)1b
a1b
0: a1b
1: a
@@ -565,21 +560,23 @@
No match
/(ab)c)d]/
-(ab)c\)d\]
+(*NUL)(ab)c\)d\]
Xabc)d]Y
0: abc)d]
1: ab
/a***b/
-a*b
+(*NUL)a*b
+# -------- Tests of basic POSIX conversion --------
+
#pattern convert=unset:posix_basic
/a*b+c\+[def](ab)\(cd\)/
-a*b\+c+[def]\(ab\)(cd)
+(*NUL)a*b\+c\+[def]\(ab\)(cd)
/\(a\)\1b/
-(a)\1b
+(*NUL)(a)\1b
aab
0: aab
1: a
@@ -588,21 +585,26 @@
No match
/how.to how\.to/
-how.to how\.to
+(*NUL)how.to how\.to
+ how\nto how.to
+ 0: how\x0ato how.to
+\= Expect no match
+ how\x{0}to how.to
+No match
/^how to \^how to/
-^how to \^how to
+(*NUL)^how to \^how to
/^*abc/
-^\*abc
+(*NUL)^\*abc
/*abc/
-\*abc
+(*NUL)\*abc
X*abcY
0: *abc
/**abc/
-\**abc
+(*NUL)\**abc
XabcY
0: abc
X*abcY
@@ -609,15 +611,14 @@
0: *abc
X**abcY
0: **abc
+
+/*ab\(*cd\)/
+(*NUL)\*ab(\*cd)
/^b\(c^d\)\(^e^f\)/
-^b(c\^d)(^e\^f)
+(*NUL)^b(c\^d)(^e\^f)
/a***b/
-a*b
+(*NUL)a*b
-#pattern convert=unset
-
-/abc/
-
# End of testinput24