Revision: 781
http://www.exim.org/viewvc/pcre2?view=rev&revision=781
Author: ph10
Date: 2017-05-13 18:46:27 +0100 (Sat, 13 May 2017)
Log Message:
-----------
Updates to experimental conversion code.
Modified Paths:
--------------
code/trunk/src/pcre2_convert.c
code/trunk/testdata/testinput24
code/trunk/testdata/testoutput24
Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c 2017-05-11 16:49:58 UTC (rev 780)
+++ code/trunk/src/pcre2_convert.c 2017-05-13 17:46:27 UTC (rev 781)
@@ -118,7 +118,9 @@
PCRE2_UCHAR *endp = p + use_length - 1; /* Allow for trailing zero */
PCRE2_SIZE convlength = 0;
+uint32_t bracount = 0;
uint32_t posix_class_state = POSIX_CLASS_NOT_STARTED;
+uint32_t lastspecial = 0;
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
BOOL inclass = FALSE;
BOOL nextisliteral = FALSE;
@@ -130,8 +132,14 @@
*bufflenptr = plength;
-/* Now scan the input */
+/* Now scan the input. In non-extended patterns, an initial asterisk is treated
+as literal. Still figuring out what happens in extended patterns... */
+if (plength > 0 && *posix == CHAR_ASTERISK)
+ {
+ if (!extended) nextisliteral = TRUE;
+ }
+
while (plength > 0)
{
uint32_t c, sc;
@@ -262,35 +270,56 @@
{
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
- *p++ = *posix++;
+ lastspecial = *p++ = *posix++;
plength--;
}
else nextisliteral = TRUE;
break;
+ case CHAR_RIGHT_PARENTHESIS:
+ if (!extended || bracount == 0) goto ESCAPE_LITERAL;
+ bracount--;
+ goto COPY_SPECIAL;
+
+ case CHAR_LEFT_PARENTHESIS:
+ bracount++;
+ /* Fall through */
+
case CHAR_QUESTION_MARK:
case CHAR_PLUS:
case CHAR_LEFT_CURLY_BRACKET:
case CHAR_RIGHT_CURLY_BRACKET:
case CHAR_VERTICAL_LINE:
- case CHAR_LEFT_PARENTHESIS:
- case CHAR_RIGHT_PARENTHESIS:
- if (!extended) PUTCHARS(STR_BACKSLASH);
+ if (!extended) goto ESCAPE_LITERAL;
/* Fall through */
- case CHAR_ASTERISK:
case CHAR_DOT:
- case CHAR_CIRCUMFLEX_ACCENT:
case CHAR_DOLLAR_SIGN:
+ COPY_SPECIAL:
+ lastspecial = c;
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
- *p++ = sc;
- break;
-
+ *p++ = c;
+ break;
+
+ case CHAR_ASTERISK:
+ if (lastspecial != CHAR_ASTERISK) goto COPY_SPECIAL;
+ break; /* Ignore second and subsequent asterisks */
+
+ case CHAR_CIRCUMFLEX_ACCENT:
+ if (extended ||
+ lastspecial == 0 ||
+ lastspecial == CHAR_LEFT_PARENTHESIS ||
+ lastspecial == CHAR_VERTICAL_LINE)
+ goto COPY_SPECIAL;
+ /* Fall through */
+
default:
if (c < 256 && strchr("\\{}?*+[]()|.^$", c) != NULL)
{
+ ESCAPE_LITERAL:
PUTCHARS(STR_BACKSLASH);
}
+ lastspecial = 0xff; /* Indicates nothing special */
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
memcpy(p, posix - clength, CU2BYTES(clength));
p += clength;
Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24 2017-05-11 16:49:58 UTC (rev 780)
+++ code/trunk/testdata/testinput24 2017-05-13 17:46:27 UTC (rev 781)
@@ -247,6 +247,11 @@
\= Expect no match
aab
+/(ab)c)d]/
+ Xabc)d]Y
+
+/a***b/
+
#pattern convert=unset
#pattern convert=posix_basic
@@ -261,6 +266,18 @@
/^how to \^how to/
+/*abc/
+ X*abcY
+
+/**abc/
+ XabcY
+ X*abcY
+ X**abcY
+
+/^b\(c^d\)\(^e^f\)/
+
+/a***b/
+
#pattern convert=unset
/abc/
Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24 2017-05-11 16:49:58 UTC (rev 780)
+++ code/trunk/testdata/testoutput24 2017-05-13 17:46:27 UTC (rev 781)
@@ -396,6 +396,15 @@
aab
No match
+/(ab)c)d]/
+(ab)c\)d\]
+ Xabc)d]Y
+ 0: abc)d]
+ 1: ab
+
+/a***b/
+a*b
+
#pattern convert=unset
#pattern convert=posix_basic
@@ -417,6 +426,26 @@
/^how to \^how to/
^how to \^how to
+/*abc/
+\*abc
+ X*abcY
+ 0: *abc
+
+/**abc/
+\**abc
+ XabcY
+ 0: abc
+ X*abcY
+ 0: *abc
+ X**abcY
+ 0: **abc
+
+/^b\(c^d\)\(^e^f\)/
+^b(c\^d)(^e\^f)
+
+/a***b/
+a*b
+
#pattern convert=unset
/abc/