[Pcre-svn] [781] code/trunk: Updates to experimental conversion code.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [781] code/trunk: Updates to experimental conversion code.
Revision: 781
          http://www.exim.org/viewvc/pcre2?view=rev&revision=781
Author:   ph10
Date:     2017-05-13 18:46:27 +0100 (Sat, 13 May 2017)
Log Message:
-----------
Updates to experimental conversion code.


Modified Paths:
--------------
    code/trunk/src/pcre2_convert.c
    code/trunk/testdata/testinput24
    code/trunk/testdata/testoutput24


Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c    2017-05-11 16:49:58 UTC (rev 780)
+++ code/trunk/src/pcre2_convert.c    2017-05-13 17:46:27 UTC (rev 781)
@@ -118,7 +118,9 @@
 PCRE2_UCHAR *endp = p + use_length - 1;  /* Allow for trailing zero */
 PCRE2_SIZE convlength = 0;


+uint32_t bracount = 0;
uint32_t posix_class_state = POSIX_CLASS_NOT_STARTED;
+uint32_t lastspecial = 0;
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
BOOL inclass = FALSE;
BOOL nextisliteral = FALSE;
@@ -130,8 +132,14 @@

*bufflenptr = plength;

-/* Now scan the input */
+/* Now scan the input. In non-extended patterns, an initial asterisk is treated
+as literal. Still figuring out what happens in extended patterns... */

+if (plength > 0 && *posix == CHAR_ASTERISK)
+  {
+  if (!extended) nextisliteral = TRUE; 
+  } 
+
 while (plength > 0)
   {
   uint32_t c, sc;
@@ -262,35 +270,56 @@
       {
       if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); 
       if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
-      *p++ = *posix++;
+      lastspecial = *p++ = *posix++;
       plength--;  
       }
     else nextisliteral = TRUE;
     break;


+    case CHAR_RIGHT_PARENTHESIS:
+    if (!extended || bracount == 0) goto ESCAPE_LITERAL;
+    bracount--;
+    goto COPY_SPECIAL;
+
+    case CHAR_LEFT_PARENTHESIS:
+    bracount++;
+    /* Fall through */  
+
     case CHAR_QUESTION_MARK:
     case CHAR_PLUS:
     case CHAR_LEFT_CURLY_BRACKET:   
     case CHAR_RIGHT_CURLY_BRACKET:   
     case CHAR_VERTICAL_LINE:
-    case CHAR_LEFT_PARENTHESIS:
-    case CHAR_RIGHT_PARENTHESIS:
-    if (!extended) PUTCHARS(STR_BACKSLASH);
+    if (!extended) goto ESCAPE_LITERAL;
     /* Fall through */ 


-    case CHAR_ASTERISK:
     case CHAR_DOT:
-    case CHAR_CIRCUMFLEX_ACCENT:
     case CHAR_DOLLAR_SIGN:   
+    COPY_SPECIAL:
+    lastspecial = c; 
     if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
-    *p++ = sc;
-    break;  
-    
+    *p++ = c;
+    break; 
+
+    case CHAR_ASTERISK:
+    if (lastspecial != CHAR_ASTERISK) goto COPY_SPECIAL;
+    break;   /* Ignore second and subsequent asterisks */  
+
+    case CHAR_CIRCUMFLEX_ACCENT:
+    if (extended || 
+          lastspecial == 0 || 
+          lastspecial == CHAR_LEFT_PARENTHESIS ||
+          lastspecial == CHAR_VERTICAL_LINE) 
+      goto COPY_SPECIAL;
+    /* Fall through */      
+
     default:
     if (c < 256 && strchr("\\{}?*+[]()|.^$", c) != NULL)
       {
+      ESCAPE_LITERAL: 
       PUTCHARS(STR_BACKSLASH);
       }
+    lastspecial = 0xff;  /* Indicates nothing special */   
     if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
     memcpy(p, posix - clength, CU2BYTES(clength));
     p += clength;


Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24    2017-05-11 16:49:58 UTC (rev 780)
+++ code/trunk/testdata/testinput24    2017-05-13 17:46:27 UTC (rev 781)
@@ -247,6 +247,11 @@
 \= Expect no match
     aab


+/(ab)c)d]/
+    Xabc)d]Y
+
+/a***b/
+
 #pattern convert=unset
 #pattern convert=posix_basic


@@ -261,6 +266,18 @@

/^how to \^how to/

+/*abc/
+    X*abcY
+
+/**abc/
+    XabcY
+    X*abcY
+    X**abcY  
+
+/^b\(c^d\)\(^e^f\)/
+
+/a***b/
+
 #pattern convert=unset


/abc/

Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24    2017-05-11 16:49:58 UTC (rev 780)
+++ code/trunk/testdata/testoutput24    2017-05-13 17:46:27 UTC (rev 781)
@@ -396,6 +396,15 @@
     aab
 No match


+/(ab)c)d]/
+(ab)c\)d\]
+    Xabc)d]Y
+ 0: abc)d]
+ 1: ab
+
+/a***b/
+a*b
+
 #pattern convert=unset
 #pattern convert=posix_basic


@@ -417,6 +426,26 @@
/^how to \^how to/
^how to \^how to

+/*abc/
+\*abc
+    X*abcY
+ 0: *abc
+
+/**abc/
+\**abc
+    XabcY
+ 0: abc
+    X*abcY
+ 0: *abc
+    X**abcY  
+ 0: **abc
+
+/^b\(c^d\)\(^e^f\)/
+^b(c\^d)(^e\^f)
+
+/a***b/
+a*b
+
 #pattern convert=unset


/abc/