[Pcre-svn] [809] code/trunk: Update POSIX basic regex conversion code.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [809] code/trunk: Update POSIX basic regex conversion code.
Revision: 809
          http://www.exim.org/viewvc/pcre2?view=rev&revision=809
Author:   ph10
Date:     2017-05-27 18:08:28 +0100 (Sat, 27 May 2017)
Log Message:
-----------
Update POSIX basic regex conversion code.


Modified Paths:
--------------
    code/trunk/src/pcre2_convert.c
    code/trunk/testdata/testinput24
    code/trunk/testdata/testoutput24


Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c    2017-05-27 16:06:56 UTC (rev 808)
+++ code/trunk/src/pcre2_convert.c    2017-05-27 17:08:28 UTC (rev 809)
@@ -74,6 +74,7 @@
 #define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
 #define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
 #define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
+#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS


/* States for range and POSIX processing */

@@ -101,12 +102,12 @@
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;

-/* Recognized escapes in POSIX basic patterns. */
+/* Recognized escaped metacharacters in POSIX basic patterns. */

-static const char *posix_basic_escapes =
- STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE
+static const char *posix_meta_escapes =
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
- STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
+ STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
+ STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;



@@ -155,6 +156,7 @@
/* Initialize default for error offset as end of input. */

*bufflenptr = plength;
+PUTCHARS(STR_STAR_NUL);

/* Now scan the input. */

@@ -237,7 +239,9 @@
     case CHAR_LEFT_SQUARE_BRACKET:
     PUTCHARS(STR_LEFT_SQUARE_BRACKET);


-    /* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */
+#ifdef NEVER
+    /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
+    support) but they are not part of POSIX 1003.1. */


     if (plength >= 6)
       {
@@ -257,8 +261,9 @@
         continue;  /* With next character */
         }
       }
+#endif       


-    /* Handle "normal" character classes */
+    /* Handle start of "normal" character classes */


     posix_state = POSIX_CLASS_NOT_STARTED;


@@ -283,15 +288,17 @@

     case CHAR_BACKSLASH:
     if (plength <= 0) return ERROR_END_BACKSLASH;
-    if (!extended && *posix < 127 &&
-          strchr(posix_basic_escapes, *posix) != NULL)
-      {
-      if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
-      if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
-      lastspecial = *p++ = *posix++;
-      plength--;
+    if (extended) nextisliteral = TRUE; else
+      { 
+      if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
+        {
+        if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
+        if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
+        lastspecial = *p++ = *posix++;
+        plength--;
+        }
+      else nextisliteral = TRUE;  
       }
-    else nextisliteral = TRUE;
     break;


     case CHAR_RIGHT_PARENTHESIS:
@@ -323,7 +330,8 @@
     case CHAR_ASTERISK:
     if (lastspecial != CHAR_ASTERISK)
       {
-      if (!extended && posix_state < POSIX_NOT_BRACKET)
+      if (!extended && (posix_state < POSIX_NOT_BRACKET ||
+          lastspecial == CHAR_LEFT_PARENTHESIS))
         goto ESCAPE_LITERAL;
       goto COPY_SPECIAL;
       }


Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24    2017-05-27 16:06:56 UTC (rev 808)
+++ code/trunk/testdata/testinput24    2017-05-27 17:08:28 UTC (rev 809)
@@ -323,10 +323,10 @@


/a`*b/convert_glob_escape=x

+# -------- Tests of extended POSIX conversion --------
+
#pattern convert=unset:posix_extended

-/a[[:>:]z/
-
 /<[[:a[:digit:]b]>/
     <[>
     <:>
@@ -338,8 +338,6 @@


/a+\1b\\c|d[ab\c]/

-/a[[:<:]]b[[:>:]]/
-
 /<[]bc]>/
     <]>
     <b>
@@ -361,6 +359,8 @@


/a***b/

+# -------- Tests of basic POSIX conversion --------
+
#pattern convert=unset:posix_basic

 /a*b+c\+[def](ab)\(cd\)/
@@ -371,6 +371,9 @@
     a1b


 /how.to how\.to/
+    how\nto how.to
+\= Expect no match     
+    how\x{0}to how.to


/^how to \^how to/

@@ -383,13 +386,11 @@
     XabcY
     X*abcY
     X**abcY
+    
+/*ab\(*cd\)/ 


/^b\(c^d\)\(^e^f\)/

/a***b/

-#pattern convert=unset
-
-/abc/
-
# End of testinput24

Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24    2017-05-27 16:06:56 UTC (rev 808)
+++ code/trunk/testdata/testoutput24    2017-05-27 17:08:28 UTC (rev 809)
@@ -508,14 +508,12 @@
 /a`*b/convert_glob_escape=x
 ** Invalid glob escape 'x'


+# -------- Tests of extended POSIX conversion --------
+
#pattern convert=unset:posix_extended

-/a[[:>:]z/
-a[[:>:]z
-Failed: error 130 at offset 4: unknown POSIX class name
-
 /<[[:a[:digit:]b]>/
-<[[:a[:digit:]b]>
+(*NUL)<[[:a[:digit:]b]>
     <[>
  0: <[>
     <:>
@@ -531,13 +529,10 @@
 No match


/a+\1b\\c|d[ab\c]/
-a+1b\\c|d[ab\\c]
+(*NUL)a+1b\\c|d[ab\\c]

-/a[[:<:]]b[[:>:]]/
-a[[:<:]]b[[:>:]]
-
 /<[]bc]>/
-<[]bc]>
+(*NUL)<[]bc]>
     <]>
  0: <]>
     <b>
@@ -546,7 +541,7 @@
  0: <c>


 /<[^]bc]>/
-<[^]bc]>
+(*NUL)<[^]bc]>
     <.>
  0: <.>
 \= Expect no match
@@ -556,7 +551,7 @@
 No match


 /(a)\1b/
-(a)1b
+(*NUL)(a)1b
     a1b
  0: a1b
  1: a
@@ -565,21 +560,23 @@
 No match


 /(ab)c)d]/
-(ab)c\)d\]
+(*NUL)(ab)c\)d\]
     Xabc)d]Y
  0: abc)d]
  1: ab


/a***b/
-a*b
+(*NUL)a*b

+# -------- Tests of basic POSIX conversion --------
+
#pattern convert=unset:posix_basic

/a*b+c\+[def](ab)\(cd\)/
-a*b\+c+[def]\(ab\)(cd)
+(*NUL)a*b\+c\+[def]\(ab\)(cd)

 /\(a\)\1b/
-(a)\1b
+(*NUL)(a)\1b
     aab
  0: aab
  1: a
@@ -588,21 +585,26 @@
 No match


 /how.to how\.to/
-how.to how\.to
+(*NUL)how.to how\.to
+    how\nto how.to
+ 0: how\x0ato how.to
+\= Expect no match     
+    how\x{0}to how.to
+No match


/^how to \^how to/
-^how to \^how to
+(*NUL)^how to \^how to

/^*abc/
-^\*abc
+(*NUL)^\*abc

 /*abc/
-\*abc
+(*NUL)\*abc
     X*abcY
  0: *abc


 /**abc/
-\**abc
+(*NUL)\**abc
     XabcY
  0: abc
     X*abcY
@@ -609,15 +611,14 @@
  0: *abc
     X**abcY
  0: **abc
+    
+/*ab\(*cd\)/ 
+(*NUL)\*ab(\*cd)


/^b\(c^d\)\(^e^f\)/
-^b(c\^d)(^e\^f)
+(*NUL)^b(c\^d)(^e\^f)

/a***b/
-a*b
+(*NUL)a*b

-#pattern convert=unset
-
-/abc/
-
# End of testinput24