[Pcre-svn] [804] code/trunk: Support invalid character classes in conversion.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [804] code/trunk: Support invalid character classes in conversion.
Revision: 804
          http://www.exim.org/viewvc/pcre2?view=rev&revision=804
Author:   zherczeg
Date:     2017-05-25 14:19:42 +0100 (Thu, 25 May 2017)
Log Message:
-----------
Support invalid character classes in conversion.


Modified Paths:
--------------
    code/trunk/src/pcre2_convert.c
    code/trunk/testdata/testinput24
    code/trunk/testdata/testoutput24


Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c    2017-05-24 15:22:03 UTC (rev 803)
+++ code/trunk/src/pcre2_convert.c    2017-05-25 13:19:42 UTC (rev 804)
@@ -471,8 +471,8 @@
   pattern_end    end of pattern
   out            output context


-Returns:      TRUE => success
-             FALSE => malformed class
+Returns:  >0 => class index
+          0  => malformed class
 */


static int
@@ -481,48 +481,31 @@
{
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
"graph:lower:print:punct:space:upper:word:xdigit:";
-PCRE2_SPTR pattern = *from;
-PCRE2_SPTR start;
+PCRE2_SPTR start = *from + 1;
+PCRE2_SPTR pattern = start;
const char *class_ptr;
PCRE2_UCHAR c;
+int class_index;

-out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
-out->out_str[1] = CHAR_COLON;
-convert_glob_write_str(out, 2);
-
 while (TRUE)
   {
-  if (pattern >= pattern_end)
-    {
-    *from = pattern;
-    return ERROR_MISSING_SQUARE_BRACKET;
-    }
+  if (pattern >= pattern_end) return 0;


c = *pattern++;

-  if (c == CHAR_COLON && pattern < pattern_end &&
-      *pattern == CHAR_RIGHT_SQUARE_BRACKET)
-    {
-      break;
-    }
+  if (c < CHAR_a || c > CHAR_z) break;
+  }


-  if (c < CHAR_a || c > CHAR_z)
-    {
-    /* All POSIX class is composed of lowercase characters */
-    *from = pattern;
-    return ERROR_MISSING_SQUARE_BRACKET;
-    }
+if (c != CHAR_COLON || pattern >= pattern_end ||
+    *pattern != CHAR_RIGHT_SQUARE_BRACKET)
+  return 0;


- convert_glob_write(out, c);
- }
-
-start = *from;
-*from = pattern + 1;
class_ptr = posix_classes;
+class_index = 0;

while (TRUE)
{
- if (*class_ptr == CHAR_NULL) return ERROR_UNKNOWN_POSIX_CLASS;
+ if (*class_ptr == CHAR_NULL) return 0;

pattern = start;

@@ -530,10 +513,13 @@
     {
     if (*pattern == CHAR_COLON)
       {
-      out->out_str[0] = CHAR_COLON;
-      out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
-      convert_glob_write_str(out, 2);
-      return 0;
+      pattern += 2;
+      start -= 2;
+
+      do convert_glob_write(out, *start++); while (start < pattern);
+
+      *from = pattern;
+      return class_index;
       }
     pattern++;
     class_ptr++;
@@ -541,10 +527,42 @@


while (*class_ptr != CHAR_COLON) class_ptr++;
class_ptr++;
+ class_index++;
}
}

+/* Checks whether the character is in the class.

+Arguments:
+  class_index    class index
+  c              character
+
+Returns:   !0 => character is found in the class
+            0 => otherwise
+*/
+
+static BOOL
+convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
+{
+switch (class_index)
+  {
+  case 0: return isalnum(c);
+  case 1: return isalpha(c);
+  case 2: return 1;
+  case 3: return c == CHAR_HT || c == CHAR_SPACE;
+  case 4: return iscntrl(c);
+  case 5: return isdigit(c);
+  case 6: return isgraph(c);
+  case 7: return islower(c);
+  case 8: return isprint(c);
+  case 9: return ispunct(c);
+  case 10: return isspace(c);
+  case 11: return isupper(c);
+  case 12: return isalnum(c) || c == CHAR_UNDERSCORE;
+  default: return isxdigit(c);
+  }
+}
+
 /* Parse a range of characters.


Arguments:
@@ -569,7 +587,7 @@
PCRE2_SPTR pattern = *from;
PCRE2_SPTR char_start = NULL;
uint32_t c, prev_c;
-int result, len;
+int len, class_index;

(void)utf; /* Avoid compiler warning. */

@@ -653,17 +671,21 @@

   if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
     {
-    *from = pattern + 1;
+    *from = pattern;
+    class_index = convert_glob_parse_class(from, pattern_end, out);


-    result = convert_glob_parse_class(from, pattern_end, out);
-    if (result != 0) return result;
+    if (class_index != 0)
+      {
+      pattern = *from;


-    pattern = *from;
+      has_prev_c = FALSE;
+      prev_c = 0;


-    has_prev_c = FALSE;
-    prev_c = 0;
-    separator_seen = TRUE;
-    continue;
+      if (!is_negative &&
+          convert_glob_char_in_class (class_index, separator))
+        separator_seen = TRUE;
+      continue;
+      }
     }
   else if (c == CHAR_MINUS && has_prev_c &&
            *pattern != CHAR_RIGHT_SQUARE_BRACKET)


Modified: code/trunk/testdata/testinput24
===================================================================
--- code/trunk/testdata/testinput24    2017-05-24 15:22:03 UTC (rev 803)
+++ code/trunk/testdata/testinput24    2017-05-25 13:19:42 UTC (rev 804)
@@ -237,6 +237,8 @@


/[[:alpha:]-a]/

+/[[:alpha:]][[:punct:]][[:ascii:]]/
+
/[a-[:alpha:]]/

/[[:alpha:/

Modified: code/trunk/testdata/testoutput24
===================================================================
--- code/trunk/testdata/testoutput24    2017-05-24 15:22:03 UTC (rev 803)
+++ code/trunk/testdata/testoutput24    2017-05-25 13:19:42 UTC (rev 804)
@@ -179,12 +179,12 @@
 No match


 '[[:alpha:]][[:digit:]][[:upper:]]'
-(?s)\A[[:alpha:]](?<!/)[[:digit:]](?<!/)[[:upper:]](?<!/)\z
+(?s)\A[[:alpha:]][[:digit:]][[:upper:]]\z
     a1B
  0: a1B


 '[[:digit:][:upper:][:space:]]'
-(?s)\A[[:digit:][:upper:][:space:]](?<!/)\z
+(?s)\A[[:digit:][:upper:][:space:]]\z
     A
  0: A
     1
@@ -198,7 +198,7 @@
 No match


 '[a-c[:digit:]x-z]'
-(?s)\A[a-c[:digit:]x-z](?<!/)\z
+(?s)\A[a-c[:digit:]x-z]\z
     5
  0: 5
     b
@@ -294,7 +294,7 @@
  0: <->


 /a[[:digit:].]z/
-(?s)\Aa[[:digit:].](?<!/)z\z
+(?s)\Aa[[:digit:].]z\z
     a1z
  0: a1z
     a.z
@@ -304,20 +304,29 @@
 No match


 /a[[:digit].]z/
-** Pattern conversion error at offset 10: missing terminating ] for character class
+(?s)\Aa[\[:digit]\.\]z\z
     a[.]z
+ 0: a[.]z
     a:.]z
+ 0: a:.]z
     ad.]z
+ 0: ad.]z


 /<[[:a[:digit:]b]>/
-** Pattern conversion error at offset 6: missing terminating ] for character class
+(?s)\A<[\[:a[:digit:]b]>\z
     <[>
+ 0: <[>
     <:>
+ 0: <:>
     <a>
+ 0: <a>
     <9>
+ 0: <9>
     <b>
+ 0: <b>
 \= Expect no match
     <d>
+No match


/a*b/convert_glob_separator=\
(?s)\Aa(*COMMIT)[^\\]*?b\z
@@ -349,7 +358,7 @@
(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z

/[[:alpha:][:xdigit:][:word:]]/
-(?s)\A[[:alpha:][:xdigit:][:word:]](?<!/)\z
+(?s)\A[[:alpha:][:xdigit:][:word:]]\z

"[/-/]"
(?s)\A[/-/](?<!/)\z
@@ -364,8 +373,11 @@
(?s)\A[^/\--\-\--\-]\z

/[[:alpha:]-a]/
-(?s)\A[[:alpha:]\-a](?<!/)\z
+(?s)\A[[:alpha:]\-a]\z

+/[[:alpha:]][[:punct:]][[:ascii:]]/
+(?s)\A[[:alpha:]][[:punct:]](?<!/)[[:ascii:]](?<!/)\z
+
/[a-[:alpha:]]/
** Pattern conversion error at offset 4: invalid syntax

@@ -376,13 +388,13 @@
** Pattern conversion error at offset 10: missing terminating ] for character class

/[[:alphaa:]]/
-** Pattern conversion error at offset 11: unknown POSIX class name
+(?s)\A[\[:alphaa:]\]\z

/[[:xdigi:]]/
-** Pattern conversion error at offset 10: unknown POSIX class name
+(?s)\A[\[:xdigi:]\]\z

/[[:xdigit::]]/
-** Pattern conversion error at offset 10: missing terminating ] for character class
+(?s)\A[\[:xdigit::]\]\z

/****/
(?s)