[Pcre-svn] [640] code/trunk: Fix three compile-time bugs (Bu…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [640] code/trunk: Fix three compile-time bugs (Bugzilla #1123).
Revision: 640
          http://vcs.pcre.org/viewvc?view=rev&revision=640
Author:   ph10
Date:     2011-07-25 11:50:28 +0100 (Mon, 25 Jul 2011)


Log Message:
-----------
Fix three compile-time bugs (Bugzilla #1123).

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput11
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput11
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/ChangeLog    2011-07-25 10:50:28 UTC (rev 640)
@@ -180,7 +180,20 @@


34. A minor code tidy in pcre_compile() when checking options for \R usage.

+35. \g was being checked for fancy things in a character class, when it should
+    just be a literal "g".
+    
+36. PCRE was rejecting [:a[:digit:]] whereas Perl was not. It seems that the
+    appearance of a nested POSIX class supersedes an apparent external class.
+    For example, [:a[:digit:]b:] matches "a", "b", ":", or a digit. Also,
+    unescaped square brackets may appear as part of class names. For
+    example, [:a[:abc]b:] gives unknown class "[:abc]b:]". PCRE now behaves 
+    more like Perl.
+    
+37. PCRE was giving an error for \N with a braced quantifier such as {1,} (this 
+    was because it thought it was \N{name}, which is not supported).


+
Version 8.12 15-Jan-2011
------------------------


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/pcre_compile.c    2011-07-25 10:50:28 UTC (rev 640)
@@ -578,6 +578,39 @@



 /*************************************************
+*            Check for counted repeat            *
+*************************************************/
+
+/* This function is called when a '{' is encountered in a place where it might
+start a quantifier. It looks ahead to see if it really is a quantifier or not.
+It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
+where the ddds are digits.
+
+Arguments:
+  p         pointer to the first char after '{'
+
+Returns:    TRUE or FALSE
+*/
+
+static BOOL
+is_counted_repeat(const uschar *p)
+{
+if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
+while ((digitab[*p] & ctype_digit) != 0) p++;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
+
+if (*p++ != CHAR_COMMA) return FALSE;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
+
+if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
+while ((digitab[*p] & ctype_digit) != 0) p++;
+
+return (*p == CHAR_RIGHT_CURLY_BRACKET);
+}
+
+
+
+/*************************************************
 *            Handle escapes                      *
 *************************************************/


@@ -648,7 +681,8 @@
     *errorcodeptr = ERR37;
     break;


-    /* \g must be followed by one of a number of specific things:
+    /* In a character class, \g is just a literal "g". Outside a character 
+    class, \g must be followed by one of a number of specific things:


     (1) A number, either plain or braced. If positive, it is an absolute
     backreference. If negative, it is a relative backreference. This is a Perl
@@ -665,6 +699,7 @@
     the -ESC_g code (cf \k). */


     case CHAR_g:
+    if (isclass) break;
     if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
       {
       c = -ESC_g;
@@ -886,9 +921,11 @@
   }


/* Perl supports \N{name} for character names, as well as plain \N for "not
-newline". PCRE does not support \N{name}. */
+newline". PCRE does not support \N{name}. However, it does support
+quantification such as \N{2,3}. */

-if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
+if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
+     !is_counted_repeat(ptr+2))
   *errorcodeptr = ERR37;


/* If PCRE_UCP is set, we change the values for \d etc. */
@@ -998,39 +1035,6 @@


 /*************************************************
-*            Check for counted repeat            *
-*************************************************/
-
-/* This function is called when a '{' is encountered in a place where it might
-start a quantifier. It looks ahead to see if it really is a quantifier or not.
-It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
-where the ddds are digits.
-
-Arguments:
-  p         pointer to the first char after '{'
-
-Returns:    TRUE or FALSE
-*/
-
-static BOOL
-is_counted_repeat(const uschar *p)
-{
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
-if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
-
-if (*p++ != CHAR_COMMA) return FALSE;
-if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
-
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
-
-return (*p == CHAR_RIGHT_CURLY_BRACKET);
-}
-
-
-
-/*************************************************
 *         Read repeat counts                     *
 *************************************************/


@@ -2288,6 +2292,12 @@
"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
I think.

+A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
+It seems that the appearance of a nested POSIX class supersedes an apparent
+external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
+a digit. Also, unescaped square brackets may appear as part of class
+names. For example, [:a[:abc]b:] gives unknown class "[:abc]b:]" in Perl.
+
 Arguments:
   ptr      pointer to the initial [
   endptr   where to return the end pointer
@@ -2302,14 +2312,20 @@
 terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
 for (++ptr; *ptr != 0; ptr++)
   {
-  if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
+  if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) 
+    ptr++; 
+  else
     {
-    if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
     if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
       {
       *endptr = ptr;
       return TRUE;
       }
+    if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&
+         (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+          ptr[1] == CHAR_EQUALS_SIGN) &&
+        check_posix_syntax(ptr, endptr))
+      return FALSE;   
     }
   }
 return FALSE;


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testinput1    2011-07-25 10:50:28 UTC (rev 640)
@@ -4234,4 +4234,18 @@
     abcxyz
     pqrxyz 


+/^[\g<a>]+/
+    ggg<<<aaa>>>
+    ** Failers
+    \\ga  
+    
+/^[\ga]+/
+    gggagagaxyz 
+    
+/^[:a[:digit:]]+/
+    aaaa444:::Z 
+
+/^[:a[:digit:]:b]+/
+    aaaa444:::bbbZ 
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testinput11    2011-07-25 10:50:28 UTC (rev 640)
@@ -648,4 +648,10 @@
 /(?(DEFINE)(a))?b(?1)/
     backgammon


+/^\N+/
+    abc\ndef
+    
+/^\N{1,}/
+    abc\ndef 
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testinput2    2011-07-25 10:50:28 UTC (rev 640)
@@ -2337,7 +2337,7 @@


/\g6666666666/

-/[\g6666666666]/
+/[\g6666666666]/BZ

/(?1)\c[/

@@ -3816,4 +3816,6 @@

/(?<=(abc))?xyz/BZ

+/[:a[:abc]b:]/
+
/-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testoutput1    2011-07-25 10:50:28 UTC (rev 640)
@@ -6928,4 +6928,24 @@
     pqrxyz 
  0: xyz


+/^[\g<a>]+/
+    ggg<<<aaa>>>
+ 0: ggg<<<aaa>>>
+    ** Failers
+No match
+    \\ga  
+No match
+    
+/^[\ga]+/
+    gggagagaxyz 
+ 0: gggagaga
+    
+/^[:a[:digit:]]+/
+    aaaa444:::Z 
+ 0: aaaa444:::
+
+/^[:a[:digit:]:b]+/
+    aaaa444:::bbbZ 
+ 0: aaaa444:::bbb
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput11
===================================================================
--- code/trunk/testdata/testoutput11    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testoutput11    2011-07-25 10:50:28 UTC (rev 640)
@@ -1225,4 +1225,12 @@
     backgammon
  0: ba


+/^\N+/
+    abc\ndef
+ 0: abc
+    
+/^\N{1,}/
+    abc\ndef 
+ 0: abc
+
 /-- End of testinput11 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testoutput2    2011-07-25 10:50:28 UTC (rev 640)
@@ -8936,8 +8936,13 @@
 /\g6666666666/
 Failed: number is too big at offset 11


-/[\g6666666666]/
-Failed: number is too big at offset 12
+/[\g6666666666]/BZ
+------------------------------------------------------------------
+        Bra
+        [6g]
+        Ket
+        End
+------------------------------------------------------------------


 /(?1)\c[/
 Failed: reference to non-existent subpattern at offset 3
@@ -12131,4 +12136,7 @@
         End
 ------------------------------------------------------------------


+/[:a[:abc]b:]/
+Failed: unknown POSIX class name at offset 5
+
/-- End of testinput2 --/