Revision: 640
http://vcs.pcre.org/viewvc?view=rev&revision=640
Author: ph10
Date: 2011-07-25 11:50:28 +0100 (Mon, 25 Jul 2011)
Log Message:
-----------
Fix three compile-time bugs (Bugzilla #1123).
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput1
code/trunk/testdata/testinput11
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput11
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/ChangeLog 2011-07-25 10:50:28 UTC (rev 640)
@@ -180,7 +180,20 @@
34. A minor code tidy in pcre_compile() when checking options for \R usage.
+35. \g was being checked for fancy things in a character class, when it should
+ just be a literal "g".
+
+36. PCRE was rejecting [:a[:digit:]] whereas Perl was not. It seems that the
+ appearance of a nested POSIX class supersedes an apparent external class.
+ For example, [:a[:digit:]b:] matches "a", "b", ":", or a digit. Also,
+ unescaped square brackets may appear as part of class names. For
+ example, [:a[:abc]b:] gives unknown class "[:abc]b:]". PCRE now behaves
+ more like Perl.
+
+37. PCRE was giving an error for \N with a braced quantifier such as {1,} (this
+ was because it thought it was \N{name}, which is not supported).
+
Version 8.12 15-Jan-2011
------------------------
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/pcre_compile.c 2011-07-25 10:50:28 UTC (rev 640)
@@ -578,6 +578,39 @@
/*************************************************
+* Check for counted repeat *
+*************************************************/
+
+/* This function is called when a '{' is encountered in a place where it might
+start a quantifier. It looks ahead to see if it really is a quantifier or not.
+It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
+where the ddds are digits.
+
+Arguments:
+ p pointer to the first char after '{'
+
+Returns: TRUE or FALSE
+*/
+
+static BOOL
+is_counted_repeat(const uschar *p)
+{
+if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
+while ((digitab[*p] & ctype_digit) != 0) p++;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
+
+if (*p++ != CHAR_COMMA) return FALSE;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
+
+if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
+while ((digitab[*p] & ctype_digit) != 0) p++;
+
+return (*p == CHAR_RIGHT_CURLY_BRACKET);
+}
+
+
+
+/*************************************************
* Handle escapes *
*************************************************/
@@ -648,7 +681,8 @@
*errorcodeptr = ERR37;
break;
- /* \g must be followed by one of a number of specific things:
+ /* In a character class, \g is just a literal "g". Outside a character
+ class, \g must be followed by one of a number of specific things:
(1) A number, either plain or braced. If positive, it is an absolute
backreference. If negative, it is a relative backreference. This is a Perl
@@ -665,6 +699,7 @@
the -ESC_g code (cf \k). */
case CHAR_g:
+ if (isclass) break;
if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
{
c = -ESC_g;
@@ -886,9 +921,11 @@
}
/* Perl supports \N{name} for character names, as well as plain \N for "not
-newline". PCRE does not support \N{name}. */
+newline". PCRE does not support \N{name}. However, it does support
+quantification such as \N{2,3}. */
-if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
+if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
+ !is_counted_repeat(ptr+2))
*errorcodeptr = ERR37;
/* If PCRE_UCP is set, we change the values for \d etc. */
@@ -998,39 +1035,6 @@
/*************************************************
-* Check for counted repeat *
-*************************************************/
-
-/* This function is called when a '{' is encountered in a place where it might
-start a quantifier. It looks ahead to see if it really is a quantifier or not.
-It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
-where the ddds are digits.
-
-Arguments:
- p pointer to the first char after '{'
-
-Returns: TRUE or FALSE
-*/
-
-static BOOL
-is_counted_repeat(const uschar *p)
-{
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
-if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
-
-if (*p++ != CHAR_COMMA) return FALSE;
-if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
-
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
-
-return (*p == CHAR_RIGHT_CURLY_BRACKET);
-}
-
-
-
-/*************************************************
* Read repeat counts *
*************************************************/
@@ -2288,6 +2292,12 @@
"l\ower". This is a lesser evil that not diagnosing bad classes when Perl does,
I think.
+A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
+It seems that the appearance of a nested POSIX class supersedes an apparent
+external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
+a digit. Also, unescaped square brackets may appear as part of class
+names. For example, [:a[:abc]b:] gives unknown class "[:abc]b:]" in Perl.
+
Arguments:
ptr pointer to the initial [
endptr where to return the end pointer
@@ -2302,14 +2312,20 @@
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
for (++ptr; *ptr != 0; ptr++)
{
- if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
+ if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+ ptr++;
+ else
{
- if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
{
*endptr = ptr;
return TRUE;
}
+ if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&
+ (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+ ptr[1] == CHAR_EQUALS_SIGN) &&
+ check_posix_syntax(ptr, endptr))
+ return FALSE;
}
}
return FALSE;
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testinput1 2011-07-25 10:50:28 UTC (rev 640)
@@ -4234,4 +4234,18 @@
abcxyz
pqrxyz
+/^[\g<a>]+/
+ ggg<<<aaa>>>
+ ** Failers
+ \\ga
+
+/^[\ga]+/
+ gggagagaxyz
+
+/^[:a[:digit:]]+/
+ aaaa444:::Z
+
+/^[:a[:digit:]:b]+/
+ aaaa444:::bbbZ
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testinput11
===================================================================
--- code/trunk/testdata/testinput11 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testinput11 2011-07-25 10:50:28 UTC (rev 640)
@@ -648,4 +648,10 @@
/(?(DEFINE)(a))?b(?1)/
backgammon
+/^\N+/
+ abc\ndef
+
+/^\N{1,}/
+ abc\ndef
+
/-- End of testinput11 --/
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testinput2 2011-07-25 10:50:28 UTC (rev 640)
@@ -2337,7 +2337,7 @@
/\g6666666666/
-/[\g6666666666]/
+/[\g6666666666]/BZ
/(?1)\c[/
@@ -3816,4 +3816,6 @@
/(?<=(abc))?xyz/BZ
+/[:a[:abc]b:]/
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testoutput1 2011-07-25 10:50:28 UTC (rev 640)
@@ -6928,4 +6928,24 @@
pqrxyz
0: xyz
+/^[\g<a>]+/
+ ggg<<<aaa>>>
+ 0: ggg<<<aaa>>>
+ ** Failers
+No match
+ \\ga
+No match
+
+/^[\ga]+/
+ gggagagaxyz
+ 0: gggagaga
+
+/^[:a[:digit:]]+/
+ aaaa444:::Z
+ 0: aaaa444:::
+
+/^[:a[:digit:]:b]+/
+ aaaa444:::bbbZ
+ 0: aaaa444:::bbb
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput11
===================================================================
--- code/trunk/testdata/testoutput11 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testoutput11 2011-07-25 10:50:28 UTC (rev 640)
@@ -1225,4 +1225,12 @@
backgammon
0: ba
+/^\N+/
+ abc\ndef
+ 0: abc
+
+/^\N{1,}/
+ abc\ndef
+ 0: abc
+
/-- End of testinput11 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-07-25 09:45:43 UTC (rev 639)
+++ code/trunk/testdata/testoutput2 2011-07-25 10:50:28 UTC (rev 640)
@@ -8936,8 +8936,13 @@
/\g6666666666/
Failed: number is too big at offset 11
-/[\g6666666666]/
-Failed: number is too big at offset 12
+/[\g6666666666]/BZ
+------------------------------------------------------------------
+ Bra
+ [6g]
+ Ket
+ End
+------------------------------------------------------------------
/(?1)\c[/
Failed: reference to non-existent subpattern at offset 3
@@ -12131,4 +12136,7 @@
End
------------------------------------------------------------------
+/[:a[:abc]b:]/
+Failed: unknown POSIX class name at offset 5
+
/-- End of testinput2 --/