Revision: 758
http://vcs.pcre.org/viewvc?view=rev&revision=758
Author: ph10
Date: 2011-11-21 12:05:36 +0000 (Mon, 21 Nov 2011)
Log Message:
-----------
Disallow \N in character classes, for Perl compatibility.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcrepattern.3
code/trunk/pcre_compile.c
code/trunk/pcre_internal.h
code/trunk/pcreposix.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-11-21 11:44:55 UTC (rev 757)
+++ code/trunk/ChangeLog 2011-11-21 12:05:36 UTC (rev 758)
@@ -52,7 +52,10 @@
13. In non-UTF-8 mode, \C is now supported in lookbehinds and DFA matching.
+14. Perl does not support \N without a following name in a [] class; PCRE now
+ also gives an error.
+
Version 8.20 21-Oct-2011
------------------------
Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3 2011-11-21 11:44:55 UTC (rev 757)
+++ code/trunk/doc/pcrepattern.3 2011-11-21 12:05:36 UTC (rev 758)
@@ -328,12 +328,14 @@
zero, because no more than three octal digits are ever read.
.P
All the sequences that define a single character value can be used both inside
-and outside character classes. In addition, inside a character class, the
-sequence \eb is interpreted as the backspace character (hex 08). The sequences
-\eB, \eN, \eR, and \eX are not special inside a character class. Like any other
-unrecognized escape sequences, they are treated as the literal characters "B",
-"N", "R", and "X" by default, but cause an error if the PCRE_EXTRA option is
-set. Outside a character class, these sequences have different meanings.
+and outside character classes. In addition, inside a character class, \eb is
+interpreted as the backspace character (hex 08).
+.P
+\eN is not allowed in a character class. \eB, \eR, and \eX are not special
+inside a character class. Like other unrecognized escape sequences, they are
+treated as the literal characters "B", "R", and "X" by default, but cause an
+error if the PCRE_EXTRA option is set. Outside a character class, these
+sequences have different meanings.
.
.
.SS "Unsupported escape sequences"
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2011-11-21 11:44:55 UTC (rev 757)
+++ code/trunk/pcre_compile.c 2011-11-21 12:05:36 UTC (rev 758)
@@ -412,6 +412,7 @@
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */
"internal error: unknown opcode in find_fixedlength()\0"
+ "\\N is not supported in a class\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -3770,6 +3771,11 @@
if (*errorcodeptr != 0) goto FAILED;
if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
+ else if (-c == ESC_N) /* \N is not supported in a class */
+ {
+ *errorcodeptr = ERR71;
+ goto FAILED;
+ }
else if (-c == ESC_Q) /* Handle start of quoted string */
{
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2011-11-21 11:44:55 UTC (rev 757)
+++ code/trunk/pcre_internal.h 2011-11-21 12:05:36 UTC (rev 758)
@@ -1665,7 +1665,7 @@
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERRCOUNT };
+ ERR70, ERR71, ERRCOUNT };
/* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit
Modified: code/trunk/pcreposix.c
===================================================================
--- code/trunk/pcreposix.c 2011-11-21 11:44:55 UTC (rev 757)
+++ code/trunk/pcreposix.c 2011-11-21 12:05:36 UTC (rev 758)
@@ -155,6 +155,7 @@
REG_BADPAT, /* \k is not followed by a braced, angle-bracketed, or quoted name */
/* 70 */
REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */
+ REG_BADPAT, /* \N is not supported in a class */
};
/* Table of texts corresponding to POSIX error codes */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-11-21 11:44:55 UTC (rev 757)
+++ code/trunk/testdata/testinput2 2011-11-21 12:05:36 UTC (rev 758)
@@ -4009,4 +4009,9 @@
/(?<=ab\Cde)X/8
+/a[\NB]c/
+ aNc
+
+/a[B-\Nc]/
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-11-21 11:44:55 UTC (rev 757)
+++ code/trunk/testdata/testoutput2 2011-11-21 12:05:36 UTC (rev 758)
@@ -12594,4 +12594,10 @@
/(?<=ab\Cde)X/8
Failed: \C not allowed in lookbehind assertion at offset 10
+/a[\NB]c/
+Failed: \N is not supported in a class at offset 3
+
+/a[B-\Nc]/
+Failed: \N is not supported in a class at offset 5
+
/-- End of testinput2 --/