Revision: 978
http://vcs.pcre.org/viewvc?view=rev&revision=978
Author: ph10
Date: 2012-06-17 17:55:07 +0100 (Sun, 17 Jun 2012)
Log Message:
-----------
Apply character value checks to \u.... in JavaScript mode, for compatibility
with \x{....} in non-JavaScript mode.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcreapi.3
code/trunk/doc/pcrepattern.3
code/trunk/pcre_compile.c
code/trunk/pcre_internal.h
code/trunk/pcreposix.c
code/trunk/testdata/testinput14
code/trunk/testdata/testinput17
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput14
code/trunk/testdata/testoutput17
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/ChangeLog 2012-06-17 16:55:07 UTC (rev 978)
@@ -132,7 +132,12 @@
37. Optimizing single character iterators in JIT.
+38. Wide characters specified with \uxxxx in JavaScript mode are now subject to
+ the same checks as \x{...} characters in non-JavaScript mode. Specifically,
+ codepoints that are too big for the mode are faulted, and in a UTF mode,
+ disallowed codepoints are also faulted.
+
Version 8.30 04-February-2012
-----------------------------
Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/doc/pcreapi.3 2012-06-17 16:55:07 UTC (rev 978)
@@ -927,6 +927,7 @@
73 disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
74 invalid UTF-16 string (specifically UTF-16)
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+ 76 character value in \eu.... sequence is too large
.sp
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
be used if the limits were changed when PCRE was built.
@@ -2666,6 +2667,6 @@
.rs
.sp
.nf
-Last updated: 04 May 2012
+Last updated: 17 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/doc/pcrepattern.3 2012-06-17 16:55:07 UTC (rev 978)
@@ -277,6 +277,8 @@
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
code points greater than 256 is provided by \eu, which must be followed by
four hexadecimal digits; otherwise it matches a literal "u" character.
+Character codes specified by \eu in JavaScript mode are constrained in the same
+way as those specified by \ex in non-JavaScript mode.
.P
Characters whose value is less than 256 can be defined by either of the two
syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
@@ -2911,6 +2913,6 @@
.rs
.sp
.nf
-Last updated: 01 June 2012
+Last updated: 17 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/pcre_compile.c 2012-06-17 16:55:07 UTC (rev 978)
@@ -491,6 +491,7 @@
"invalid UTF-16 string\0"
/* 75 */
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
+ "character value in \\u.... sequence is too large\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -831,6 +832,18 @@
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
}
+
+#ifdef COMPILE_PCRE8
+ if (c > (utf ? 0x10ffff : 0xff))
+#else
+#ifdef COMPILE_PCRE16
+ if (c > (utf ? 0x10ffff : 0xffff))
+#endif
+#endif
+ {
+ *errorcodeptr = ERR76;
+ }
+ else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
}
}
else
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/pcre_internal.h 2012-06-17 16:55:07 UTC (rev 978)
@@ -1945,7 +1945,7 @@
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERRCOUNT };
+ ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
Modified: code/trunk/pcreposix.c
===================================================================
--- code/trunk/pcreposix.c 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/pcreposix.c 2012-06-17 16:55:07 UTC (rev 978)
@@ -160,7 +160,8 @@
REG_BADPAT, /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */
REG_BADPAT, /* invalid UTF-16 string (should not occur) */
/* 75 */
- REG_BADPAT /* overlong MARK name */
+ REG_BADPAT, /* overlong MARK name */
+ REG_BADPAT /* character value in \u.... sequence is too large */
};
/* Table of texts corresponding to POSIX error codes */
Modified: code/trunk/testdata/testinput14
===================================================================
--- code/trunk/testdata/testinput14 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testinput14 2012-06-17 16:55:07 UTC (rev 978)
@@ -320,4 +320,8 @@
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
XX
+/\u0100/<JS>
+
+/[\u0100-\u0200]/<JS>
+
/-- End of testinput14 --/
Modified: code/trunk/testdata/testinput17
===================================================================
--- code/trunk/testdata/testinput17 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testinput17 2012-06-17 16:55:07 UTC (rev 978)
@@ -286,4 +286,10 @@
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
XX
+/\u0100/<JS>BZ
+
+/[\u0100-\u0200]/<JS>BZ
+
+/\ud800/<JS>BZ
+
/-- End of testinput17 --/
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testinput5 2012-06-17 16:55:07 UTC (rev 978)
@@ -765,4 +765,10 @@
/(?<!^)ETA/8
ETA
+/\u0100/<JS>8BZ
+
+/[\u0100-\u0200]/<JS>8BZ
+
+/\ud800/<JS>8
+
/-- End of testinput5 --/
Modified: code/trunk/testdata/testoutput14
===================================================================
--- code/trunk/testdata/testoutput14 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testoutput14 2012-06-17 16:55:07 UTC (rev 978)
@@ -461,4 +461,10 @@
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
+/\u0100/<JS>
+Failed: character value in \u.... sequence is too large at offset 5
+
+/[\u0100-\u0200]/<JS>
+Failed: character value in \u.... sequence is too large at offset 6
+
/-- End of testinput14 --/
Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testoutput17 2012-06-17 16:55:07 UTC (rev 978)
@@ -516,4 +516,28 @@
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
+/\u0100/<JS>BZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\u0100-\u0200]/<JS>BZ
+------------------------------------------------------------------
+ Bra
+ [\x{100}-\x{200}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/\ud800/<JS>BZ
+------------------------------------------------------------------
+ Bra
+ \x{d800}
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput17 --/
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testoutput5 2012-06-17 16:55:07 UTC (rev 978)
@@ -1829,4 +1829,23 @@
ETA
No match
+/\u0100/<JS>8BZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\u0100-\u0200]/<JS>8BZ
+------------------------------------------------------------------
+ Bra
+ [\x{100}-\x{200}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/\ud800/<JS>8
+Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 5
+
/-- End of testinput5 --/