[Pcre-svn] [978] code/trunk: Apply character value checks to…

Startseite
Nachricht löschen
Autor: Subversion repository
Datum:  
To: pcre-svn
Betreff: [Pcre-svn] [978] code/trunk: Apply character value checks to \u....
Revision: 978
          http://vcs.pcre.org/viewvc?view=rev&revision=978
Author:   ph10
Date:     2012-06-17 17:55:07 +0100 (Sun, 17 Jun 2012)


Log Message:
-----------
Apply character value checks to \u.... in JavaScript mode, for compatibility
with \x{....} in non-JavaScript mode.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcreapi.3
    code/trunk/doc/pcrepattern.3
    code/trunk/pcre_compile.c
    code/trunk/pcre_internal.h
    code/trunk/pcreposix.c
    code/trunk/testdata/testinput14
    code/trunk/testdata/testinput17
    code/trunk/testdata/testinput5
    code/trunk/testdata/testoutput14
    code/trunk/testdata/testoutput17
    code/trunk/testdata/testoutput5


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/ChangeLog    2012-06-17 16:55:07 UTC (rev 978)
@@ -132,7 +132,12 @@


37. Optimizing single character iterators in JIT.

+38. Wide characters specified with \uxxxx in JavaScript mode are now subject to
+    the same checks as \x{...} characters in non-JavaScript mode. Specifically, 
+    codepoints that are too big for the mode are faulted, and in a UTF mode, 
+    disallowed codepoints are also faulted. 


+
Version 8.30 04-February-2012
-----------------------------


Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/doc/pcreapi.3    2012-06-17 16:55:07 UTC (rev 978)
@@ -927,6 +927,7 @@
   73  disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
   74  invalid UTF-16 string (specifically UTF-16)
   75  name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+  76  character value in \eu.... sequence is too large
 .sp
 The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
 be used if the limits were changed when PCRE was built.
@@ -2666,6 +2667,6 @@
 .rs
 .sp
 .nf
-Last updated: 04 May 2012
+Last updated: 17 June 2012
 Copyright (c) 1997-2012 University of Cambridge.
 .fi


Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/doc/pcrepattern.3    2012-06-17 16:55:07 UTC (rev 978)
@@ -277,6 +277,8 @@
 Otherwise, it matches a literal "x" character. In JavaScript mode, support for
 code points greater than 256 is provided by \eu, which must be followed by
 four hexadecimal digits; otherwise it matches a literal "u" character.
+Character codes specified by \eu in JavaScript mode are constrained in the same 
+way as those specified by \ex in non-JavaScript mode.
 .P
 Characters whose value is less than 256 can be defined by either of the two
 syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
@@ -2911,6 +2913,6 @@
 .rs
 .sp
 .nf
-Last updated: 01 June 2012
+Last updated: 17 June 2012
 Copyright (c) 1997-2012 University of Cambridge.
 .fi


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/pcre_compile.c    2012-06-17 16:55:07 UTC (rev 978)
@@ -491,6 +491,7 @@
   "invalid UTF-16 string\0"
   /* 75 */
   "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
+  "character value in \\u.... sequence is too large\0"
   ;


 /* Table to identify digits and hex digits. This is used when compiling
@@ -831,6 +832,18 @@
           c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
 #endif
           }
+          
+#ifdef COMPILE_PCRE8
+        if (c > (utf ? 0x10ffff : 0xff))
+#else
+#ifdef COMPILE_PCRE16
+        if (c > (utf ? 0x10ffff : 0xffff))
+#endif
+#endif
+          {
+          *errorcodeptr = ERR76; 
+          }
+        else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
         }
       }
     else


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/pcre_internal.h    2012-06-17 16:55:07 UTC (rev 978)
@@ -1945,7 +1945,7 @@
        ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
        ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
-       ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERRCOUNT };
+       ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };


/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,

Modified: code/trunk/pcreposix.c
===================================================================
--- code/trunk/pcreposix.c    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/pcreposix.c    2012-06-17 16:55:07 UTC (rev 978)
@@ -160,7 +160,8 @@
   REG_BADPAT,  /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */
   REG_BADPAT,  /* invalid UTF-16 string (should not occur) */
   /* 75 */
-  REG_BADPAT   /* overlong MARK name */
+  REG_BADPAT,  /* overlong MARK name */
+  REG_BADPAT   /* character value in \u.... sequence is too large */ 
 };


/* Table of texts corresponding to POSIX error codes */

Modified: code/trunk/testdata/testinput14
===================================================================
--- code/trunk/testdata/testinput14    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testinput14    2012-06-17 16:55:07 UTC (rev 978)
@@ -320,4 +320,8 @@
 /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
     XX


+/\u0100/<JS>
+
+/[\u0100-\u0200]/<JS>
+
/-- End of testinput14 --/

Modified: code/trunk/testdata/testinput17
===================================================================
--- code/trunk/testdata/testinput17    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testinput17    2012-06-17 16:55:07 UTC (rev 978)
@@ -286,4 +286,10 @@
 /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
     XX


+/\u0100/<JS>BZ
+
+/[\u0100-\u0200]/<JS>BZ
+
+/\ud800/<JS>BZ
+
/-- End of testinput17 --/

Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testinput5    2012-06-17 16:55:07 UTC (rev 978)
@@ -765,4 +765,10 @@
 /(?<!^)ETA/8
     ETA


+/\u0100/<JS>8BZ
+
+/[\u0100-\u0200]/<JS>8BZ
+
+/\ud800/<JS>8
+
/-- End of testinput5 --/

Modified: code/trunk/testdata/testoutput14
===================================================================
--- code/trunk/testdata/testoutput14    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testoutput14    2012-06-17 16:55:07 UTC (rev 978)
@@ -461,4 +461,10 @@
  0: XX
 MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE


+/\u0100/<JS>
+Failed: character value in \u.... sequence is too large at offset 5
+
+/[\u0100-\u0200]/<JS>
+Failed: character value in \u.... sequence is too large at offset 6
+
/-- End of testinput14 --/

Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testoutput17    2012-06-17 16:55:07 UTC (rev 978)
@@ -516,4 +516,28 @@
  0: XX
 MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE


+/\u0100/<JS>BZ
+------------------------------------------------------------------
+        Bra
+        \x{100}
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\u0100-\u0200]/<JS>BZ
+------------------------------------------------------------------
+        Bra
+        [\x{100}-\x{200}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/\ud800/<JS>BZ
+------------------------------------------------------------------
+        Bra
+        \x{d800}
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput17 --/


Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2012-06-17 06:20:52 UTC (rev 977)
+++ code/trunk/testdata/testoutput5    2012-06-17 16:55:07 UTC (rev 978)
@@ -1829,4 +1829,23 @@
     ETA
 No match


+/\u0100/<JS>8BZ
+------------------------------------------------------------------
+        Bra
+        \x{100}
+        Ket
+        End
+------------------------------------------------------------------
+
+/[\u0100-\u0200]/<JS>8BZ
+------------------------------------------------------------------
+        Bra
+        [\x{100}-\x{200}]
+        Ket
+        End
+------------------------------------------------------------------
+
+/\ud800/<JS>8
+Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 5
+
 /-- End of testinput5 --/