Revision: 1088
http://vcs.pcre.org/viewvc?view=rev&revision=1088
Author: chpe
Date: 2012-10-16 16:55:41 +0100 (Tue, 16 Oct 2012)
Log Message:
-----------
pcre32: Reject characters > 0x10ffff in UTF-32 mode
Modified Paths:
--------------
code/trunk/doc/pcre32.3
code/trunk/pcre.h.in
code/trunk/pcre32_valid_utf32.c
Modified: code/trunk/doc/pcre32.3
===================================================================
--- code/trunk/doc/pcre32.3 2012-10-16 15:55:38 UTC (rev 1087)
+++ code/trunk/doc/pcre32.3 2012-10-16 15:55:41 UTC (rev 1088)
@@ -328,6 +328,7 @@
.sp
PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
PCRE_UTF32_ERR2 Invalid character 0xfffe
+ PCRE_UTF32_ERR3 Invalid character > 0x10ffff
.
.
.SH "ERROR TEXTS"
Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in 2012-10-16 15:55:38 UTC (rev 1087)
+++ code/trunk/pcre.h.in 2012-10-16 15:55:41 UTC (rev 1088)
@@ -223,6 +223,7 @@
#define PCRE_UTF32_ERR0 0
#define PCRE_UTF32_ERR1 1
#define PCRE_UTF32_ERR2 2
+#define PCRE_UTF32_ERR3 3
/* Request types for pcre_fullinfo() */
Modified: code/trunk/pcre32_valid_utf32.c
===================================================================
--- code/trunk/pcre32_valid_utf32.c 2012-10-16 15:55:38 UTC (rev 1087)
+++ code/trunk/pcre32_valid_utf32.c 2012-10-16 15:55:41 UTC (rev 1088)
@@ -51,8 +51,6 @@
#include "pcre_internal.h"
-#define MASK (0x1fffffu)
-
/*************************************************
* Validate a UTF-32 string *
*************************************************/
@@ -63,12 +61,13 @@
can be turned off for maximum performance, but the consequences of supplying an
invalid string are then undefined.
-From release 8.21 more information about the details of the error are passed
+More information about the details of the error is passed
back in the returned value:
PCRE_UTF32_ERR0 No error
PCRE_UTF32_ERR1 Surrogate character
-PCRE_UTF32_ERR2 Not allowed character
+PCRE_UTF32_ERR2 Disallowed character 0xfffe
+PCRE_UTF32_ERR3 Character > 0x10ffff
Arguments:
string points to the string
@@ -94,7 +93,7 @@
for (p = string; length-- > 0; p++)
{
- c = *p & MASK;
+ c = *p & UTF32_MASK;
if ((c & 0xfffff800u) != 0xd800u)
{
@@ -106,6 +105,11 @@
*erroroffset = p - string;
return PCRE_UTF32_ERR2;
}
+ else if (c > 0x10ffffu)
+ {
+ *erroroffset = p - string;
+ return PCRE_UTF32_ERR3;
+ }
}
else
{