Revision: 1200
http://vcs.pcre.org/viewvc?view=rev&revision=1200
Author: chpe
Date: 2012-11-03 19:21:41 +0000 (Sat, 03 Nov 2012)
Log Message:
-----------
Temporarily remove 32-bit masking
Remove the masking of the high bits of UTF-32 characters that was enabled via
the PCRE_NO_UTF32_CHECK flag. It will be reintroduced later as a dedicated
runtime option, with support for both the JIT and non-JIT cases.
Modified Paths:
--------------
code/trunk/RunTest
code/trunk/doc/pcreunicode.3
code/trunk/pcre_internal.h
code/trunk/pcretest.c
Modified: code/trunk/RunTest
===================================================================
--- code/trunk/RunTest 2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/RunTest 2012-11-03 19:21:41 UTC (rev 1200)
@@ -418,39 +418,22 @@
do26=yes
fi
-# Extra test round for testing 32-bit UTF mode with high bits set
-if test "$test32" != "skip" -a $utf -eq 1; then
- test32plus="-32+"
-else
- test32plus=skip
-fi
-
# Show which release and which test data
echo ""
echo PCRE C library tests using test data from $testdata
$sim ./pcretest /dev/null
-for bmode in "$test8" "$test16" "$test32" "$test32plus"; do
+for bmode in "$test8" "$test16" "$test32"; do
case "$bmode" in
skip) continue;;
-16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
bits=16; echo "---- Testing 16-bit library ----"; echo "";;
-32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
bits=32; echo "---- Testing 32-bit library ----"; echo "";;
- -32+) echo ""
- bits=32; echo "---- Testing 32-bit library UTF-32 mode with high bits set ----"; echo "";;
*) bits=8; echo "---- Testing 8-bit library ----"; echo "";;
esac
- # When testing 32-bit UTF mode with high bits masked, skip all non-UTF tests
- # Since this is the last test, we can just set doN=no
- if test "$bmode" = "-32+"; then
- do1=no do2=no do3=no do8=no
- do12=no do13=no do14=no do15=no do16=no do17=no
- do20=no do21=no do23=no do24=no do25=no
- fi
-
# Primary test, compatible with JIT and all versions of Perl >= 5.8
if [ $do1 = yes ] ; then
Modified: code/trunk/doc/pcreunicode.3
===================================================================
--- code/trunk/doc/pcreunicode.3 2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/doc/pcreunicode.3 2012-11-03 19:21:41 UTC (rev 1200)
@@ -184,16 +184,6 @@
the PCRE_NO_UTF32_CHECK flag at compile time or at run time, PCRE assumes that
the pattern or subject it is given (respectively) contains only valid UTF-32
sequences. In this case, it does not diagnose an invalid UTF-32 string.
-.P
-UTF-32 only uses the lowest 21 bits of the 32 bit characters, and the
-application may use the upper bits for internal purposes. To allow you to
-pass these strings to PCRE unmodified (thus avoiding the costly operation of
-creating a copy of the string with the upper bits masked), PCRE accepts
-these 32-bit character strings as-is, but only uses the lowest 21 bits for
-matching, if you pass the PCRE_NO_UTF32_CHECK flag to \fBpcre32_exec()\fP and
-\fBpcre32_dfa_exec()\fP. However, in this situation, you will have to apply
-your own validity check, and avoid the use of JIT optimization.
-(The latter restriction may be lifter in a later version of PCRE.)
.
.
.SS "General comments about UTF modes"
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/pcre_internal.h 2012-11-03 19:21:41 UTC (rev 1200)
@@ -934,43 +934,29 @@
#define GET_EXTRALEN(c) (0)
#define NOT_FIRSTCHAR(c) (0)
-#define UTF32_MASK (0x1fffffu)
-
-/* Base macro to pick up an UTF-32 character out of a uint32 */
-
-#define MASKHIGHBITS(c) ((c) & UTF32_MASK)
-
-/* Base macro to pick up an UTF-32 character, not advancing the pointer */
-
-#define GETUTF32(eptr) (MASKHIGHBITS(*(eptr)))
-
-/* Base macro to pick up an UTF-32 character, advancing the pointer */
-
-#define GETUTF32INC(eptr) (MASKHIGHBITS(*((eptr)++)))
-
/* Get the next UTF-32 character, not advancing the pointer. This is called when
we know we are in UTF-32 mode. */
#define GETCHAR(c, eptr) \
- c = GETUTF32(eptr);
+ c = *(eptr);
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
pointer. */
#define GETCHARTEST(c, eptr) \
- c = (utf ? GETUTF32(eptr) : *(eptr));
+ c = *(eptr);
/* Get the next UTF-32 character, advancing the pointer. This is called when we
know we are in UTF-32 mode. */
#define GETCHARINC(c, eptr) \
- c = GETUTF32INC(eptr);
+ c = *((eptr)++);
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-32 mode. */
#define GETCHARINCTEST(c, eptr) \
- c = (utf ? GETUTF32INC(eptr) : *((eptr)++));
+ c = *((eptr)++);
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
length (since all UTF-32 is of length 1). This is called when we know we are in
@@ -990,25 +976,25 @@
we know we are in UTF mode. */
#define RAWUCHAR(eptr) \
- (MASKHIGHBITS(*(eptr)))
+ (*(eptr))
/* Returns the next uchar, advancing the pointer. This is called when
we know we are in UTF mode. */
#define RAWUCHARINC(eptr) \
- (MASKHIGHBITS(*((eptr)++)))
+ (*((eptr)++))
/* Returns the next uchar, testing for UTF mode, and not advancing the
pointer. */
#define RAWUCHARTEST(eptr) \
- (utf ? (MASKHIGHBITS(*(eptr))) : *(eptr))
+ (*(eptr))
/* Returns the next uchar, testing for UTF mode, advancing the
pointer. */
#define RAWUCHARINCTEST(eptr) \
- (utf ? (MASKHIGHBITS(*((eptr)++))) : *((eptr)++))
+ (*((eptr)++))
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-32 mode - we don't put a test within the
Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c 2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/pcretest.c 2012-11-03 19:21:41 UTC (rev 1200)
@@ -2102,8 +2102,6 @@
/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. */
-#define UTF32_MASK (0x1fffffu)
-
static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
{
int yield = 0;
@@ -2114,7 +2112,6 @@
while (length-- > 0)
{
pcre_uint32 c = *p++;
- if (utf) c &= UTF32_MASK;
yield += pchar(c, f);
}
@@ -2942,9 +2939,6 @@
int all_use_dfa = 0;
int verify_jit = 0;
int yield = 0;
-#ifdef SUPPORT_PCRE32
-int mask_utf32 = 0;
-#endif
int stack_size;
pcre_uint8 *dbuffer = NULL;
size_t dbuffer_size = 1u << 14;
@@ -3056,11 +3050,10 @@
exit(1);
#endif
}
- else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
+ else if (strcmp(arg, "-32") == 0)
{
#ifdef SUPPORT_PCRE32
pcre_mode = PCRE32_MODE;
- mask_utf32 = (strcmp(arg, "-32+") == 0);
#else
printf("** This version of PCRE was built without 32-bit support\n");
exit(1);
@@ -4821,21 +4814,6 @@
}
#endif
-#if defined SUPPORT_UTF && defined SUPPORT_PCRE32
- /* If we're requsted to test UTF-32 masking of high bits, change the data
- string to have high bits set, unless the string is invalid UTF-32.
- Since the JIT doesn't support this yet, only do it when not JITing. */
- if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
- valid_utf32((pcre_uint32 *)dbuffer, len))
- {
- for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
- *q32 |= ~(pcre_uint32)UTF32_MASK;
-
- /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
- options |= PCRE_NO_UTF32_CHECK;
- }
-#endif
-
/* If we're compiling with explicit valgrind support, Mark the data from after
its end to the end of the buffer as unaddressable, so that a read over the end
of the buffer will be seen by valgrind, even if it doesn't cause a crash.