[Pcre-svn] [1200] code/trunk: Temporarily remove 32-bit mask…

Startseite
Nachricht löschen
Autor: Subversion repository
Datum:  
To: pcre-svn
Betreff: [Pcre-svn] [1200] code/trunk: Temporarily remove 32-bit masking
Revision: 1200
          http://vcs.pcre.org/viewvc?view=rev&revision=1200
Author:   chpe
Date:     2012-11-03 19:21:41 +0000 (Sat, 03 Nov 2012)


Log Message:
-----------
Temporarily remove 32-bit masking

Remove the masking via the PCRE_NO_UTF32_CHECK. It will be reintroduced as a
dedicated runtime option later, with support for JIT and non-JIT cases.

Modified Paths:
--------------
    code/trunk/RunTest
    code/trunk/doc/pcreunicode.3
    code/trunk/pcre_internal.h
    code/trunk/pcretest.c


Modified: code/trunk/RunTest
===================================================================
--- code/trunk/RunTest    2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/RunTest    2012-11-03 19:21:41 UTC (rev 1200)
@@ -418,39 +418,22 @@
   do26=yes
 fi


-# Extra test round for testing 32-bit UTF mode with high bits set
-if test "$test32" != "skip" -a $utf -eq 1; then
- test32plus="-32+"
-else
- test32plus=skip
-fi
-
# Show which release and which test data

echo ""
echo PCRE C library tests using test data from $testdata
$sim ./pcretest /dev/null

-for bmode in "$test8" "$test16" "$test32" "$test32plus"; do
+for bmode in "$test8" "$test16" "$test32"; do
   case "$bmode" in
     skip) continue;;
     -16)  if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi
           bits=16; echo "---- Testing 16-bit library ----"; echo "";;
     -32)  if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi
           bits=32; echo "---- Testing 32-bit library ----"; echo "";;
-    -32+) echo ""
-          bits=32; echo "---- Testing 32-bit library UTF-32 mode with high bits set ----"; echo "";;
     *)    bits=8; echo "---- Testing 8-bit library ----"; echo "";;
   esac


-  # When testing 32-bit UTF mode with high bits masked, skip all non-UTF tests
-  # Since this is the last test, we can just set doN=no
-  if test "$bmode" = "-32+"; then
-    do1=no do2=no do3=no do8=no 
-    do12=no do13=no do14=no do15=no do16=no do17=no
-    do20=no do21=no do23=no do24=no do25=no
-  fi
-
 # Primary test, compatible with JIT and all versions of Perl >= 5.8


if [ $do1 = yes ] ; then

Modified: code/trunk/doc/pcreunicode.3
===================================================================
--- code/trunk/doc/pcreunicode.3    2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/doc/pcreunicode.3    2012-11-03 19:21:41 UTC (rev 1200)
@@ -184,16 +184,6 @@
 the PCRE_NO_UTF32_CHECK flag at compile time or at run time, PCRE assumes that
 the pattern or subject it is given (respectively) contains only valid UTF-32
 sequences. In this case, it does not diagnose an invalid UTF-32 string.
-.P
-UTF-32 only uses the lowest 21 bits of the 32 bit characters, and the
-application may use the upper bits for internal purposes. To allow you to
-pass these strings to PCRE unmodified (thus avoiding the costly operation of
-creating a copy of the string with the upper bits masked), PCRE accepts
-these 32-bit character strings as-is, but only uses the lowest 21 bits for
-matching, if you pass the PCRE_NO_UTF32_CHECK flag to \fBpcre32_exec()\fP and
-\fBpcre32_dfa_exec()\fP. However, in this situation, you will have to apply
-your own validity check, and avoid the use of JIT optimization.
-(The latter restriction may be lifter in a later version of PCRE.)
 .
 .
 .SS "General comments about UTF modes"


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/pcre_internal.h    2012-11-03 19:21:41 UTC (rev 1200)
@@ -934,43 +934,29 @@
 #define GET_EXTRALEN(c) (0)
 #define NOT_FIRSTCHAR(c) (0)


-#define UTF32_MASK (0x1fffffu)
-
-/* Base macro to pick up an UTF-32 character out of a uint32 */
-
-#define MASKHIGHBITS(c) ((c) & UTF32_MASK)
-
-/* Base macro to pick up an UTF-32 character, not advancing the pointer */
-
-#define GETUTF32(eptr) (MASKHIGHBITS(*(eptr)))
-
-/* Base macro to pick up an UTF-32 character, advancing the pointer */
-
-#define GETUTF32INC(eptr) (MASKHIGHBITS(*((eptr)++)))
-
/* Get the next UTF-32 character, not advancing the pointer. This is called when
we know we are in UTF-32 mode. */

#define GETCHAR(c, eptr) \
- c = GETUTF32(eptr);
+ c = *(eptr);

/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
pointer. */

#define GETCHARTEST(c, eptr) \
- c = (utf ? GETUTF32(eptr) : *(eptr));
+ c = *(eptr);

/* Get the next UTF-32 character, advancing the pointer. This is called when we
know we are in UTF-32 mode. */

#define GETCHARINC(c, eptr) \
- c = GETUTF32INC(eptr);
+ c = *((eptr)++);

/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-32 mode. */

#define GETCHARINCTEST(c, eptr) \
- c = (utf ? GETUTF32INC(eptr) : *((eptr)++));
+ c = *((eptr)++);

/* Get the next UTF-32 character, not advancing the pointer, not incrementing
length (since all UTF-32 is of length 1). This is called when we know we are in
@@ -990,25 +976,25 @@
we know we are in UTF mode. */

#define RAWUCHAR(eptr) \
- (MASKHIGHBITS(*(eptr)))
+ (*(eptr))

/* Returns the next uchar, advancing the pointer. This is called when
we know we are in UTF mode. */

#define RAWUCHARINC(eptr) \
- (MASKHIGHBITS(*((eptr)++)))
+ (*((eptr)++))

/* Returns the next uchar, testing for UTF mode, and not advancing the
pointer. */

#define RAWUCHARTEST(eptr) \
- (utf ? (MASKHIGHBITS(*(eptr))) : *(eptr))
+ (*(eptr))

/* Returns the next uchar, testing for UTF mode, advancing the
pointer. */

#define RAWUCHARINCTEST(eptr) \
- (utf ? (MASKHIGHBITS(*((eptr)++))) : *((eptr)++))
+ (*((eptr)++))

/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-32 mode - we don't put a test within the

Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2012-11-03 19:21:37 UTC (rev 1199)
+++ code/trunk/pcretest.c    2012-11-03 19:21:41 UTC (rev 1200)
@@ -2102,8 +2102,6 @@
 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
 If handed a NULL file, just counts chars without printing. */


-#define UTF32_MASK (0x1fffffu)
-
static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
{
int yield = 0;
@@ -2114,7 +2112,6 @@
while (length-- > 0)
{
pcre_uint32 c = *p++;
- if (utf) c &= UTF32_MASK;
yield += pchar(c, f);
}

@@ -2942,9 +2939,6 @@
 int all_use_dfa = 0;
 int verify_jit = 0;
 int yield = 0;
-#ifdef SUPPORT_PCRE32
-int mask_utf32 = 0;
-#endif
 int stack_size;
 pcre_uint8 *dbuffer = NULL;
 size_t dbuffer_size = 1u << 14;
@@ -3056,11 +3050,10 @@
     exit(1);
 #endif
     }
-  else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
+  else if (strcmp(arg, "-32") == 0)
     {
 #ifdef SUPPORT_PCRE32
     pcre_mode = PCRE32_MODE;
-    mask_utf32 = (strcmp(arg, "-32+") == 0);
 #else
     printf("** This version of PCRE was built without 32-bit support\n");
     exit(1);
@@ -4821,21 +4814,6 @@
     }
 #endif


-#if defined SUPPORT_UTF && defined SUPPORT_PCRE32
-    /* If we're requsted to test UTF-32 masking of high bits, change the data
-    string to have high bits set, unless the string is invalid UTF-32.
-    Since the JIT doesn't support this yet, only do it when not JITing. */
-    if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
-        valid_utf32((pcre_uint32 *)dbuffer, len))
-      {
-      for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
-        *q32 |= ~(pcre_uint32)UTF32_MASK;
-
-      /* Need to pass NO_UTF32_CHECK so the high bits are allowed */
-      options |= PCRE_NO_UTF32_CHECK;
-      }
-#endif
-
     /* If we're compiling with explicit valgrind support, Mark the data from after
     its end to the end of the buffer as unaddressable, so that a read over the end
     of the buffer will be seen by valgrind, even if it doesn't cause a crash.