[Pcre-svn] [1117] code/trunk/pcretest.c: pcre32: pcretest: A…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1117] code/trunk/pcretest.c: pcre32: pcretest: Add -32+ option
Revision: 1117
          http://vcs.pcre.org/viewvc?view=rev&revision=1117
Author:   chpe
Date:     2012-10-16 16:57:27 +0100 (Tue, 16 Oct 2012)


Log Message:
-----------
pcre32: pcretest: Add -32+ option

Add -32+ option that selects 32-bit mode like -32, but additionally
modifies the characters in the data strings to have the bits > 21 set,
to test that the masking works.

Modified Paths:
--------------
    code/trunk/pcretest.c


Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2012-10-16 15:57:23 UTC (rev 1116)
+++ code/trunk/pcretest.c    2012-10-16 15:57:27 UTC (rev 1117)
@@ -399,10 +399,10 @@
 #ifdef SUPPORT_PCRE32


#define PCHARS32(lv, p, offset, len, f) \
- lv = pchars32((PCRE_SPTR32)(p) + offset, len, f)
+ lv = pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)

-#define PCHARSV32(p, offset, len, f) \
-  (void)pchars32((PCRE_SPTR32)(p) + offset, len, f)
+#define PCHARSV32(p, offset, len, f)                \
+  (void)pchars32((PCRE_SPTR32)(p) + offset, len, use_utf, f)


 #define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
   p = read_capture_name32(p, cn32, re)
@@ -1589,6 +1589,45 @@
 *pp = 0;
 return pp - buffer32;
 }
+
+/* Check that a 32-bit character string is valid UTF-32.
+
+Arguments:
+  string       points to the string
+  length       length of string, or -1 if the string is zero-terminated
+
+Returns:       TRUE  if the string is a valid UTF-32 string
+               FALSE otherwise
+*/
+
+#ifdef SUPPORT_UTF
+static BOOL
+valid_utf32(pcre_uint32 *string, int length)
+{
+register pcre_uint32 *p;
+register pcre_uint32 c;
+
+for (p = string; length-- > 0; p++)
+  {
+  c = *p;
+
+  if (c > 0x10ffffu)
+    return FALSE;
+
+  /* A surrogate */
+  if ((c & 0xfffff800u) == 0xd800u)
+    return FALSE;
+
+  /* Non-character */
+  if ((c & 0xfffeu) == 0xfffeu ||
+      c >= 0xfdd0u && c <= 0xfdefu)
+    return FALSE;
+  }
+
+return TRUE;
+}
+#endif /* SUPPORT_UTF */
+
 #endif


/*************************************************
@@ -1874,7 +1913,9 @@
/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. */

-static int pchars32(PCRE_SPTR32 p, int length, FILE *f)
+#define UTF32_MASK (0x1fffffu)
+
+static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
{
int yield = 0;

@@ -1883,7 +1924,8 @@

while (length-- > 0)
{
- pcre_uint32 c = *p++;
+ pcre_uint32 c = *p++;
+ if (utf) c &= UTF32_MASK;
yield += pchar(c, f);
}

@@ -2714,6 +2756,7 @@
 int all_use_dfa = 0;
 int verify_jit = 0;
 int yield = 0;
+int mask_utf32 = 0;
 int stack_size;
 pcre_uint8 *dbuffer = NULL;
 size_t dbuffer_size = 1u << 14;
@@ -2825,10 +2868,11 @@
     exit(1);
 #endif
     }
-  else if (strcmp(arg, "-32") == 0)
+  else if (strcmp(arg, "-32") == 0 || strcmp(arg, "-32+") == 0)
     {
 #ifdef SUPPORT_PCRE32
     pcre_mode = PCRE32_MODE;
+    mask_utf32 = (strcmp(arg, "-32+") == 0);
 #else
     printf("** This version of PCRE was built without 32-bit support\n");
     exit(1);
@@ -4573,6 +4617,20 @@
     }
 #endif


+#if defined SUPPORT_UTF && defined SUPPORT_PCRE32
+    /* If we're requsted to test UTF-32 masking of high bits, change the data
+    string to have high bits set, unless the string is invalid UTF-32. 
+    Since the JIT doesn't support this yet, only do it when not JITing. */
+    if (use_utf && mask_utf32 && (study_options & PCRE_STUDY_ALLJIT) == 0 &&
+        valid_utf32((pcre_uint32 *)dbuffer, len))
+      {
+      for (q32 = (pcre_uint32 *)dbuffer; *q32; q32++)
+        *q32 |= ~(pcre_uint32)UTF32_MASK;
+
+      options |= PCRE_NO_UTF32_CHECK;
+      }
+#endif
+
     /* Move the data to the end of the buffer so that a read over the end of
     the buffer will be seen by valgrind, even if it doesn't cause a crash. If
     we are using the POSIX interface, we must include the terminating zero. */