[Pcre-svn] [808] code/branches/pcre16: Commit progress so far on pcretest (runs test 1 on interpreter).

Author: Subversion repository
Date:
To: pcre-svn
Subject: [Pcre-svn] [808] code/branches/pcre16: Commit progress so far on pcretest (runs test 1 on interpreter).

Revision: 808

          http://vcs.pcre.org/viewvc?view=rev&revision=808
Author:   ph10
Date:     2011-12-18 11:11:48 +0000 (Sun, 18 Dec 2011)

Log Message:
-----------
Commit progress so far on pcretest (runs test 1 on interpreter).

Modified Paths:
--------------
    code/branches/pcre16/ChangeLog
    code/branches/pcre16/RunTest
    code/branches/pcre16/configure.ac
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcretest.c

Modified: code/branches/pcre16/ChangeLog
===================================================================
--- code/branches/pcre16/ChangeLog    2011-12-18 10:03:38 UTC (rev 807)
+++ code/branches/pcre16/ChangeLog    2011-12-18 11:11:48 UTC (rev 808)
@@ -1,6 +1,16 @@
 ChangeLog for PCRE
 ------------------

+Version 8.22
+------------
+
+1.  Renamed "isnumber" as "is_a_number" because in some Mac environments this
+    name is defined in ctype.h.
+    
+2.  Fixed a bug in the code for calculating the fixed length of lookbehind
+    assertions.
+      
+
 Version 8.21 12-Dec-2011
 ------------------------

Modified: code/branches/pcre16/RunTest
===================================================================
--- code/branches/pcre16/RunTest    2011-12-18 10:03:38 UTC (rev 807)
+++ code/branches/pcre16/RunTest    2011-12-18 11:11:48 UTC (rev 808)
@@ -100,9 +100,30 @@
    *) echo "Failed to find internal link size"; exit 1;;
 esac

-$sim ./pcretest -C | $sim ./pcregrep 'No UTF-8 support' >/dev/null
-utf8=$?
+# Both 8-bit and 16-bit character strings may be supported, but only one
+# need be.

+$sim ./pcretest -C | $sim ./pcregrep '8-bit and 16-bit support' >/dev/null
+if [ $? -eq 0 ] ; then
+  test8=
+  test16=-16
+else
+  $sim ./pcretest -C | $sim ./pcregrep '8-bit support' >/dev/null
+  if [ $? -eq 0 ] ; then
+    test8=
+    test16=skip
+  else
+    test8=skip
+    test16=-16
+  fi
+fi
+           
+# UTF support always applies to both bit sizes if both are supported; we can't
+# have UTF-8 support without UTF-16 support (for example).
+
+$sim ./pcretest -C | $sim ./pcregrep 'No UTF-(.+?) support' >/dev/null
+utf=$?
+
 $sim ./pcretest -C | $sim ./pcregrep 'No Unicode properties support' >/dev/null
 ucp=$?

@@ -114,21 +135,21 @@
jitopt=-s+
fi

-if [ $utf8 -eq 0 ] ; then
+if [ $utf -eq 0 ] ; then
   if [ $do4 = yes ] ; then
-    echo "Can't run test 4 because UTF-8 support is not configured"
+    echo "Can't run test 4 because UTF support is not configured"
     exit 1
   fi
   if [ $do5 = yes ] ; then
-    echo "Can't run test 5 because UTF-8 support is not configured"
+    echo "Can't run test 5 because UTF support is not configured"
     exit 1
   fi
   if [ $do8 = yes ] ; then
-    echo "Can't run test 8 because UTF-8 support is not configured"
+    echo "Can't run test 8 because UTF support is not configured"
     exit 1
   fi
   if [ $do12 = yes ] ; then
-    echo "Can't run test 12 because UTF-8 support is not configured"
+    echo "Can't run test 12 because UTF support is not configured"
     exit 1
   fi
 fi
@@ -201,12 +222,20 @@
 echo PCRE C library tests using test data from $testdata
 $sim ./pcretest /dev/null

+for bmode in "$test8" "$test16"; do
+  case "$bmode" in
+    skip) continue;;
+    -16)  if [ "$test8" != "skip" ] ; then echo ""; fi
+          echo "---- Testing 16-bit library ----"; echo "";;
+    *)    echo "---- Testing 8-bit library ----"; echo "";;
+  esac      
+   
 # Primary test, compatible with JIT and all versions of Perl >= 5.8

 if [ $do1 = yes ] ; then
   echo "Test 1: main functionality (Compatible with Perl >= 5.8)"
   for opt in "" "-s" $jitopt; do
-    $sim $valgrind ./pcretest -q $opt $testdata/testinput1 testtry
+    $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput1 testtry
     if [ $? = 0 ] ; then
       $cf $testdata/testoutput1 testtry
       if [ $? != 0 ] ; then exit 1; fi
@@ -222,9 +251,9 @@
 # PCRE tests that are not JIT or Perl-compatible: API, errors, internals

 if [ $do2 = yes ] ; then
-  echo "Test 2: API, errors, internals, and non-Perl stuff (not UTF-8)"
+  echo "Test 2: API, errors, internals, and non-Perl stuff (not UTF-8/16)"
   for opt in "" "-s" $jitopt; do
-    $sim $valgrind ./pcretest -q $opt $testdata/testinput2 testtry
+    $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput2 testtry
     if [ $? = 0 ] ; then
       $cf $testdata/testoutput2 testtry
       if [ $? != 0 ] ; then exit 1; fi
@@ -278,7 +307,7 @@
   if [ "$locale" != "" ] ; then
     echo "Test 3: locale-specific features (using '$locale' locale)"
     for opt in "" "-s" $jitopt; do
-      $sim $valgrind ./pcretest -q $opt $infile testtry
+      $sim $valgrind ./pcretest -q $bmode $opt $infile testtry
       if [ $? = 0 ] ; then
         $cf $outfile testtry
         if [ $? != 0 ] ; then
@@ -304,15 +333,15 @@
   fi
 fi

-# Additional tests for UTF8 support
+# Additional tests for UTF support

 if [ $do4 = yes ] ; then
-  echo "Test 4: UTF-8 support (Compatible with Perl >= 5.8)"
-  if [ $utf8 -eq 0 ] ; then
-    echo "  Skipped because UTF-8 support is not available"
+  echo "Test 4: UTF-8/16 support (Compatible with Perl >= 5.8)"
+  if [ $utf -eq 0 ] ; then
+    echo "  Skipped because UTF support is not available"
   else
     for opt in "" "-s" $jitopt; do
-      $sim $valgrind ./pcretest -q $opt $testdata/testinput4 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput4 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput4 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -327,12 +356,12 @@
 fi

 if [ $do5 = yes ] ; then
-  echo "Test 5: API, internals, and non-Perl stuff for UTF-8 support"
-  if [ $utf8 -eq 0 ] ; then
-    echo "  Skipped because UTF-8 support is not available"
+  echo "Test 5: API, internals, and non-Perl stuff for UTF-8/16 support"
+  if [ $utf -eq 0 ] ; then
+    echo "  Skipped because UTF support is not available"
   else
     for opt in "" "-s" $jitopt; do
-      $sim $valgrind ./pcretest -q $opt $testdata/testinput5 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput5 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput5 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -348,11 +377,11 @@

 if [ $do6 = yes ] ; then
   echo "Test 6: Unicode property support (Compatible with Perl >= 5.10)"
-  if [ $utf8 -eq 0 -o $ucp -eq 0 ] ; then
+  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
     echo "  Skipped because Unicode property support is not available"
   else
     for opt in "" "-s" $jitopt; do
-      $sim $valgrind ./pcretest -q $opt $testdata/testinput6 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput6 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput6 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -371,7 +400,7 @@
 if [ $do7 = yes ] ; then
   echo "Test 7: DFA matching"
   for opt in "" "-s"; do
-    $sim $valgrind ./pcretest -q $opt -dfa $testdata/testinput7 testtry
+    $sim $valgrind ./pcretest -q $bmode $opt -dfa $testdata/testinput7 testtry
     if [ $? = 0 ] ; then
       $cf $testdata/testoutput7 testtry
       if [ $? != 0 ] ; then exit 1; fi
@@ -382,12 +411,12 @@
 fi

 if [ $do8 = yes ] ; then
-  echo "Test 8: DFA matching with UTF-8"
-  if [ $utf8 -eq 0 ] ; then
-    echo "  Skipped because UTF-8 support is not available"
+  echo "Test 8: DFA matching with UTF-8 or UTF-16"
+  if [ $utf -eq 0 ] ; then
+    echo "  Skipped because UTF support is not available"
   else
     for opt in "" "-s"; do
-      $sim $valgrind ./pcretest -q $opt -dfa $testdata/testinput8 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt -dfa $testdata/testinput8 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput8 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -400,11 +429,11 @@

 if [ $do9 = yes ] ; then
   echo "Test 9: DFA matching with Unicode properties"
-  if [ $utf8 -eq 0 -o $ucp -eq 0 ] ; then
+  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
     echo "  Skipped because Unicode property support is not available"
   else
     for opt in "" "-s"; do
-      $sim $valgrind ./pcretest -q $opt -dfa $testdata/testinput9 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt -dfa $testdata/testinput9 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput9 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -429,7 +458,7 @@
     echo "  Skipped because Unicode property support is not available"
   else
     for opt in "" "-s"; do
-      $sim $valgrind ./pcretest -q $opt $testdata/testinput10 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput10 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput10 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -445,7 +474,7 @@
 if [ $do11 = yes ] ; then
   echo "Test 11: Features from Perl >= 5.10 without UTF8 support"
   for opt in "" "-s" $jitopt; do
-    $sim $valgrind ./pcretest -q $opt $testdata/testinput11 testtry
+    $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput11 testtry
     if [ $? = 0 ] ; then
       $cf $testdata/testoutput11 testtry
       if [ $? != 0 ] ; then exit 1; fi
@@ -461,12 +490,12 @@
 # Test of Perl >= 5.10 features with UTF8 support

 if [ $do12 = yes ] ; then
-  echo "Test 12: Features from Perl >= 5.10 with UTF8 support"
-  if [ $utf8 -eq 0 ] ; then
-    echo "  Skipped because UTF-8 support is not available"
+  echo "Test 12: Features from Perl >= 5.10 with UTF-8 or UTF-16 support"
+  if [ $utf -eq 0 ] ; then
+    echo "  Skipped because UTF support is not available"
   else
     for opt in "" "-s" $jitopt; do
-      $sim $valgrind ./pcretest -q $opt $testdata/testinput12 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput12 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput12 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -484,11 +513,11 @@

 if [ $do13 = yes ] ; then
   echo "Test 13: API, internals, and non-Perl stuff for Unicode property support"
-  if [ $utf8 -eq 0 -o $ucp -eq 0 ] ; then
+  if [ $utf -eq 0 -o $ucp -eq 0 ] ; then
     echo "  Skipped because Unicode property support is not available"
   else
     for opt in "" "-s" $jitopt; do
-      $sim $valgrind ./pcretest -q $opt $testdata/testinput13 testtry
+      $sim $valgrind ./pcretest -q $bmode $opt $testdata/testinput13 testtry
       if [ $? = 0 ] ; then
         $cf $testdata/testoutput13 testtry
         if [ $? != 0 ] ; then exit 1; fi
@@ -509,7 +538,7 @@
   if [ $jit -eq 0 ] ; then
     echo "  Skipped because JIT is not available or not usable"
   else
-    $sim $valgrind ./pcretest -q $testdata/testinput14 testtry
+    $sim $valgrind ./pcretest -q $bmode $testdata/testinput14 testtry
     if [ $? = 0 ] ; then
       $cf $testdata/testoutput14 testtry
       if [ $? != 0 ] ; then exit 1; fi
@@ -526,7 +555,7 @@
   if [ $jit -ne 0 ] ; then
     echo "  Skipped because JIT is available"
   else
-    $sim $valgrind ./pcretest -q $testdata/testinput15 testtry
+    $sim $valgrind ./pcretest -q $bmode $testdata/testinput15 testtry
     if [ $? = 0 ] ; then
       $cf $testdata/testoutput15 testtry
       if [ $? != 0 ] ; then exit 1; fi
@@ -536,4 +565,8 @@
   fi
 fi

+# End of loop for 8-bit/16-bit tests
+
+done
+
# End

Modified: code/branches/pcre16/configure.ac
===================================================================
--- code/branches/pcre16/configure.ac    2011-12-18 10:03:38 UTC (rev 807)
+++ code/branches/pcre16/configure.ac    2011-12-18 11:11:48 UTC (rev 808)
@@ -9,7 +9,7 @@
 dnl be defined as -RC2, for example. For real releases, it should be empty.

m4_define(pcre_major, [8])
-m4_define(pcre_minor, [21])
+m4_define(pcre_minor, [22])
m4_define(pcre_prerelease, [])
m4_define(pcre_date, [2011-12-12])

Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-12-18 10:03:38 UTC (rev 807)
+++ code/branches/pcre16/pcre_compile.c    2011-12-18 11:11:48 UTC (rev 808)
@@ -1709,6 +1709,7 @@
   int d;
   pcre_uchar *ce, *cs;
   register int op = *cc;
+  
   switch (op)
     {
     /* We only need to continue for OP_CBRA (normal capturing bracket) and
@@ -1768,7 +1769,8 @@
     case OP_ASSERTBACK:
     case OP_ASSERTBACK_NOT:
     do cc += GET(cc, 1); while (*cc == OP_ALT);
-    /* Fall through */
+    cc += PRIV(OP_lengths)[*cc];
+    break;

     /* Skip over things that don't match chars */

@@ -6661,11 +6663,11 @@

         if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
           {
-          BOOL isnumber = TRUE;
+          BOOL is_a_number = TRUE;
           for (p = ptr + 1; *p != 0 && *p != terminator; p++)
             {
-            if (!MAX_255(*p)) { isnumber = FALSE; break; }
-            if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
+            if (!MAX_255(*p)) { is_a_number = FALSE; break; }
+            if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;
             if ((cd->ctypes[*p] & ctype_word) == 0) break;
             }
           if (*p != terminator)
@@ -6673,7 +6675,7 @@
             *errorcodeptr = ERR57;
             break;
             }
-          if (isnumber)
+          if (is_a_number)
             {
             ptr++;
             goto HANDLE_NUMERICAL_RECURSION;

Modified: code/branches/pcre16/pcretest.c
===================================================================
--- code/branches/pcre16/pcretest.c    2011-12-18 10:03:38 UTC (rev 807)
+++ code/branches/pcre16/pcretest.c    2011-12-18 11:11:48 UTC (rev 808)
@@ -4,7 +4,8 @@

/* This program was hacked up as a tester for PCRE. I really should have
written it more tidily in the first place. Will I ever learn? It has grown and
-been extended and consequently is now rather, er, *very* untidy in places.
+been extended and consequently is now rather, er, *very* untidy in places. The
+addition of 16-bit support has made it even worse. :-(

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -107,9 +108,9 @@
#include "pcre.h"
#include "pcre_internal.h"

-/* The pcre_printint() function, which prints the internal form of a compiled
-regex, is held in a separate file so that (a) it can be compiled in either
-8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
+/* The pcre_printint() function, which prints the internal form of a compiled
+regex, is held in a separate file so that (a) it can be compiled in either
+8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
when that is compiled in debug mode. */

#ifdef SUPPORT_PCRE8
@@ -149,7 +150,7 @@
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

-#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
+#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))

/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
@@ -159,11 +160,11 @@
#include "pcreposix.h"
#endif

-/* It is also possible, for the benefit of the version currently imported into
-Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
-interface to the DFA matcher (NODFA), and without the doublecheck of the old
-"info" function (define NOINFOCHECK). In fact, we automatically cut out the
-UTF8 support if PCRE is built without it. */
+/* It is also possible, originally for the benefit of a version that was
+imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
+without the interface to the DFA matcher (NODFA), and without the doublecheck
+of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
+out the UTF8 support if PCRE is built without it. */

#ifndef SUPPORT_UTF8
#ifndef NOUTF8
@@ -171,7 +172,113 @@
#endif
#endif

+/* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
+for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
+only from one place and is handled differently). I couldn't dream up any way of
+using a single macro to do this in a generic way, because of the many different
+argument requirements. We know that at least one of SUPPORT_PCRE8 and
+SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
+use these in the definitions of generic macros. */

+#ifdef SUPPORT_PCRE8
+#define PCHARS8(lv, p, len, f) \
+  lv = pchars((pcre_uint8 *)p, len, f)
+
+#define PCHARSV8(p, len, f) \
+  (void)pchars((pcre_uint8 *)p, len, f)
+
+#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
+  re = pcre_compile((char *)pat, options, error, erroffset, tables)
+
+#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
+    offsets, size_offsets) \
+  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
+    offsets, size_offsets)
+
+#define PCRE_STUDY8(extra, re, options, error) \
+  extra = pcre_study(re, options, error)
+#endif
+
+
+#ifdef SUPPORT_PCRE16
+#define PCHARS16(lv, p, len, f) \
+  lv = pchars16((PCRE_SPTR16)p, len, f)
+
+#define PCHARSV16(p, len, f) \
+  (void)pchars16((PCRE_SPTR16)p, len, f)
+
+#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
+  re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
+
+#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
+    offsets, size_offsets) \
+  count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
+    options, offsets, size_offsets)
+
+#define PCRE_STUDY16(extra, re, options, error) \
+  extra = pcre16_study(re, options, error)
+#endif
+
+
+/* ----- Both modes are supported; a runtime test is needed ----- */
+
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+
+#define PCHARS(lv, p, len, f) \
+  if (use_pcre16) \
+    PCHARS16(lv, p, len, f); \
+  else \
+    PCHARS8(lv, p, len, f)
+
+#define PCHARSV(p, len, f) \
+  if (use_pcre16) \
+    PCHARSV16(p, len, f); \
+  else \
+    PCHARSV8(p, len, f)
+
+#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
+  if (use_pcre16) \
+    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
+  else \
+    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
+
+#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
+    offsets, size_offsets) \
+  if (use_pcre16) \
+    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
+      offsets, size_offsets); \
+  else \
+    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
+      offsets, size_offsets)
+
+#define PCRE_STUDY(extra, re, options, error) \
+  if (use_pcre16) \
+    PCRE_STUDY16(extra, re, options, error); \
+  else \
+    PCRE_STUDY8(extra, re, options, error)
+
+/* ----- Only 8-bit mode is supported ----- */
+
+#elif defined SUPPORT_PCRE8
+#define PCHARS       PCHARS8
+#define PCHARSV      PCHARSV8
+#define PCRE_COMPILE PCRE_COMPILE8
+#define PCRE_EXEC    PCRE_EXEC8
+#define PCRE_STUDY   PCRE_STUDY8
+
+/* ----- Only 16-bit mode is supported ----- */
+
+#else
+#define PCHARS       PCHARS16
+#define PCHARSV      PCHARSV16
+#define PCRE_COMPILE PCRE_COMPILE16
+#define PCRE_EXEC    PCRE_EXEC16
+#define PCRE_STUDY   PCRE_STUDY16
+#endif
+
+/* ----- End of mode-specific function call macros ----- */
+
+
 /* Other parameters */

#ifndef CLOCKS_PER_SEC
@@ -203,8 +310,6 @@
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;

-static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
-
/* The buffers grow automatically if very long input lines are encountered. */

static int buffer_size = 50000;
@@ -217,6 +322,16 @@
static pcre_uint16 *buffer16 = NULL;
#endif

+/* If we have 8-bit support, default use_pcre16 to false; if there is also
+16-bit support, it can be changed by an option. If there is no 8-bit support,
+there must be 16-bit support, so default it to 1. */
+
+#ifdef SUPPORT_PCRE8
+static int use_pcre16 = 0;
+#else
+static int use_pcre16 = 1;
+#endif
+
/* Textual explanations for runtime error codes */

static const char *errtexts[] = {
@@ -248,7 +363,7 @@
NULL, /* SHORTUTF8 is handled specially */
"nested recursion at the same subject position",
"JIT stack limit reached",
- "pattern compiled in wrong mode (8-bit/16-bit error)"
+ "pattern compiled in wrong mode (8-bit/16-bit error)"
};

@@ -264,7 +379,7 @@
/* This is the set of tables distributed as default with PCRE. It recognizes
only ASCII characters. */

-static const unsigned char tables0[] = {
+static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. */

@@ -437,7 +552,7 @@
be at least an approximation of ISO 8859. In particular, there are characters
greater than 128 that are marked as spaces, letters, etc. */

-static const unsigned char tables1[] = {
+static const pcre_uint8 tables1[] = {
0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
@@ -610,47 +725,159 @@
}

+/*************************************************
+*            Convert UTF-8 string to value       *
+*************************************************/
+
+/* This function takes one or more bytes that represents a UTF-8 character,
+and returns the value of the character.
+
+Argument:
+  utf8bytes   a pointer to the byte vector
+  vptr        a pointer to an int to receive the value
+
+Returns:      >  0 => the number of bytes consumed
+              -6 to 0 => malformed UTF-8 character at offset = (-return)
+*/
+
+#if !defined NOUTF8
+
+static int
+utf82ord(pcre_uint8 *utf8bytes, int *vptr)
+{
+int c = *utf8bytes++;
+int d = c;
+int i, j, s;
+
+for (i = -1; i < 6; i++)               /* i is number of additional bytes */
+  {
+  if ((d & 0x80) == 0) break;
+  d <<= 1;
+  }
+
+if (i == -1) { *vptr = c; return 1; }  /* ascii character */
+if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
+
+/* i now has a value in the range 1-5 */
+
+s = 6*i;
+d = (c & utf8_table3[i]) << s;
+
+for (j = 0; j < i; j++)
+  {
+  c = *utf8bytes++;
+  if ((c & 0xc0) != 0x80) return -(j+1);
+  s -= 6;
+  d |= (c & 0x3f) << s;
+  }
+
+/* Check that encoding was the correct unique one */
+
+for (j = 0; j < utf8_table1_size; j++)
+  if (d <= utf8_table1[j]) break;
+if (j != i) return -(i+1);
+
+/* Valid value */
+
+*vptr = d;
+return i+1;
+}
+
+#endif
+
+
+
+/*************************************************
+*       Convert character value to UTF-8         *
+*************************************************/
+
+/* This function takes an integer value in the range 0 - 0x7fffffff
+and encodes it as a UTF-8 character in 1 to 6 bytes.
+
+Arguments:
+  cvalue     the character value
+  utf8bytes  pointer to buffer for result - at least 6 bytes long
+
+Returns:     number of bytes placed in the buffer
+*/
+
+#if !defined NOUTF8
+
+static int
+ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
+{
+register int i, j;
+for (i = 0; i < utf8_table1_size; i++)
+  if (cvalue <= utf8_table1[i]) break;
+utf8bytes += i;
+for (j = i; j > 0; j--)
+ {
+ *utf8bytes-- = 0x80 | (cvalue & 0x3f);
+ cvalue >>= 6;
+ }
+*utf8bytes = utf8_table2[i] | cvalue;
+return i + 1;
+}
+
+#endif
+
+
+
 #ifdef SUPPORT_PCRE16
 /*************************************************
 *         Convert a string to 16-bit             *
 *************************************************/

-/* The result is always left in buffer16. */
+/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
+8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
+double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
+bytes in UTF-16, while higher values use at least 4 bytes in UTF-8 and 4 bytes
+in UTF-16. The result is always left in buffer16. */

static int
-to16(unsigned char *p, int utf)
+to16(pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;
-int len = (int)strlen((char *)p) + 1;

-if (buffer16_size < 2*len)
+if (buffer16_size < 2*len + 2)
   {
   if (buffer16 != NULL) free(buffer16);
-  buffer16_size = 2*len;
+  buffer16_size = 2*len + 2;
   buffer16 = (pcre_uint16 *)malloc(buffer16_size);
-  if (buffer16 == NULL) 
+  if (buffer16 == NULL)
     {
     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
     exit(1);
     }
   }
-  
+
 pp = buffer16;

 if (!utf)
   {
-  while (*p != 0) *pp++ = *p++;    
-  *pp++ = 0; 
+  while (len-- > 0) *pp++ = *p++;
   }
-  
+
 else
   {
-fprintf(stderr, "pcretest: no support yet for UTF-16\n");
-exit(1);  
-  }   
-  
+  int c;
+  while (len > 0)
+    {
+    int chlen = utf82ord(p, &c);
+    p += chlen;
+    len -= chlen; 
+    if (c < 0x10000) *pp++ = c; else
+      {
+      c -= 0x10000;
+      *pp++ = 0xD800 | (c >> 10);
+      *pp++ = 0xDC00 | (c & 0x3ff);
+      }
+    }
+  }
+
+*pp = 0;
 return pp - buffer16;
-} 
+}
 #endif

@@ -727,9 +954,9 @@
   else
     {
     int new_buffer_size = 2*buffer_size;
-    pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
-    pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
-    pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
+    pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
+    pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
+    pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);

     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
       {
@@ -760,10 +987,6 @@

-
-
-
-
 /*************************************************
 *          Read number from string               *
 *************************************************/
@@ -780,7 +1003,7 @@
 */

static int
-get_value(unsigned char *str, unsigned char **endptr)
+get_value(pcre_uint8 *str, pcre_uint8 **endptr)
{
int result = 0;
while(*str != 0 && isspace(*str)) str++;
@@ -791,115 +1014,16 @@

-
+#ifdef SUPPORT_PCRE8
 /*************************************************
-*            Convert UTF-8 string to value       *
+*         Print 8-bit character string           *
 *************************************************/

-/* This function takes one or more bytes that represents a UTF-8 character,
-and returns the value of the character.
+/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
+If handed a NULL file, just counts chars without printing. */

-Argument:
-  utf8bytes   a pointer to the byte vector
-  vptr        a pointer to an int to receive the value
-
-Returns:      >  0 => the number of bytes consumed
-              -6 to 0 => malformed UTF-8 character at offset = (-return)
-*/
-
-#if !defined NOUTF8
-
-static int
-utf82ord(unsigned char *utf8bytes, int *vptr)
+static int pchars(pcre_uint8 *p, int length, FILE *f)
 {
-int c = *utf8bytes++;
-int d = c;
-int i, j, s;
-
-for (i = -1; i < 6; i++)               /* i is number of additional bytes */
-  {
-  if ((d & 0x80) == 0) break;
-  d <<= 1;
-  }
-
-if (i == -1) { *vptr = c; return 1; }  /* ascii character */
-if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
-
-/* i now has a value in the range 1-5 */
-
-s = 6*i;
-d = (c & utf8_table3[i]) << s;
-
-for (j = 0; j < i; j++)
-  {
-  c = *utf8bytes++;
-  if ((c & 0xc0) != 0x80) return -(j+1);
-  s -= 6;
-  d |= (c & 0x3f) << s;
-  }
-
-/* Check that encoding was the correct unique one */
-
-for (j = 0; j < utf8_table1_size; j++)
-  if (d <= utf8_table1[j]) break;
-if (j != i) return -(i+1);
-
-/* Valid value */
-
-*vptr = d;
-return i+1;
-}
-
-#endif
-
-
-
-/*************************************************
-*       Convert character value to UTF-8         *
-*************************************************/
-
-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
-
-Arguments:
-  cvalue     the character value
-  utf8bytes  pointer to buffer for result - at least 6 bytes long
-
-Returns:     number of characters placed in the buffer
-*/
-
-#if !defined NOUTF8
-
-static int
-ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
-{
-register int i, j;
-for (i = 0; i < utf8_table1_size; i++)
-  if (cvalue <= utf8_table1[i]) break;
-utf8bytes += i;
-for (j = i; j > 0; j--)
- {
- *utf8bytes-- = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
-*utf8bytes = utf8_table2[i] | cvalue;
-return i + 1;
-}
-
-#endif
-
-
-
-/*************************************************
-*             Print character string             *
-*************************************************/
-
-/* Character string printing function. Must handle UTF-8 strings in utf8
-mode. Yields number of characters printed. If handed a NULL file, just counts
-chars without printing. */
-
-static int pchars(unsigned char *p, int length, FILE *f)
-{
 int c = 0;
 int yield = 0;

@@ -914,7 +1038,7 @@
       {
       length -= rc - 1;
       p += rc;
-      if (PRINTHEX(c))
+      if (PRINTOK(c))
         {
         if (f != NULL) fprintf(f, "%c", c);
         yield++;
@@ -936,7 +1060,7 @@
    /* Not UTF-8, or malformed UTF-8  */

   c = *p++;
-  if (PRINTHEX(c))
+  if (PRINTOK(c))
     {
     if (f != NULL) fprintf(f, "%c", c);
     yield++;
@@ -950,10 +1074,69 @@

return yield;
}
+#endif

+#ifdef SUPPORT_PCRE16
 /*************************************************
+*           Print 16-bit character string        *
+*************************************************/
+
+/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
+If handed a NULL file, just counts chars without printing. */
+
+static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
+{
+int yield = 0;
+
+while (length-- > 0)
+  {
+  int c = *p++ & 0xffff;
+  
+#if !defined NOUTF8
+  if (use_utf8 && c >= 0xD800 && c < 0xDC00 && length > 0)
+    {
+    int d = *p & 0xffff;
+    if (d >= 0xDC00 && d < 0xDFFF)
+      {
+      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
+      length--;
+      p++; 
+      }
+    }   
+#endif
+
+  if (PRINTOK(c))
+    {
+    if (f != NULL) fprintf(f, "%c", c);
+    yield++;
+    }
+  else
+    {
+    yield += 4;
+    if (c < 0x100)
+      {
+      if (f != NULL) fprintf(f, "\\x%02x", c);
+      }
+    else
+      {
+      if (f != NULL) fprintf(f, "\\x{%02x}", c);
+      yield += (c <= 0x000000ff)? 2 :
+               (c <= 0x00000fff)? 3 :
+               (c <= 0x0000ffff)? 4 :
+               (c <= 0x000fffff)? 5 : 6;
+      }
+    }
+  }
+
+return yield;
+}
+#endif
+
+
+
+/*************************************************
 *              Callout function                  *
 *************************************************/

@@ -978,7 +1161,7 @@
     else
       {
       fprintf(f, "%2d: ", i/2);
-      (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
+      PCHARSV(cb->subject + cb->offset_vector[i],
         cb->offset_vector[i+1] - cb->offset_vector[i], f);
       fprintf(f, "\n");
       }
@@ -991,13 +1174,13 @@

if (f != NULL) fprintf(f, "--->");

-pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
-post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
+PCHARS(pre_start, cb->subject, cb->start_match, f);
+PCHARS(post_start, cb->subject + cb->start_match,
cb->current_position - cb->start_match, f);

-subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
+PCHARS(subject_length, cb->subject, cb->subject_length, NULL);

-(void)pchars((unsigned char *)(cb->subject + cb->current_position),
+PCHARSV(cb->subject + cb->current_position,
cb->subject_length - cb->current_position, f);

 if (f != NULL) fprintf(f, "\n");
@@ -1103,13 +1286,29 @@
 *          Call pcre_fullinfo()                  *
 *************************************************/

-/* Get one piece of information from the pcre_fullinfo() function */
+/* Get one piece of information from the pcre_fullinfo() function. When only
+one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
+value, but the code is defensive. */

static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
{
int rc;
-if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
- fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
+
+if (use_pcre16)
+#ifdef SUPPORT_PCRE16
+ rc = pcre16_fullinfo(re, study, option, ptr);
+#else
+ rc = PCRE_ERROR_BADMODE;
+#endif
+else
+#ifdef SUPPORT_PCRE8
+ rc = pcre_fullinfo(re, study, option, ptr);
+#else
+ rc = PCRE_ERROR_BADMODE;
+#endif
+
+if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
+ use_pcre16? "16" : "", option);
}

@@ -1151,7 +1350,7 @@
{
*limit = mid;

-  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
+  PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
     use_offsets, use_size_offsets);

if (count == errnumber)
@@ -1313,7 +1512,6 @@
int debug = 0;
int done = 0;
int all_use_dfa = 0;
-int use_pcre16 = 0;
int yield = 0;
int stack_size;

@@ -1329,7 +1527,7 @@
pcre_uchar *getnamesptr;

/* Get buffers from malloc() so that valgrind will check their misuse when
-debugging. They grow automatically when very long lines are read. The 16-bit
+debugging. They grow automatically when very long lines are read. The 16-bit
buffer (buffer16) is obtained only if needed. */

buffer = (pcre_uint8 *)malloc(buffer_size);
@@ -1353,16 +1551,19 @@

while (argc > 1 && argv[op][0] == '-')
{
- unsigned char *endptr;
+ pcre_uint8 *endptr;

-  if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
-  else if (strcmp(argv[op], "-m") == 0) showstore = 1;
+  if (strcmp(argv[op], "-m") == 0) showstore = 1;
   else if (strcmp(argv[op], "-s") == 0) force_study = 0;
   else if (strcmp(argv[op], "-s+") == 0)
     {
     force_study = 1;
     force_study_options = PCRE_STUDY_JIT_COMPILE;
     }
+#ifdef SUPPORT_PCRE16
+  else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
+#endif
+
   else if (strcmp(argv[op], "-q") == 0) quiet = 1;
   else if (strcmp(argv[op], "-b") == 0) debug = 1;
   else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
@@ -1372,7 +1573,7 @@
   else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
 #endif
   else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
-      ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
+      ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
         *endptr == 0))
     {
     op++;
@@ -1382,7 +1583,7 @@
     {
     int both = argv[op][2] == 0;
     int temp;
-    if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
+    if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
                      *endptr == 0))
       {
       timeitm = temp;
@@ -1393,7 +1594,7 @@
     if (both) timeit = timeitm;
     }
   else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
-      ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
+      ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
         *endptr == 0))
     {
 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
@@ -1423,25 +1624,27 @@
     unsigned long int lrc;
     printf("PCRE version %s\n", pcre_version());
     printf("Compiled with\n");
-    
-/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */

+/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
+are set, either both UTFs are supported or both are not supported. */
+
 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
     printf("  8-bit and 16-bit support\n");
     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
-    printf("  %sUTF-8 support\n", rc? "" : "No ");
-    (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
-    printf("  %sUTF-16 support\n", rc? "" : "No ");
+    if (rc)
+      printf("  UTF-8 and UTF-16 support\n");
+    else 
+      printf("  No UTF-8 or UTF-16 support\n");
 #elif defined SUPPORT_PCRE8
     printf("  8-bit support only\n");
     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
     printf("  %sUTF-8 support\n", rc? "" : "No ");
-#else    
+#else
     printf("  16-bit support only\n");
     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
     printf("  %sUTF-16 support\n", rc? "" : "No ");
-#endif     
- 
+#endif
+
     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
     printf("  %sUnicode properties support\n", rc? "" : "No ");
     (void)pcre_config(PCRE_CONFIG_JIT, &rc);
@@ -1488,10 +1691,6 @@
   argc--;
   }

-/* Select which fullinfo function to use. */
-
-fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
-
/* Get the store for the offsets vector, and remember what it was */

size_offsets_max = size_offsets;
@@ -1561,10 +1760,10 @@
#endif

const char *error;
- unsigned char *markptr;
- unsigned char *p, *pp, *ppp;
- unsigned char *to_file = NULL;
- const unsigned char *tables = NULL;
+ pcre_uint8 *markptr;
+ pcre_uint8 *p, *pp, *ppp;
+ pcre_uint8 *to_file = NULL;
+ const pcre_uint8 *tables = NULL;
unsigned long int true_size, true_study_size = 0;
size_t size, regex_gotten_store;
int do_allcaps = 0;
@@ -1898,15 +2097,15 @@

     {
     unsigned long int get_options;
-    
-    /* In 16-bit mode, convert the input. The space needed for a non-UTF string 
-    is exactly double the 8-bit size. For a UTF-8 string, the size needed for 
-    UTF-16 is no more than double, because up to 0xffff uses no more than 3
-    bytes in UTF-8 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8
-    and up to 4 bytes in UTF-16. */
-    
+
+    /* In 16-bit mode, convert the input. */
+
 #ifdef SUPPORT_PCRE16
-    if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
+    if (use_pcre16) 
+      {
+      (void)to16(p, options & PCRE_UTF8, (int)strlen((char *)p));
+      p = (pcre_uint8 *)buffer16; 
+      } 
 #endif

     /* Compile many times when timing */
@@ -1918,12 +2117,7 @@
       clock_t start_time = clock();
       for (i = 0; i < timeit; i++)
         {
-#ifdef SUPPORT_PCRE16
-        if (use_pcre16)         
-          re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
-        else 
-#endif         
-          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
+        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
         if (re != NULL) free(re);
         }
       time_taken = clock() - start_time;
@@ -1933,13 +2127,7 @@
       }

     first_gotten_store = 0;
-    
-#ifdef SUPPORT_PCRE16
-    if (use_pcre16) 
-      re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
-    else
-#endif        
-      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
+    PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);

     /* Compilation failed; go back for another re, skipping to blank line
     if non-interactive. */
@@ -2001,21 +2189,15 @@
         clock_t start_time = clock();
         for (i = 0; i < timeit; i++)
           {
-          if (use_pcre16)  
-            extra = pcre16_study(re, study_options | force_study_options, &error);
-          else
-            extra = pcre_study(re, study_options | force_study_options, &error);
-          } 
+          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
+          }
         time_taken = clock() - start_time;
         if (extra != NULL) pcre_free_study(extra);
         fprintf(outfile, "  Study time %.4f milliseconds\n",
           (((double)time_taken * 1000.0) / (double)timeit) /
             (double)CLOCKS_PER_SEC);
         }
-      if (use_pcre16)   
-        extra = pcre16_study(re, study_options | force_study_options, &error);
-      else   
-        extra = pcre_study(re, study_options | force_study_options, &error);
+      PCRE_STUDY(extra, re, study_options | force_study_options, &error);
       if (error != NULL)
         fprintf(outfile, "Failed to study: %s\n", error);
       else if (extra != NULL)
@@ -2090,7 +2272,7 @@
       fprintf(outfile, "------------------------------------------------------------------\n");
       if (use_pcre16)
         pcre16_printint(re, outfile, debug_lengths);
-      else   
+      else
         pcre_printint(re, outfile, debug_lengths);
       }

@@ -2121,10 +2303,10 @@

       /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
       that it gives the same results as the new function. */
-        
+
 #if !defined NOINFOCHECK
       if (!use_pcre16)
-        { 
+        {
         old_count = pcre_info(re, &old_options, &old_first_char);
         if (count < 0) fprintf(outfile,
           "Error %d from pcre_info()\n", count);
@@ -2133,16 +2315,16 @@
           if (old_count != count) fprintf(outfile,
             "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
               old_count);
-        
+
           if (old_first_char != first_char) fprintf(outfile,
             "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
               first_char, old_first_char);
-        
+
           if (old_options != (int)get_options) fprintf(outfile,
             "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
               get_options, old_options);
           }
-        }   
+        }
 #endif

       if (size != regex_gotten_store) fprintf(outfile,
@@ -2233,7 +2415,7 @@
           ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
           "" : " (caseless)";

-        if (PRINTHEX(first_char))
+        if (PRINTOK(first_char))
           fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
         else
           fprintf(outfile, "First char = %d%s\n", first_char, caseless);
@@ -2249,7 +2431,7 @@
           ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
           "" : " (caseless)";

-        if (PRINTHEX(need_char))
+        if (PRINTOK(need_char))
           fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
         else
           fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
@@ -2292,7 +2474,7 @@
                   fprintf(outfile, "\n  ");
                   c = 2;
                   }
-                if (PRINTHEX(i) && i != ' ')
+                if (PRINTOK(i) && i != ' ')
                   {
                   fprintf(outfile, "%c ", i);
                   c += 2;
@@ -2479,7 +2661,7 @@
 #if !defined NOUTF8
         if (use_utf8 && c > 255)
           {
-          unsigned char buff8[8];
+          pcre_uint8 buff8[8];
           int ii, utn;
           utn = ord2utf8(c, buff8);
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
@@ -2495,7 +2677,7 @@
 #if !defined NOUTF8
         if (*p == '{')
           {
-          unsigned char *pt = p;
+          pcre_uint8 *pt = p;
           c = 0;

           /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
@@ -2507,7 +2689,7 @@
             c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
           if (*pt == '}')
             {
-            unsigned char buff8[8];
+            pcre_uint8 buff8[8];
             int ii, utn;
             if (use_utf8)
               {
@@ -2817,13 +2999,13 @@
           if (pmatch[i].rm_so >= 0)
             {
             fprintf(outfile, "%2d: ", (int)i);
-            (void)pchars(dbuffer + pmatch[i].rm_so,
+            PCHARSV(dbuffer + pmatch[i].rm_so,
               pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
             fprintf(outfile, "\n");
             if (do_showcaprest || (i == 0 && do_showrest))
               {
               fprintf(outfile, "%2d+ ", (int)i);
-              (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
+              PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
                 outfile);
               fprintf(outfile, "\n");
               }
@@ -2831,12 +3013,20 @@
           }
         }
       free(pmatch);
+      goto NEXT_DATA;
       }

+#endif  /* !defined NOPOSIX */
+
     /* Handle matching via the native interface - repeats for /g and /G */

-    else
-#endif  /* !defined NOPOSIX */
+#ifdef SUPPORT_PCRE16
+    if (use_pcre16) 
+      {
+      len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
+      bptr = (pcre_uint8 *)buffer16;
+      }  
+#endif

     for (;; gmatched++)    /* Loop for /g or /G */
       {
@@ -2847,11 +3037,6 @@
         register int i;
         clock_t time_taken;
         clock_t start_time = clock();
-        
-#ifdef SUPPORT_PCRE16
-        if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
-#endif
-

 #if !defined NODFA
         if (all_use_dfa || use_dfa)
@@ -2866,9 +3051,10 @@
 #endif

         for (i = 0; i < timeitm; i++)
-          count = pcre_exec(re, extra, (char *)bptr, len,
+          {
+          PCRE_EXEC(count, re, extra, bptr, len,
             start_offset, options | g_notempty, use_offsets, use_size_offsets);
-
+          }
         time_taken = clock() - start_time;
         fprintf(outfile, "Execute time %.4f milliseconds\n",
           (((double)time_taken * 1000.0) / (double)timeitm) /
@@ -2913,7 +3099,7 @@
           }
         extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
         extra->callout_data = &callout_data;
-        count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
+        PCRE_EXEC(count, re, extra, bptr, len, start_offset,
           options | g_notempty, use_offsets, use_size_offsets);
         extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
         }
@@ -2938,12 +3124,8 @@

       else
         {
-        if (use_pcre16) 
-          count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
-            start_offset, options | g_notempty, use_offsets, use_size_offsets);
-        else     
-          count = pcre_exec(re, extra, (char *)bptr, len,
-            start_offset, options | g_notempty, use_offsets, use_size_offsets);
+        PCRE_EXEC(count, re, extra, bptr, len, start_offset,
+          options | g_notempty, use_offsets, use_size_offsets);
         if (count == 0)
           {
           fprintf(outfile, "Matched, but too many substrings\n");
@@ -3004,13 +3186,13 @@
           else
             {
             fprintf(outfile, "%2d: ", i/2);
-            (void)pchars(bptr + use_offsets[i],
+            PCHARSV(bptr + use_offsets[i],
               use_offsets[i+1] - use_offsets[i], outfile);
             fprintf(outfile, "\n");
             if (do_showcaprest || (i == 0 && do_showrest))
               {
               fprintf(outfile, "%2d+ ", i/2);
-              (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
+              PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
                 outfile);
               fprintf(outfile, "\n");
               }
@@ -3106,7 +3288,7 @@
         if (use_size_offsets > 1)
           {
           fprintf(outfile, ": ");
-          pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
+          PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
             outfile);
           }
         fprintf(outfile, "\n");

Diese Nachricht ist Teil des folgenden Threads:
	Der komplette Thread sortiert nach Datum

[Pcre-svn] [808] code/branches/pcre16: Commit progress so far on pcretest (runs test 1 on interpreter).