[Pcre-svn] [805] code/branches/pcre16: Preliminary work on pcretest for 16-bit ( not complete by any means).

Author: Subversion repository
Date:
To: pcre-svn
Subject: [Pcre-svn] [805] code/branches/pcre16: Preliminary work on pcretest for 16-bit ( not complete by any means).

Revision: 805

          http://vcs.pcre.org/viewvc?view=rev&revision=805
Author:   ph10
Date:     2011-12-14 16:49:20 +0000 (Wed, 14 Dec 2011)

Log Message:
-----------
Preliminary work on pcretest for 16-bit (not complete by any means).

Modified Paths:
--------------
    code/branches/pcre16/Makefile.am
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcre_config.c
    code/branches/pcre16/pcretest.c

Added Paths:
-----------
    code/branches/pcre16/pcre16_printint.c
    code/branches/pcre16/pcre_printint.c

Removed Paths:
-------------
    code/branches/pcre16/pcre_printint.src

Modified: code/branches/pcre16/Makefile.am
===================================================================
--- code/branches/pcre16/Makefile.am    2011-12-14 11:18:01 UTC (rev 804)
+++ code/branches/pcre16/Makefile.am    2011-12-14 16:49:20 UTC (rev 805)
@@ -240,10 +240,9 @@

endif # WITH_PCRE16

-# The pcre_printint.src file is #included by some source files, so it must be
-# distributed. The pcre_chartables.c.dist file is the default version of
-# pcre_chartables.c, used unless --enable-rebuild-chartables is specified.
-EXTRA_DIST += pcre_printint.src pcre_chartables.c.dist
+# The pcre_chartables.c.dist file is the default version of pcre_chartables.c,
+# used unless --enable-rebuild-chartables is specified.
+EXTRA_DIST += pcre_chartables.c.dist

# The JIT compiler lives in a separate directory, but its files are #included
# when pcre_jit_compile.c is processed, so they must be distributed.
@@ -338,8 +337,13 @@
pcretest_SOURCES = pcretest.c
pcretest_LDADD = $(LIBREADLINE)
if WITH_PCRE8
+pcretest_SOURCES += pcre_printint.c
pcretest_LDADD += libpcreposix.la
endif # WITH_PCRE8
+if WITH_PCRE16
+pcretest_SOURCES += pcre16_printint.c
+pcretest_LDADD += libpcre16.la
+endif # WITH_PCRE16

TESTS += RunGrepTest
dist_noinst_SCRIPTS += RunGrepTest

Added: code/branches/pcre16/pcre16_printint.c
===================================================================
--- code/branches/pcre16/pcre16_printint.c                            (rev 0)
+++ code/branches/pcre16/pcre16_printint.c    2011-12-14 16:49:20 UTC (rev 805)
@@ -0,0 +1,45 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_printint.c"
+
+/* End of pcre16_printint.c */

Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-12-14 11:18:01 UTC (rev 804)
+++ code/branches/pcre16/pcre_compile.c    2011-12-14 16:49:20 UTC (rev 805)
@@ -53,12 +53,13 @@
 #include "pcre_internal.h"

-/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
-also used by pcretest. PCRE_DEBUG is not defined when building a production
-library. */
+/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
+is also used by pcretest. PCRE_DEBUG is not defined when building a production
+library. We do not need to select pcre16_printint.c specially, because the
+COMPILE_PCREx macro will already be appropriately set. */

#ifdef PCRE_DEBUG
-#include "pcre_printint.src"
+#include "pcre_printint.c"
#endif

@@ -8061,7 +8062,11 @@
     else printf("Req char = \\x%02x%s\n", ch, caseless);
   }

+#ifdef COMPILE_PCRE8
pcre_printint(re, stdout, TRUE);
+#else
+pcre16_printint(re, stdout, TRUE);
+#endif

/* This check is done here in the debugging case so that the code that
was compiled can be seen. */

Modified: code/branches/pcre16/pcre_config.c
===================================================================
--- code/branches/pcre16/pcre_config.c    2011-12-14 11:18:01 UTC (rev 804)
+++ code/branches/pcre16/pcre_config.c    2011-12-14 16:49:20 UTC (rev 805)
@@ -73,20 +73,28 @@
 switch (what)
   {
   case PCRE_CONFIG_UTF8:
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+#if defined COMPILE_PCRE16
+  return PCRE_ERROR_BADOPTION;
+#else   
+#if defined SUPPORT_UTF
   *((int *)where) = 1;
 #else
   *((int *)where) = 0;
 #endif
   break;
+#endif

case PCRE_CONFIG_UTF16:
-#if defined SUPPORT_UTF && defined COMPILE_PCRE16
+#if defined COMPILE_PCRE8
+ return PCRE_ERROR_BADOPTION;
+#else
+#if defined SUPPORT_UTF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
+#endif

case PCRE_CONFIG_UNICODE_PROPERTIES:
#ifdef SUPPORT_UCP

Added: code/branches/pcre16/pcre_printint.c
===================================================================
--- code/branches/pcre16/pcre_printint.c                            (rev 0)
+++ code/branches/pcre16/pcre_printint.c    2011-12-14 16:49:20 UTC (rev 805)
@@ -0,0 +1,696 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2010 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains a PCRE private debugging function for printing out the
+internal form of a compiled regular expression, along with some supporting
+local functions. This source file is used in two places:
+
+(1) It is #included by pcre_compile.c when it is compiled in debugging mode
+(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
+compiles.
+
+(2) It is also compiled separately and linked with pcretest.c, which can be
+asked to print out a compiled regex for debugging purposes. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* We have to include pcre_internal.h because we need the internal info for
+displaying the results of pcre_study() and we also need to know about the
+internal macros, structures, and other internal data values; pcretest has
+"inside information" compared to a program that strictly follows the PCRE API.
+
+Although pcre_internal.h does itself include pcre.h, we explicitly include it
+here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
+appropriately for an application, not for building PCRE. */
+
+#include "pcre.h"
+#include "pcre_internal.h"
+
+/* These are the functions that are contained within. It doesn't seem worth
+having a separate .h file just for this. */
+
+#ifdef COMPILE_PCRE8
+void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#endif
+#ifdef COMPILE_PCRE16
+void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#endif
+
+/* Macro that decides whether a character should be output as a literal or in
+hexadecimal. We don't use isprint() because that can vary from system to system
+(even without the use of locales) and we want the output always to be the same,
+for testing purposes. */
+
+#ifdef EBCDIC
+#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
+#else
+#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
+#endif
+
+/* The table of operator names. */
+
+static const char *OP_names[] = { OP_NAME_LIST };
+
+/* This table of operator lengths is not actually used by the working code,
+but its size is needed for a check that ensures it is the correct size for the
+number of opcodes (thus catching update omissions). */
+
+static const pcre_uint8 OP_lengths[] = { OP_LENGTHS };
+
+
+
+/*************************************************
+*       Print single- or multi-byte character    *
+*************************************************/
+
+static int  /* Prints one character; returns how many units after the first it used */
+print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
+{
+int c = *ptr;  /* First unit of the character */
+
+#ifndef SUPPORT_UTF
+(void)utf;  /* Avoid compiler warning */
+if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
+return 0;  /* Without UTF support a character is always a single unit */
+
+#else
+
+#ifdef COMPILE_PCRE8
+
+if (!utf || (c & 0xc0) != 0xc0)  /* Not UTF mode, or not a multi-byte lead byte */
+  {
+  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
+  return 0;
+  }
+else
+  {
+  int i;
+  int a = PRIV(utf8_table4)[c & 0x3f];  /* Number of additional bytes */
+  int s = 6*a;  /* Shift for the lead byte's bits; drops by 6 per following byte */
+  c = (c & PRIV(utf8_table3)[a]) << s;
+  for (i = 1; i <= a; i++)
+    {
+    /* This is a check for malformed UTF-8; it should only occur if the sanity
+    check has been turned off. Rather than swallow random bytes, just stop if
+    we hit a bad one. Print it with \X instead of \x as an indication. */
+
+    if ((ptr[i] & 0xc0) != 0x80)
+      {
+      fprintf(f, "\\X{%x}", c);
+      return i - 1;  /* Only the bytes accepted before the bad one are consumed */
+      }
+
+    /* The byte is OK */
+
+    s -= 6;
+    c |= (ptr[i] & 0x3f) << s;
+    }
+  fprintf(f, "\\x{%x}", c);
+  return a;
+  }
+
+#else
+
+#ifdef COMPILE_PCRE16
+
+if (!utf || (c & 0xfc00) != 0xd800)  /* Not UTF mode, or not a high surrogate */
+  {
+  if (PRINTABLE(c)) fprintf(f, "%c", c);
+  else if (c <= 0xff) fprintf(f, "\\x%02x", c);
+  else fprintf(f, "\\x{%x}", c);
+  return 0;
+  }
+else
+  {
+  /* This is a check for malformed UTF-16; it should only occur if the sanity
+  check has been turned off. Rather than swallow a low surrogate, just stop if
+  we hit a bad one. Print it with \X instead of \x as an indication. */
+
+  if ((ptr[1] & 0xfc00) != 0xdc00)
+    {
+    fprintf(f, "\\X{%x}", c);
+    return 0;  /* The following unit is left for the caller to process */
+    }
+
+  c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;  /* Join the pair */
+  fprintf(f, "\\x{%x}", c);
+  return 1;  /* One extra unit (the low surrogate) was consumed */
+  }
+
+#endif /* COMPILE_PCRE16 */
+
+#endif /* COMPILE_PCRE8 */
+
+#endif /* SUPPORT_UTF */
+}
+
+/*************************************************
+*  Print uchar string (regardless of utf)        *
+*************************************************/
+
+static void  /* Prints a zero-terminated unit string to f with no UTF decoding */
+print_puchar(FILE *f, PCRE_PUCHAR ptr)
+{
+while (*ptr != '\0')  /* Stop at the terminating zero unit */
+  {
+  register int c = *ptr++;
+  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);  /* Hex always braced here */
+  }
+}
+
+/*************************************************
+*          Find Unicode property name            *
+*************************************************/
+
+static const char *  /* Name of the utt entry matching (ptype, pvalue), or the ?? placeholder */
+get_ucpname(int ptype, int pvalue)
+{
+#ifdef SUPPORT_UCP
+int i;
+for (i = PRIV(utt_size) - 1; i >= 0; i--)  /* Scan the table from the last entry down */
+  {
+  if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
+  }  /* i is -1 here when no entry matched */
+return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
+#else
+/* It gets harder and harder to shut off unwanted compiler warnings. */
+ptype = ptype * pvalue;  /* Dummy use of both parameters */
+return (ptype == pvalue)? "??" : "??";  /* Same result on either arm */
+#endif
+}
+
+
+
+/*************************************************
+*         Print compiled regex                   *
+*************************************************/
+
+/* Make this function work for a regex with integers in either byte order.
+However, we assume that what we are passed is a compiled regex. The
+print_lengths flag controls whether offsets and lengths of items are printed.
+They can be turned off from pcretest so that automatic tests on bytecode can be
+written that do not depend on the value of LINK_SIZE. */
+
+#ifdef COMPILE_PCRE8
+void
+pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
+#else
+void
+pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
+#endif
+{  /* Dumps the compiled code of external_re to f, one item per line, up to OP_END */
+real_pcre *re = (real_pcre *)external_re;
+pcre_uchar *codestart, *code;
+BOOL utf;
+
+unsigned int options = re->options;
+int offset = re->name_table_offset;
+int count = re->name_count;
+int size = re->name_entry_size;
+
+if (re->magic_number != MAGIC_NUMBER)  /* Mismatch: treat header fields as byte-swapped */
+  {
+  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
+  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
+  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
+  options = ((options << 24) & 0xff000000) |
+            ((options <<  8) & 0x00ff0000) |
+            ((options >>  8) & 0x0000ff00) |
+            ((options >> 24) & 0x000000ff);
+  }
+
+code = codestart = (pcre_uchar *)re + offset + count * size;  /* Step past the name table */
+/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+utf = (options & PCRE_UTF8) != 0;
+
+for(;;)
+  {
+  pcre_uchar *ccode;
+  const char *flag = "  ";
+  int c;
+  int extra = 0;  /* Units to skip in addition to the opcode's table length */
+
+  if (print_lengths)  /* Show this item's offset from the start of the code */
+    fprintf(f, "%3d ", (int)(code - codestart));
+  else
+    fprintf(f, "    ");
+
+  switch(*code)
+    {
+/* ========================================================================== */
+      /* These cases are never obeyed. This is a fudge that causes a compile-
+      time error if the vectors OP_names or OP_lengths, which are indexed
+      by opcode, are not the correct length. It seems to be the only way to do
+      such a check at compile time, as the sizeof() operator does not work in
+      the C preprocessor. */
+
+      case OP_TABLE_LENGTH:
+      case OP_TABLE_LENGTH +
+        ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
+        (sizeof(OP_lengths) == OP_TABLE_LENGTH)):
+      break;
+/* ========================================================================== */
+
+    case OP_END:
+    fprintf(f, "    %s\n", OP_names[*code]);
+    fprintf(f, "------------------------------------------------------------------\n");
+    return;
+
+    case OP_CHAR:
+    fprintf(f, "    ");
+    do
+      {
+      code++;
+      code += 1 + print_char(f, code, utf);
+      }
+    while (*code == OP_CHAR);  /* Merge consecutive OP_CHAR items onto one line */
+    fprintf(f, "\n");
+    continue;  /* code is already advanced; skip the common increment below */
+
+    case OP_CHARI:
+    fprintf(f, " /i ");
+    do
+      {
+      code++;
+      code += 1 + print_char(f, code, utf);
+      }
+    while (*code == OP_CHARI);  /* Merge consecutive OP_CHARI items likewise */
+    fprintf(f, "\n");
+    continue;
+
+    case OP_CBRA:
+    case OP_CBRAPOS:
+    case OP_SCBRA:
+    case OP_SCBRAPOS:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
+    break;
+
+    case OP_BRA:
+    case OP_BRAPOS:
+    case OP_SBRA:
+    case OP_SBRAPOS:
+    case OP_KETRMAX:
+    case OP_KETRMIN:
+    case OP_KETRPOS:
+    case OP_ALT:
+    case OP_KET:
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    case OP_ONCE:
+    case OP_ONCE_NC:
+    case OP_COND:
+    case OP_SCOND:
+    case OP_REVERSE:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s", OP_names[*code]);
+    break;
+
+    case OP_CLOSE:
+    fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
+    break;
+
+    case OP_CREF:
+    case OP_NCREF:
+    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
+    break;
+
+    case OP_RREF:
+    c = GET2(code, 1);
+    if (c == RREF_ANY)
+      fprintf(f, "    Cond recurse any");
+    else
+      fprintf(f, "    Cond recurse %d", c);
+    break;
+
+    case OP_NRREF:
+    c = GET2(code, 1);
+    if (c == RREF_ANY)
+      fprintf(f, "    Cond nrecurse any");
+    else
+      fprintf(f, "    Cond nrecurse %d", c);
+    break;
+
+    case OP_DEF:
+    fprintf(f, "    Cond def");
+    break;
+
+    case OP_STARI:
+    case OP_MINSTARI:
+    case OP_POSSTARI:
+    case OP_PLUSI:
+    case OP_MINPLUSI:
+    case OP_POSPLUSI:
+    case OP_QUERYI:
+    case OP_MINQUERYI:
+    case OP_POSQUERYI:
+    flag = "/i";
+    /* Fall through */
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_POSSTAR:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_POSPLUS:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_POSQUERY:
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEPOSPLUS:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEPOSQUERY:
+    fprintf(f, " %s ", flag);
+    if (*code >= OP_TYPESTAR)  /* Type repeats carry an opcode, not a character */
+      {
+      fprintf(f, "%s", OP_names[code[1]]);
+      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
+        {
+        fprintf(f, " %s ", get_ucpname(code[2], code[3]));
+        extra = 2;  /* Property type and value follow the type opcode */
+        }
+      }
+    else extra = print_char(f, code+1, utf);
+    fprintf(f, "%s", OP_names[*code]);
+    break;
+
+    case OP_EXACTI:
+    case OP_UPTOI:
+    case OP_MINUPTOI:
+    case OP_POSUPTOI:
+    flag = "/i";
+    /* Fall through */
+    case OP_EXACT:
+    case OP_UPTO:
+    case OP_MINUPTO:
+    case OP_POSUPTO:
+    fprintf(f, " %s ", flag);
+    extra = print_char(f, code + 1 + IMM2_SIZE, utf);  /* Character follows the count */
+    fprintf(f, "{");
+    if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
+      else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
+    break;
+
+    case OP_TYPEEXACT:
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEPOSUPTO:
+    fprintf(f, "    %s", OP_names[code[1 + IMM2_SIZE]]);
+    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+      {
+      fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1],
+        code[1 + IMM2_SIZE + 2]));
+      extra = 2;  /* Property type and value follow the type opcode */
+      }
+    fprintf(f, "{");
+    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
+      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
+    break;
+
+    case OP_NOTI:
+    flag = "/i";
+    /* Fall through */
+    case OP_NOT:
+    c = code[1];
+    if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
+      else fprintf(f, " %s [^\\x%02x]", flag, c);
+    break;
+
+    case OP_NOTSTARI:
+    case OP_NOTMINSTARI:
+    case OP_NOTPOSSTARI:
+    case OP_NOTPLUSI:
+    case OP_NOTMINPLUSI:
+    case OP_NOTPOSPLUSI:
+    case OP_NOTQUERYI:
+    case OP_NOTMINQUERYI:
+    case OP_NOTPOSQUERYI:
+    flag = "/i";
+    /* Fall through */
+
+    case OP_NOTSTAR:
+    case OP_NOTMINSTAR:
+    case OP_NOTPOSSTAR:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTPOSPLUS:
+    case OP_NOTQUERY:
+    case OP_NOTMINQUERY:
+    case OP_NOTPOSQUERY:
+    c = code[1];
+    if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
+      else fprintf(f, " %s [^\\x%02x]", flag, c);
+    fprintf(f, "%s", OP_names[*code]);
+    break;
+
+    case OP_NOTEXACTI:
+    case OP_NOTUPTOI:
+    case OP_NOTMINUPTOI:
+    case OP_NOTPOSUPTOI:
+    flag = "/i";
+    /* Fall through */
+
+    case OP_NOTEXACT:
+    case OP_NOTUPTO:
+    case OP_NOTMINUPTO:
+    case OP_NOTPOSUPTO:
+    c = code[1 + IMM2_SIZE];
+    if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c);
+      else fprintf(f, " %s [^\\x%02x]{", flag, c);
+    if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
+    fprintf(f, "%d}", GET2(code,1));
+    if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
+      else
+    if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
+    break;
+
+    case OP_RECURSE:
+    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
+      else fprintf(f, "    ");
+    fprintf(f, "%s", OP_names[*code]);
+    break;
+
+    case OP_REFI:
+    flag = "/i";
+    /* Fall through */
+    case OP_REF:
+    fprintf(f, " %s \\%d", flag, GET2(code,1));
+    ccode = code + PRIV(OP_lengths)[*code];  /* A repeat opcode may follow the reference */
+    goto CLASS_REF_REPEAT;
+
+    case OP_CALLOUT:
+    fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
+      GET(code, 2 + LINK_SIZE));
+    break;
+
+    case OP_PROP:
+    case OP_NOTPROP:
+    fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
+    break;
+
+    /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no
+    harm in having this code always here, and it makes it less messy without
+    all those #ifdefs. */
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    case OP_XCLASS:
+      {
+      int i, min, max;
+      BOOL printmap;
+      pcre_uint8 *map;
+
+      fprintf(f, "    [");
+
+      if (*code == OP_XCLASS)
+        {
+        extra = GET(code, 1);  /* presumably the whole item length -- TODO confirm */
+        ccode = code + LINK_SIZE + 1;
+        printmap = (*ccode & XCL_MAP) != 0;
+        if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
+        }
+      else
+        {
+        printmap = TRUE;
+        ccode = code + 1;
+        }
+
+      /* Print a bit map */
+
+      if (printmap)
+        {
+        map = (pcre_uint8 *)ccode;
+        for (i = 0; i < 256; i++)
+          {
+          if ((map[i/8] & (1 << (i&7))) != 0)
+            {
+            int j;
+            for (j = i+1; j < 256; j++)  /* Find the end of this run of set bits */
+              if ((map[j/8] & (1 << (j&7))) == 0) break;
+            if (i == '-' || i == ']') fprintf(f, "\\");
+            if (PRINTABLE(i)) fprintf(f, "%c", i);
+              else fprintf(f, "\\x%02x", i);
+            if (--j > i)
+              {
+              if (j != i + 1) fprintf(f, "-");  /* Two adjacent values print without a dash */
+              if (j == '-' || j == ']') fprintf(f, "\\");
+              if (PRINTABLE(j)) fprintf(f, "%c", j);
+                else fprintf(f, "\\x%02x", j);
+              }
+            i = j;
+            }
+          }
+        ccode += 32 / sizeof(pcre_uchar);  /* The 256-bit map occupies 32 bytes */
+        }
+
+      /* For an XCLASS there is always some additional data */
+
+      if (*code == OP_XCLASS)
+        {
+        int ch;
+        while ((ch = *ccode++) != XCL_END)
+          {
+          if (ch == XCL_PROP)
+            {
+            int ptype = *ccode++;
+            int pvalue = *ccode++;
+            fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
+            }
+          else if (ch == XCL_NOTPROP)
+            {
+            int ptype = *ccode++;
+            int pvalue = *ccode++;
+            fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
+            }
+          else
+            {
+            ccode += 1 + print_char(f, ccode, TRUE);  /* NOTE(review): TRUE, not utf -- check non-UTF 16-bit */
+            if (ch == XCL_RANGE)
+              {
+              fprintf(f, "-");
+              ccode += 1 + print_char(f, ccode, TRUE);  /* NOTE(review): same concern as above */
+              }
+            }
+          }
+        }
+
+      /* Indicate a non-UTF class which was created by negation */
+
+      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
+
+      /* Handle repeats after a class or a back reference */
+
+      CLASS_REF_REPEAT:
+      switch(*ccode)
+        {
+        case OP_CRSTAR:
+        case OP_CRMINSTAR:
+        case OP_CRPLUS:
+        case OP_CRMINPLUS:
+        case OP_CRQUERY:
+        case OP_CRMINQUERY:
+        fprintf(f, "%s", OP_names[*ccode]);
+        extra += PRIV(OP_lengths)[*ccode];  /* Also step over the repeat opcode */
+        break;
+
+        case OP_CRRANGE:
+        case OP_CRMINRANGE:
+        min = GET2(ccode,1);
+        max = GET2(ccode,1 + IMM2_SIZE);
+        if (max == 0) fprintf(f, "{%d,}", min);  /* A max of 0 prints as no upper bound */
+        else fprintf(f, "{%d,%d}", min, max);
+        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
+        extra += PRIV(OP_lengths)[*ccode];
+        break;
+
+        /* Do nothing if it's not a repeat; this code stops picky compilers
+        warning about the lack of a default code path. */
+
+        default:
+        break;
+        }
+      }
+    break;
+
+    case OP_MARK:
+    case OP_PRUNE_ARG:
+    case OP_SKIP_ARG:
+    case OP_THEN_ARG:
+    fprintf(f, "    %s ", OP_names[*code]);
+    print_puchar(f, code + 2);
+    extra += code[1];  /* code[1] is presumably the argument length -- TODO confirm */
+    break;
+
+    case OP_THEN:
+    fprintf(f, "    %s", OP_names[*code]);
+    break;
+
+    case OP_CIRCM:
+    case OP_DOLLM:
+    flag = "/m";
+    /* Fall through */
+
+    /* Anything else is just an item with no data, but possibly a flag. */
+
+    default:
+    fprintf(f, " %s %s", flag, OP_names[*code]);
+    break;
+    }
+
+  code += PRIV(OP_lengths)[*code] + extra;  /* Advance to the next item */
+  fprintf(f, "\n");
+  }
+}
+
+/* End of pcre_printint.c */

Deleted: code/branches/pcre16/pcre_printint.src
===================================================================
--- code/branches/pcre16/pcre_printint.src    2011-12-14 11:18:01 UTC (rev 804)
+++ code/branches/pcre16/pcre_printint.src    2011-12-14 16:49:20 UTC (rev 805)
@@ -1,665 +0,0 @@
-/*************************************************
-*      Perl-Compatible Regular Expressions       *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
-                       Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-
-    * Neither the name of the University of Cambridge nor the names of its
-      contributors may be used to endorse or promote products derived from
-      this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains a PCRE private debugging function for printing out the
-internal form of a compiled regular expression, along with some supporting
-local functions. This source file is used in two places:
-
-(1) It is #included by pcre_compile.c when it is compiled in debugging mode
-(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
-compiles.
-
-(2) It is always #included by pcretest.c, which can be asked to print out a
-compiled regex for debugging purposes. */
-
-
-/* Macro that decides whether a character should be output as a literal or in
-hexadecimal. We don't use isprint() because that can vary from system to system
-(even without the use of locales) and we want the output always to be the same,
-for testing purposes. This macro is used in pcretest as well as in this file. */
-
-#ifdef EBCDIC
-#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
-#else
-#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
-#endif
-
-/* The table of operator names. */
-
-static const char *OP_names[] = { OP_NAME_LIST };
-
-
-
-/*************************************************
-*       Print single- or multi-byte character    *
-*************************************************/
-
-static int
-print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
-{
-int c = *ptr;
-
-#ifndef SUPPORT_UTF
-(void)utf;  /* Avoid compiler warning */
-if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
-return 0;
-
-#else
-
-#ifdef COMPILE_PCRE8
-
-if (!utf || (c & 0xc0) != 0xc0)
-  {
-  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
-  return 0;
-  }
-else
-  {
-  int i;
-  int a = PRIV(utf8_table4)[c & 0x3f];  /* Number of additional bytes */
-  int s = 6*a;
-  c = (c & PRIV(utf8_table3)[a]) << s;
-  for (i = 1; i <= a; i++)
-    {
-    /* This is a check for malformed UTF-8; it should only occur if the sanity
-    check has been turned off. Rather than swallow random bytes, just stop if
-    we hit a bad one. Print it with \X instead of \x as an indication. */
-
-    if ((ptr[i] & 0xc0) != 0x80)
-      {
-      fprintf(f, "\\X{%x}", c);
-      return i - 1;
-      }
-
-    /* The byte is OK */
-
-    s -= 6;
-    c |= (ptr[i] & 0x3f) << s;
-    }
-  fprintf(f, "\\x{%x}", c);
-  return a;
-  }
-
-#else
-
-#ifdef COMPILE_PCRE16
-
-if (!utf || (c & 0xfc00) != 0xd800)
-  {
-  if (PRINTABLE(c)) fprintf(f, "%c", c);
-  else if (c <= 0xff) fprintf(f, "\\x%02x", c);
-  else fprintf(f, "\\x{%x}", c);
-  return 0;
-  }
-else
-  {
-  /* This is a check for malformed UTF-16; it should only occur if the sanity
-  check has been turned off. Rather than swallow a low surrogate, just stop if
-  we hit a bad one. Print it with \X instead of \x as an indication. */
-
-  if ((ptr[1] & 0xfc00) != 0xdc00)
-    {
-    fprintf(f, "\\X{%x}", c);
-    return 0;
-    }
-
-  c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
-  fprintf(f, "\\x{%x}", c);
-  return 1;
-  }
-
-#endif /* COMPILE_PCRE16 */
-
-#endif /* COMPILE_PCRE8 */
-
-#endif /* SUPPORT_UTF */
-}
-
-/*************************************************
-*  Print uchar string (regardless of utf)        *
-*************************************************/
-
-static void
-print_puchar(FILE *f, PCRE_PUCHAR ptr)
-{
-while (*ptr != '\0')
-  {
-  register int c = *ptr++;
-  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
-  }
-}
-
-/*************************************************
-*          Find Unicode property name            *
-*************************************************/
-
-static const char *
-get_ucpname(int ptype, int pvalue)
-{
-#ifdef SUPPORT_UCP
-int i;
-for (i = PRIV(utt_size) - 1; i >= 0; i--)
-  {
-  if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
-  }
-return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
-#else
-/* It gets harder and harder to shut off unwanted compiler warnings. */
-ptype = ptype * pvalue;
-return (ptype == pvalue)? "??" : "??";
-#endif
-}
-
-
-
-/*************************************************
-*         Print compiled regex                   *
-*************************************************/
-
-/* Make this function work for a regex with integers either byte order.
-However, we assume that what we are passed is a compiled regex. The
-print_lengths flag controls whether offsets and lengths of items are printed.
-They can be turned off from pcretest so that automatic tests on bytecode can be
-written that do not depend on the value of LINK_SIZE. */
-
-static void
-pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
-{
-real_pcre *re = (real_pcre *)external_re;
-pcre_uchar *codestart, *code;
-BOOL utf;
-
-unsigned int options = re->options;
-int offset = re->name_table_offset;
-int count = re->name_count;
-int size = re->name_entry_size;
-
-if (re->magic_number != MAGIC_NUMBER)
-  {
-  offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
-  count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
-  size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
-  options = ((options << 24) & 0xff000000) |
-            ((options <<  8) & 0x00ff0000) |
-            ((options >>  8) & 0x0000ff00) |
-            ((options >> 24) & 0x000000ff);
-  }
-
-code = codestart = (pcre_uchar *)re + offset + count * size;
-/* PCRE_UTF16 has the same value as PCRE_UTF8. */
-utf = (options & PCRE_UTF8) != 0;
-
-for(;;)
-  {
-  pcre_uchar *ccode;
-  const char *flag = "  ";
-  int c;
-  int extra = 0;
-
-  if (print_lengths)
-    fprintf(f, "%3d ", (int)(code - codestart));
-  else
-    fprintf(f, "    ");
-
-  switch(*code)
-    {
-/* ========================================================================== */
-      /* These cases are never obeyed. This is a fudge that causes a compile-
-      time error if the vectors OP_names or PRIV(OP_lengths), which are indexed
-      by opcode, are not the correct length. It seems to be the only way to do
-      such a check at compile time, as the sizeof() operator does not work in
-      the C preprocessor. We do this while compiling pcretest, because that
-      #includes pcre_tables.c, which holds PRIV(OP_lengths). We can't do this
-      when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then
-      know the size of PRIV(OP_lengths). */
-
-#ifdef COMPILING_PCRETEST
-      case OP_TABLE_LENGTH:
-      case OP_TABLE_LENGTH +
-        ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
-        (sizeof(PRIV(OP_lengths)) == OP_TABLE_LENGTH)):
-      break;
-#endif
-/* ========================================================================== */
-
-    case OP_END:
-    fprintf(f, "    %s\n", OP_names[*code]);
-    fprintf(f, "------------------------------------------------------------------\n");
-    return;
-
-    case OP_CHAR:
-    fprintf(f, "    ");
-    do
-      {
-      code++;
-      code += 1 + print_char(f, code, utf);
-      }
-    while (*code == OP_CHAR);
-    fprintf(f, "\n");
-    continue;
-
-    case OP_CHARI:
-    fprintf(f, " /i ");
-    do
-      {
-      code++;
-      code += 1 + print_char(f, code, utf);
-      }
-    while (*code == OP_CHARI);
-    fprintf(f, "\n");
-    continue;
-
-    case OP_CBRA:
-    case OP_CBRAPOS:
-    case OP_SCBRA:
-    case OP_SCBRAPOS:
-    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
-      else fprintf(f, "    ");
-    fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
-    break;
-
-    case OP_BRA:
-    case OP_BRAPOS:
-    case OP_SBRA:
-    case OP_SBRAPOS:
-    case OP_KETRMAX:
-    case OP_KETRMIN:
-    case OP_KETRPOS:
-    case OP_ALT:
-    case OP_KET:
-    case OP_ASSERT:
-    case OP_ASSERT_NOT:
-    case OP_ASSERTBACK:
-    case OP_ASSERTBACK_NOT:
-    case OP_ONCE:
-    case OP_ONCE_NC:
-    case OP_COND:
-    case OP_SCOND:
-    case OP_REVERSE:
-    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
-      else fprintf(f, "    ");
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_CLOSE:
-    fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));
-    break;
-
-    case OP_CREF:
-    case OP_NCREF:
-    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
-    break;
-
-    case OP_RREF:
-    c = GET2(code, 1);
-    if (c == RREF_ANY)
-      fprintf(f, "    Cond recurse any");
-    else
-      fprintf(f, "    Cond recurse %d", c);
-    break;
-
-    case OP_NRREF:
-    c = GET2(code, 1);
-    if (c == RREF_ANY)
-      fprintf(f, "    Cond nrecurse any");
-    else
-      fprintf(f, "    Cond nrecurse %d", c);
-    break;
-
-    case OP_DEF:
-    fprintf(f, "    Cond def");
-    break;
-
-    case OP_STARI:
-    case OP_MINSTARI:
-    case OP_POSSTARI:
-    case OP_PLUSI:
-    case OP_MINPLUSI:
-    case OP_POSPLUSI:
-    case OP_QUERYI:
-    case OP_MINQUERYI:
-    case OP_POSQUERYI:
-    flag = "/i";
-    /* Fall through */
-    case OP_STAR:
-    case OP_MINSTAR:
-    case OP_POSSTAR:
-    case OP_PLUS:
-    case OP_MINPLUS:
-    case OP_POSPLUS:
-    case OP_QUERY:
-    case OP_MINQUERY:
-    case OP_POSQUERY:
-    case OP_TYPESTAR:
-    case OP_TYPEMINSTAR:
-    case OP_TYPEPOSSTAR:
-    case OP_TYPEPLUS:
-    case OP_TYPEMINPLUS:
-    case OP_TYPEPOSPLUS:
-    case OP_TYPEQUERY:
-    case OP_TYPEMINQUERY:
-    case OP_TYPEPOSQUERY:
-    fprintf(f, " %s ", flag);
-    if (*code >= OP_TYPESTAR)
-      {
-      fprintf(f, "%s", OP_names[code[1]]);
-      if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
-        {
-        fprintf(f, " %s ", get_ucpname(code[2], code[3]));
-        extra = 2;
-        }
-      }
-    else extra = print_char(f, code+1, utf);
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_EXACTI:
-    case OP_UPTOI:
-    case OP_MINUPTOI:
-    case OP_POSUPTOI:
-    flag = "/i";
-    /* Fall through */
-    case OP_EXACT:
-    case OP_UPTO:
-    case OP_MINUPTO:
-    case OP_POSUPTO:
-    fprintf(f, " %s ", flag);
-    extra = print_char(f, code + 1 + IMM2_SIZE, utf);
-    fprintf(f, "{");
-    if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
-    fprintf(f, "%d}", GET2(code,1));
-    if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
-      else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
-    break;
-
-    case OP_TYPEEXACT:
-    case OP_TYPEUPTO:
-    case OP_TYPEMINUPTO:
-    case OP_TYPEPOSUPTO:
-    fprintf(f, "    %s", OP_names[code[1 + IMM2_SIZE]]);
-    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
-      {
-      fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1],
-        code[1 + IMM2_SIZE + 2]));
-      extra = 2;
-      }
-    fprintf(f, "{");
-    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
-    fprintf(f, "%d}", GET2(code,1));
-    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
-      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
-    break;
-
-    case OP_NOTI:
-    flag = "/i";
-    /* Fall through */
-    case OP_NOT:
-    c = code[1];
-    if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
-      else fprintf(f, " %s [^\\x%02x]", flag, c);
-    break;
-
-    case OP_NOTSTARI:
-    case OP_NOTMINSTARI:
-    case OP_NOTPOSSTARI:
-    case OP_NOTPLUSI:
-    case OP_NOTMINPLUSI:
-    case OP_NOTPOSPLUSI:
-    case OP_NOTQUERYI:
-    case OP_NOTMINQUERYI:
-    case OP_NOTPOSQUERYI:
-    flag = "/i";
-    /* Fall through */
-
-    case OP_NOTSTAR:
-    case OP_NOTMINSTAR:
-    case OP_NOTPOSSTAR:
-    case OP_NOTPLUS:
-    case OP_NOTMINPLUS:
-    case OP_NOTPOSPLUS:
-    case OP_NOTQUERY:
-    case OP_NOTMINQUERY:
-    case OP_NOTPOSQUERY:
-    c = code[1];
-    if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);
-      else fprintf(f, " %s [^\\x%02x]", flag, c);
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_NOTEXACTI:
-    case OP_NOTUPTOI:
-    case OP_NOTMINUPTOI:
-    case OP_NOTPOSUPTOI:
-    flag = "/i";
-    /* Fall through */
-
-    case OP_NOTEXACT:
-    case OP_NOTUPTO:
-    case OP_NOTMINUPTO:
-    case OP_NOTPOSUPTO:
-    c = code[1 + IMM2_SIZE];
-    if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c);
-      else fprintf(f, " %s [^\\x%02x]{", flag, c);
-    if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
-    fprintf(f, "%d}", GET2(code,1));
-    if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
-      else
-    if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
-    break;
-
-    case OP_RECURSE:
-    if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
-      else fprintf(f, "    ");
-    fprintf(f, "%s", OP_names[*code]);
-    break;
-
-    case OP_REFI:
-    flag = "/i";
-    /* Fall through */
-    case OP_REF:
-    fprintf(f, " %s \\%d", flag, GET2(code,1));
-    ccode = code + PRIV(OP_lengths)[*code];
-    goto CLASS_REF_REPEAT;
-
-    case OP_CALLOUT:
-    fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
-      GET(code, 2 + LINK_SIZE));
-    break;
-
-    case OP_PROP:
-    case OP_NOTPROP:
-    fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
-    break;
-
-    /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no
-    harm in having this code always here, and it makes it less messy without
-    all those #ifdefs. */
-
-    case OP_CLASS:
-    case OP_NCLASS:
-    case OP_XCLASS:
-      {
-      int i, min, max;
-      BOOL printmap;
-      pcre_uint8 *map;
-
-      fprintf(f, "    [");
-
-      if (*code == OP_XCLASS)
-        {
-        extra = GET(code, 1);
-        ccode = code + LINK_SIZE + 1;
-        printmap = (*ccode & XCL_MAP) != 0;
-        if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
-        }
-      else
-        {
-        printmap = TRUE;
-        ccode = code + 1;
-        }
-
-      /* Print a bit map */
-
-      if (printmap)
-        {
-        map = (pcre_uint8 *)ccode;
-        for (i = 0; i < 256; i++)
-          {
-          if ((map[i/8] & (1 << (i&7))) != 0)
-            {
-            int j;
-            for (j = i+1; j < 256; j++)
-              if ((map[j/8] & (1 << (j&7))) == 0) break;
-            if (i == '-' || i == ']') fprintf(f, "\\");
-            if (PRINTABLE(i)) fprintf(f, "%c", i);
-              else fprintf(f, "\\x%02x", i);
-            if (--j > i)
-              {
-              if (j != i + 1) fprintf(f, "-");
-              if (j == '-' || j == ']') fprintf(f, "\\");
-              if (PRINTABLE(j)) fprintf(f, "%c", j);
-                else fprintf(f, "\\x%02x", j);
-              }
-            i = j;
-            }
-          }
-        ccode += 32 / sizeof(pcre_uchar);
-        }
-
-      /* For an XCLASS there is always some additional data */
-
-      if (*code == OP_XCLASS)
-        {
-        int ch;
-        while ((ch = *ccode++) != XCL_END)
-          {
-          if (ch == XCL_PROP)
-            {
-            int ptype = *ccode++;
-            int pvalue = *ccode++;
-            fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
-            }
-          else if (ch == XCL_NOTPROP)
-            {
-            int ptype = *ccode++;
-            int pvalue = *ccode++;
-            fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
-            }
-          else
-            {
-            ccode += 1 + print_char(f, ccode, TRUE);
-            if (ch == XCL_RANGE)
-              {
-              fprintf(f, "-");
-              ccode += 1 + print_char(f, ccode, TRUE);
-              }
-            }
-          }
-        }
-
-      /* Indicate a non-UTF class which was created by negation */
-
-      fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
-
-      /* Handle repeats after a class or a back reference */
-
-      CLASS_REF_REPEAT:
-      switch(*ccode)
-        {
-        case OP_CRSTAR:
-        case OP_CRMINSTAR:
-        case OP_CRPLUS:
-        case OP_CRMINPLUS:
-        case OP_CRQUERY:
-        case OP_CRMINQUERY:
-        fprintf(f, "%s", OP_names[*ccode]);
-        extra += PRIV(OP_lengths)[*ccode];
-        break;
-
-        case OP_CRRANGE:
-        case OP_CRMINRANGE:
-        min = GET2(ccode,1);
-        max = GET2(ccode,1 + IMM2_SIZE);
-        if (max == 0) fprintf(f, "{%d,}", min);
-        else fprintf(f, "{%d,%d}", min, max);
-        if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
-        extra += PRIV(OP_lengths)[*ccode];
-        break;
-
-        /* Do nothing if it's not a repeat; this code stops picky compilers
-        warning about the lack of a default code path. */
-
-        default:
-        break;
-        }
-      }
-    break;
-
-    case OP_MARK:
-    case OP_PRUNE_ARG:
-    case OP_SKIP_ARG:
-    case OP_THEN_ARG:
-    fprintf(f, "    %s ", OP_names[*code]);
-    print_puchar(f, code + 2);
-    extra += code[1];
-    break;
-
-    case OP_THEN:
-    fprintf(f, "    %s", OP_names[*code]);
-    break;
-
-    case OP_CIRCM:
-    case OP_DOLLM:
-    flag = "/m";
-    /* Fall through */
-
-    /* Anything else is just an item with no data, but possibly a flag. */
-
-    default:
-    fprintf(f, " %s %s", flag, OP_names[*code]);
-    break;
-    }
-
-  code += PRIV(OP_lengths)[*code] + extra;
-  fprintf(f, "\n");
-  }
-}
-
-/* End of pcre_printint.src */

Modified: code/branches/pcre16/pcretest.c
===================================================================
--- code/branches/pcre16/pcretest.c    2011-12-14 11:18:01 UTC (rev 804)
+++ code/branches/pcre16/pcretest.c    2011-12-14 16:49:20 UTC (rev 805)
@@ -107,6 +107,18 @@
 #include "pcre.h"
 #include "pcre_internal.h"

+/* The pcre_printint() function, which prints the internal form of a compiled
+regex, is held in a separate file so that (a) it can be compiled in either
+8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
+when that is compiled in debug mode. */
+
+#ifdef SUPPORT_PCRE8
+void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#endif
+#ifdef SUPPORT_PCRE16
+void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
+#endif
+
/* We need access to some of the data tables that PCRE uses. So as not to have
to keep two copies, we include the source file here, changing the names of the
external symbols to prevent clashes. */
@@ -125,20 +137,18 @@

#include "pcre_tables.c"

-/* We also need the pcre_printint() function for printing out compiled
-patterns. This function is in a separate file so that it can be included in
-pcre_compile.c when that module is compiled with debugging enabled. It needs to
-know which case is being compiled. */
-
-#define COMPILING_PCRETEST
-#include "pcre_printint.src"
-
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
-contained in the printint.src file. We uses it here also, in cases when the
-locale has not been explicitly changed, so as to get consistent output from
-systems that differ in their output from isprint() even in the "C" locale. */
+the same as in the pcre_printint.c file. We use it here in cases when the
+locale has not been explicitly changed, so as to get consistent output from
+systems that differ in their output from isprint() even in the "C" locale. */

+#ifdef EBCDIC
+#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
+#else
+#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
+#endif
+
#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))

/* It is possible to compile this test program without including support for
@@ -193,6 +203,8 @@
static size_t first_gotten_store = 0;
static const unsigned char *last_callout_mark = NULL;

+static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
+
/* The buffers grow automatically if very long input lines are encountered. */

static int buffer_size = 50000;
@@ -200,6 +212,11 @@
static pcre_uint8 *dbuffer = NULL;
static pcre_uint8 *pbuffer = NULL;

+#ifdef SUPPORT_PCRE16
+static int buffer16_size = 0;
+static pcre_uint16 *buffer16 = NULL;
+#endif
+
/* Textual explanations for runtime error codes */

static const char *errtexts[] = {
@@ -230,7 +247,8 @@
"bad offset value",
NULL, /* SHORTUTF8 is handled specially */
"nested recursion at the same subject position",
- "JIT stack limit reached"
+ "JIT stack limit reached",
+ "pattern compiled in wrong mode (8-bit/16-bit error)"
};

@@ -592,7 +610,51 @@
}

+#ifdef SUPPORT_PCRE16
 /*************************************************
+*         Convert a string to 16-bit             *
+*************************************************/
+
+/* Copy the zero-terminated 8-bit string p into buffer16 as 16-bit units,
+growing buffer16 if needed. Only the non-UTF case is handled so far: each
+byte becomes one unit, and a UTF request is a fatal error. The terminating
+zero is stored but not counted, so the yield (the number of units before it)
+can be used directly as a subject length. */
+
+static int
+to16(unsigned char *p, int utf)
+{
+pcre_uint16 *pp;
+int len = (int)strlen((char *)p) + 1;   /* Units needed, with terminator */
+
+if (utf)
+  {
+  fprintf(stderr, "pcretest: no support yet for UTF-16\n");
+  exit(1);
+  }
+
+if (buffer16_size < 2*len)              /* buffer16_size is in bytes */
+  {
+  if (buffer16 != NULL) free(buffer16);
+  buffer16_size = 2*len;
+  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
+  if (buffer16 == NULL)
+    {
+    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
+    exit(1);
+    }
+  }
+
+pp = buffer16;
+while (*p != 0) *pp++ = *p++;
+*pp = 0;    /* Terminate, but do not include the zero in the returned length */
+
+return (int)(pp - buffer16);
+}
+#endif
+
+
+/*************************************************
 *        Read or extend an input line            *
 *************************************************/

@@ -1046,7 +1108,7 @@
static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
{
int rc;
-if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
+if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
}

@@ -1191,6 +1253,9 @@
 printf("This version of pcretest is not linked with readline().\n");
 #endif
 printf("\nOptions:\n");
+#ifdef SUPPORT_PCRE16
+printf("  -16      use 16-bit interface\n");
+#endif
 printf("  -b       show compiled code (bytecode)\n");
 printf("  -C       show PCRE compile-time options and exit\n");
 printf("  -d       debug: show compiled code and information (-b and -i)\n");
@@ -1248,12 +1313,12 @@
 int debug = 0;
 int done = 0;
 int all_use_dfa = 0;
+int use_pcre16 = 0;
 int yield = 0;
 int stack_size;

pcre_jit_stack *jit_stack = NULL;

-
/* These vectors store, end-to-end, a list of captured substring names. Assume
that 1024 is plenty long enough for the few names we'll be testing. */

@@ -1263,8 +1328,9 @@
pcre_uchar *copynamesptr;
pcre_uchar *getnamesptr;

-/* Get buffers from malloc() so that Electric Fence will check their misuse
-when I am debugging. They grow automatically when very long lines are read. */
+/* Get buffers from malloc() so that valgrind will check their misuse when
+debugging. They grow automatically when very long lines are read. The 16-bit
+buffer (buffer16) is obtained only if needed. */

buffer = (pcre_uint8 *)malloc(buffer_size);
dbuffer = (pcre_uint8 *)malloc(buffer_size);
@@ -1289,7 +1355,8 @@
{
unsigned char *endptr;

-  if (strcmp(argv[op], "-m") == 0) showstore = 1;
+  if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
+  else if (strcmp(argv[op], "-m") == 0) showstore = 1;
   else if (strcmp(argv[op], "-s") == 0) force_study = 0;
   else if (strcmp(argv[op], "-s+") == 0)
     {
@@ -1356,8 +1423,25 @@
     unsigned long int lrc;
     printf("PCRE version %s\n", pcre_version());
     printf("Compiled with\n");
+    
+/* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */
+
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+    printf("  8-bit and 16-bit support\n");
     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
     printf("  %sUTF-8 support\n", rc? "" : "No ");
+    (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
+    printf("  %sUTF-16 support\n", rc? "" : "No ");
+#elif defined SUPPORT_PCRE8
+    printf("  8-bit support only\n");
+    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
+    printf("  %sUTF-8 support\n", rc? "" : "No ");
+#else    
+    printf("  16-bit support only\n");
+    (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
+    printf("  %sUTF-16 support\n", rc? "" : "No ");
+#endif     
+ 
     (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
     printf("  %sUnicode properties support\n", rc? "" : "No ");
     (void)pcre_config(PCRE_CONFIG_JIT, &rc);
@@ -1404,6 +1488,10 @@
   argc--;
   }

+/* Select which fullinfo function to use. */
+
+fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
+
/* Get the store for the offsets vector, and remember what it was */

size_offsets_max = size_offsets;
@@ -1442,11 +1530,20 @@

/* Set alternative malloc function */

+#ifdef SUPPORT_PCRE8
pcre_malloc = new_malloc;
pcre_free = new_free;
pcre_stack_malloc = stack_malloc;
pcre_stack_free = stack_free;
+#endif

+#ifdef SUPPORT_PCRE16
+pcre16_malloc = new_malloc;
+pcre16_free = new_free;
+pcre16_stack_malloc = stack_malloc;
+pcre16_stack_free = stack_free;
+#endif
+
/* Heading line unless quiet, then prompt for first regex if stdin */

if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
@@ -1764,7 +1861,7 @@

/* Handle compiling via the POSIX interface, which doesn't support the
timing, showing, or debugging options, nor the ability to pass over
- local character tables. */
+ local character tables. Neither does it have 16-bit support. */

#if !defined NOPOSIX
if (posix || do_posix)
@@ -1801,7 +1898,19 @@

     {
     unsigned long int get_options;
+    
+    /* In 16-bit mode, convert the input. The space needed for a non-UTF string
+    is exactly double the 8-bit size. For a UTF-8 string, the size needed for
+    UTF-16 is no more than double: values up to 0xffff take 1 to 3 bytes in
+    UTF-8 but always 2 bytes in UTF-16, and higher values take 4 bytes in both
+    encodings. */
+    
+#ifdef SUPPORT_PCRE16
+    if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
+#endif

+    /* Compile many times when timing */
+
     if (timeit > 0)
       {
       register int i;
@@ -1809,7 +1918,12 @@
       clock_t start_time = clock();
       for (i = 0; i < timeit; i++)
         {
-        re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
+#ifdef SUPPORT_PCRE16
+        if (use_pcre16)         
+          re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
+        else 
+#endif         
+          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
         if (re != NULL) free(re);
         }
       time_taken = clock() - start_time;
@@ -1819,7 +1933,13 @@
       }

     first_gotten_store = 0;
-    re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
+    
+#ifdef SUPPORT_PCRE16
+    if (use_pcre16) 
+      re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
+    else
+#endif        
+      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);

     /* Compilation failed; go back for another re, skipping to blank line
     if non-interactive. */
@@ -1880,14 +2000,22 @@
         clock_t time_taken;
         clock_t start_time = clock();
         for (i = 0; i < timeit; i++)
-          extra = pcre_study(re, study_options | force_study_options, &error);
+          {
+          if (use_pcre16)  
+            extra = pcre16_study(re, study_options | force_study_options, &error);
+          else
+            extra = pcre_study(re, study_options | force_study_options, &error);
+          } 
         time_taken = clock() - start_time;
         if (extra != NULL) pcre_free_study(extra);
         fprintf(outfile, "  Study time %.4f milliseconds\n",
           (((double)time_taken * 1000.0) / (double)timeit) /
             (double)CLOCKS_PER_SEC);
         }
-      extra = pcre_study(re, study_options | force_study_options, &error);
+      if (use_pcre16)   
+        extra = pcre16_study(re, study_options | force_study_options, &error);
+      else   
+        extra = pcre_study(re, study_options | force_study_options, &error);
       if (error != NULL)
         fprintf(outfile, "Failed to study: %s\n", error);
       else if (extra != NULL)
@@ -1953,16 +2081,17 @@
         }
       }

-    /* Extract information from the compiled data if required. There are now
-    two info-returning functions. The old one has a limited interface and
-    returns only limited data. Check that it agrees with the newer one. */
+    /* Extract and display information from the compiled data if required. */

     SHOW_INFO:

     if (do_debug)
       {
       fprintf(outfile, "------------------------------------------------------------------\n");
-      pcre_printint(re, outfile, debug_lengths);
+      if (use_pcre16)
+        pcre16_printint(re, outfile, debug_lengths);
+      else   
+        pcre_printint(re, outfile, debug_lengths);
       }

     /* We already have the options in get_options (see above) */
@@ -1990,24 +2119,30 @@
       new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
       new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);

+      /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
+      that it gives the same results as the new function. */
+        
 #if !defined NOINFOCHECK
-      old_count = pcre_info(re, &old_options, &old_first_char);
-      if (count < 0) fprintf(outfile,
-        "Error %d from pcre_info()\n", count);
-      else
-        {
-        if (old_count != count) fprintf(outfile,
-          "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
-            old_count);
-
-        if (old_first_char != first_char) fprintf(outfile,
-          "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
-            first_char, old_first_char);
-
-        if (old_options != (int)get_options) fprintf(outfile,
-          "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
-            get_options, old_options);
-        }
+      if (!use_pcre16)
+        { 
+        old_count = pcre_info(re, &old_options, &old_first_char);
+        if (count < 0) fprintf(outfile,
+          "Error %d from pcre_info()\n", count);
+        else
+          {
+          if (old_count != count) fprintf(outfile,
+            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
+              old_count);
+        
+          if (old_first_char != first_char) fprintf(outfile,
+            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
+              first_char, old_first_char);
+        
+          if (old_options != (int)get_options) fprintf(outfile,
+            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
+              get_options, old_options);
+          }
+        }   
 #endif

       if (size != regex_gotten_store) fprintf(outfile,
@@ -2712,6 +2847,11 @@
         register int i;
         clock_t time_taken;
         clock_t start_time = clock();
+        
+#ifdef SUPPORT_PCRE16
+        if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
+#endif
+

 #if !defined NODFA
         if (all_use_dfa || use_dfa)
@@ -2798,8 +2938,12 @@

       else
         {
-        count = pcre_exec(re, extra, (char *)bptr, len,
-          start_offset, options | g_notempty, use_offsets, use_size_offsets);
+        if (use_pcre16) 
+          count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
+            start_offset, options | g_notempty, use_offsets, use_size_offsets);
+        else     
+          count = pcre_exec(re, extra, (char *)bptr, len,
+            start_offset, options | g_notempty, use_offsets, use_size_offsets);
         if (count == 0)
           {
           fprintf(outfile, "Matched, but too many substrings\n");
@@ -3124,6 +3268,10 @@
 free(pbuffer);
 free(offsets);

+#ifdef SUPPORT_PCRE16
+if (buffer16 != NULL) free(buffer16);
+#endif
+
return yield;
}

This message is part of the following thread:
	the complete thread tree sorted by date

[Pcre-svn] [805] code/branches/pcre16: Preliminary work on p…