[Pcre-svn] [475] code/trunk: Tidies to allow easier embedded…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [475] code/trunk: Tidies to allow easier embedded compilation; avoid (double) where possible.
Revision: 475
          http://vcs.pcre.org/viewvc?view=rev&revision=475
Author:   ph10
Date:     2010-01-02 18:21:30 +0000 (Sat, 02 Jan 2010)


Log Message:
-----------
Tidies to allow easier embedded compilation; avoid (double) where possible.

Modified Paths:
--------------
    code/trunk/CMakeLists.txt
    code/trunk/ChangeLog
    code/trunk/configure.ac
    code/trunk/pcre_compile.c
    code/trunk/pcre_dfa_exec.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/pcre_printint.src
    code/trunk/pcre_study.c


Modified: code/trunk/CMakeLists.txt
===================================================================
--- code/trunk/CMakeLists.txt    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/CMakeLists.txt    2010-01-02 18:21:30 UTC (rev 475)
@@ -35,6 +35,7 @@
 #            to disable the final configuration report.
 # 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
 #            are set by specifying a release type.
+# 2010-01-02 PH added test for stdint.h


PROJECT(PCRE C CXX)

@@ -55,6 +56,7 @@
INCLUDE(CheckTypeSize)

 CHECK_INCLUDE_FILE(dirent.h     HAVE_DIRENT_H)
+CHECK_INCLUDE_FILE(stdint.h     HAVE_STDINT_H)
 CHECK_INCLUDE_FILE(sys/stat.h   HAVE_SYS_STAT_H)
 CHECK_INCLUDE_FILE(sys/types.h  HAVE_SYS_TYPES_H)
 CHECK_INCLUDE_FILE(unistd.h     HAVE_UNISTD_H)


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/ChangeLog    2010-01-02 18:21:30 UTC (rev 475)
@@ -30,6 +30,30 @@
 5.  The C++ GlobalReplace function was not working like Perl for the special
     situation when an empty string is matched. It now does the fancy magic
     stuff that is necessary. 
+    
+6.  In pcre_internal.h, obsolete includes to setjmp.h and stdarg.h have been 
+    removed. (These were left over from very, very early versions of PCRE.)
+    
+7.  Some cosmetic changes to the code to make life easier when compiling it
+    as part of something else:
+    
+    (a) Change DEBUG to PCRE_DEBUG. 
+    
+    (b) In pcre_compile(), rename the member of the "branch_chain" structure 
+        called "current" as "current_branch", to prevent a collision with the 
+        Linux macro when compiled as a kernel module.
+        
+    (c) In pcre_study(), rename the function set_bit() as set_table_bit(), to 
+        prevent a collision with the Linux macro when compiled as a kernel 
+        module.
+        
+8.  In pcre_compile() there are some checks for integer overflows that used to
+    cast potentially large values to (double). This has been changed so that
+    when building, a check for int64_t is made, and if it is found, it is used 
+    instead, thus avoiding the use of floating point arithmetic. (There is no 
+    other use of FP in PCRE.) If int64_t is not found, the fallback is to 
+    double. 
+       



Version 8.00 19-Oct-09

Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/configure.ac    2010-01-02 18:21:30 UTC (rev 475)
@@ -9,7 +9,7 @@
 m4_define(pcre_major, [8])
 m4_define(pcre_minor, [01])
 m4_define(pcre_prerelease, [])
-m4_define(pcre_date, [2009-12-11])
+m4_define(pcre_date, [2010-01-02])


# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0])
@@ -66,6 +66,9 @@
AC_COMPILE_IFELSE(AC_LANG_PROGRAM([],[]),, CXX=""; CXXCP=""; CXXFLAGS="")
AC_LANG_POP

+# Check for a 64-bit integer type
+AC_TYPE_INT64_T
+
AC_PROG_INSTALL
AC_LIBTOOL_WIN32_DLL
AC_PROG_LIBTOOL

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/pcre_compile.c    2010-01-02 18:21:30 UTC (rev 475)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.


                        Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge


-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -53,10 +53,11 @@
#include "pcre_internal.h"


-/* When DEBUG is defined, we need the pcre_printint() function, which is also
-used by pcretest. DEBUG is not defined when building a production library. */
+/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
+also used by pcretest. PCRE_DEBUG is not defined when building a production
+library. */

-#ifdef DEBUG
+#ifdef PCRE_DEBUG
#include "pcre_printint.src"
#endif

@@ -1994,9 +1995,10 @@
 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
   BOOL utf8)
 {
-while (bcptr != NULL && bcptr->current >= code)
+while (bcptr != NULL && bcptr->current_branch >= code)
   {
-  if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
+  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8)) 
+    return FALSE;
   bcptr = bcptr->outer;
   }
 return TRUE;
@@ -2658,7 +2660,7 @@
 uschar *utf8_char = NULL;
 #endif


-#ifdef DEBUG
+#ifdef PCRE_DEBUG
if (lengthptr != NULL) DPRINTF((">> start branch\n"));
#endif

@@ -2717,7 +2719,7 @@

   if (lengthptr != NULL)
     {
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
     if (code > cd->hwm) cd->hwm = code;                 /* High water info */
 #endif
     if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
@@ -4213,13 +4215,15 @@
           {
           /* In the pre-compile phase, we don't actually do the replication. We
           just adjust the length as if we had. Do some paranoid checks for
-          potential integer overflow. */
+          potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
+          integer type when available, otherwise double. */


           if (lengthptr != NULL)
             {
             int delta = (repeat_min - 1)*length_prevgroup;
-            if ((double)(repeat_min - 1)*(double)length_prevgroup >
-                                                            (double)INT_MAX ||
+            if ((INT64_OR_DOUBLE)(repeat_min - 1)*
+                  (INT64_OR_DOUBLE)length_prevgroup > 
+                    (INT64_OR_DOUBLE)INT_MAX ||
                 OFLOW_MAX - *lengthptr < delta)
               {
               *errorcodeptr = ERR20;
@@ -4265,15 +4269,16 @@
         just adjust the length as if we had. For each repetition we must add 1
         to the length for BRAZERO and for all but the last repetition we must
         add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
-        paranoid checks to avoid integer overflow. */
+        paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is 
+        a 64-bit integer type when available, otherwise double. */


         if (lengthptr != NULL && repeat_max > 0)
           {
           int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
                       2 - 2*LINK_SIZE;   /* Last one doesn't nest */
-          if ((double)repeat_max *
-                (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
-                  > (double)INT_MAX ||
+          if ((INT64_OR_DOUBLE)repeat_max *
+                (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+                  > (INT64_OR_DOUBLE)INT_MAX ||
               OFLOW_MAX - *lengthptr < delta)
             {
             *errorcodeptr = ERR20;
@@ -5787,7 +5792,7 @@
 branch_chain bc;


bc.outer = bcptr;
-bc.current = code;
+bc.current_branch = code;

firstbyte = reqbyte = REQ_UNSET;

@@ -6028,7 +6033,7 @@
     {
     *code = OP_ALT;
     PUT(code, 1, code - last_branch);
-    bc.current = last_branch = code;
+    bc.current_branch = last_branch = code;
     code += 1 + LINK_SIZE;
     }


@@ -6641,7 +6646,7 @@

*code++ = OP_END;

-#ifndef DEBUG
+#ifndef PCRE_DEBUG
if (code - codestart > length) errorcode = ERR23;
#endif

@@ -6765,7 +6770,7 @@
/* Print out the compiled data if debugging is enabled. This is never the
case when building a production library. */

-#ifdef DEBUG
+#ifdef PCRE_DEBUG

printf("Length = %d top_bracket = %d top_backref = %d\n",
length, re->top_bracket, re->top_backref);
@@ -6803,7 +6808,7 @@
if (errorcodeptr != NULL) *errorcodeptr = ERR23;
return NULL;
}
-#endif /* DEBUG */
+#endif /* PCRE_DEBUG */

return (pcre *)re;
}

Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/pcre_dfa_exec.c    2010-01-02 18:21:30 UTC (rev 475)
@@ -255,7 +255,7 @@
 #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))



-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 /*************************************************
 *             Print character string             *
 *************************************************/
@@ -559,7 +559,7 @@
   workspace[0] ^= 1;              /* Remember for the restarting feature */
   workspace[1] = active_count;


-#ifdef DEBUG
+#ifdef PCRE_DEBUG
   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
   pchars((uschar *)ptr, strlen((char *)ptr), stdout);
   printf("\"\n");
@@ -605,7 +605,7 @@
     int state_offset = current_state->offset;
     int count, codevalue, rrc;


-#ifdef DEBUG
+#ifdef PCRE_DEBUG
     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
     if (clen == 0) printf("EOL\n");
       else if (c > 32 && c < 127) printf("'%c'\n", c);


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/pcre_exec.c    2010-01-02 18:21:30 UTC (rev 475)
@@ -89,7 +89,7 @@




-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 /*************************************************
 *        Debugging function to print chars       *
 *************************************************/
@@ -141,7 +141,7 @@
 {
 USPTR p = md->start_subject + md->offset_vector[offset];


-#ifdef DEBUG
+#ifdef PCRE_DEBUG
if (eptr >= md->end_subject)
printf("matching subject <null>");
else
@@ -254,7 +254,7 @@
#ifndef NO_RECURSE
#define REGISTER register

-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
   { \
   printf("match() called in line %d\n", __LINE__); \
@@ -622,7 +622,7 @@
 /* OK, now we can get on with the real code of the function. Recursive calls
 are specified by the macro RMATCH and RRETURN is used to return. When
 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
-and a "return", respectively (possibly with some debugging if DEBUG is
+and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
 defined). However, RMATCH isn't like a function call because it's quite a
 complicated macro. It has to be used in one particular way. This shouldn't,
 however, impact performance when true recursion is being used. */
@@ -713,7 +713,7 @@
     number = GET2(ecode, 1+LINK_SIZE);
     offset = number << 1;


-#ifdef DEBUG
+#ifdef PCRE_DEBUG
     printf("start bracket %d\n", number);
     printf("subject=");
     pchars(eptr, 16, TRUE, md);
@@ -1039,7 +1039,7 @@
     number = GET2(ecode, 1);
     offset = number << 1;


-#ifdef DEBUG
+#ifdef PCRE_DEBUG
       printf("end bracket %d at *ACCEPT", number);
       printf("\n");
 #endif
@@ -1468,7 +1468,7 @@
       number = GET2(prev, 1+LINK_SIZE);
       offset = number << 1;


-#ifdef DEBUG
+#ifdef PCRE_DEBUG
       printf("end bracket %d", number);
       printf("\n");
 #endif
@@ -5635,7 +5635,7 @@
       }
     }


-#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
printf(">>>> Match against: ");
pchars(start_match, end_subject - start_match, TRUE, md);
printf("\n");

Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/pcre_internal.h    2010-01-02 18:21:30 UTC (rev 475)
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.


                        Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge


-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -45,10 +45,10 @@
#ifndef PCRE_INTERNAL_H
#define PCRE_INTERNAL_H

-/* Define DEBUG to get debugging output on stdout. */
+/* Define PCRE_DEBUG to get debugging output on stdout. */

#if 0
-#define DEBUG
+#define PCRE_DEBUG
#endif

/* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
@@ -74,7 +74,7 @@
be absolutely sure we get our version. */

#undef DPRINTF
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
#define DPRINTF(p) printf p
#else
#define DPRINTF(p) /* Nothing */
@@ -86,8 +86,6 @@

#include <ctype.h>
#include <limits.h>
-#include <setjmp.h>
-#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
@@ -186,6 +184,23 @@
#error Cannot determine a type for 32-bit unsigned integers
#endif

+/* When checking for integer overflow in pcre_compile(), we need to handle
+large integers. If a 64-bit integer type is available, we can use that.
+Otherwise we have to cast to double, which of course requires floating point
+arithmetic. Handle this by defining a macro for the appropriate type. If
+stdint.h is available, include it; it may define INT64_MAX. The macro int64_t
+may be set by "configure". */
+
+#if HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#if defined INT64_MAX || defined int64_t
+#define INT64_OR_DOUBLE int64_t
+#else
+#define INT64_OR_DOUBLE double
+#endif
+
/* All character handling must be done as unsigned characters. Otherwise there
are problems with top-bit-set characters and functions such as isspace().
However, we leave the interface to the outside world as char *, because that
@@ -1579,7 +1594,7 @@

typedef struct branch_chain {
struct branch_chain *outer;
- uschar *current;
+ uschar *current_branch;
} branch_chain;

/* Structure for items in a linked list that represents an explicit recursive

Modified: code/trunk/pcre_printint.src
===================================================================
--- code/trunk/pcre_printint.src    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/pcre_printint.src    2010-01-02 18:21:30 UTC (rev 475)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.


                        Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge


-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,8 @@
local functions. This source file is used in two places:

(1) It is #included by pcre_compile.c when it is compiled in debugging mode
-(DEBUG defined in pcre_internal.h). It is not included in production compiles.
+(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
+compiles.

(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */

Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c    2010-01-02 16:30:46 UTC (rev 474)
+++ code/trunk/pcre_study.c    2010-01-02 18:21:30 UTC (rev 475)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.


                        Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge


-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -444,7 +444,8 @@
*/

 static void
-set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
+set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless, 
+  compile_data *cd)
 {
 start_bits[c/8] |= (1 << (c&7));
 if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
@@ -606,7 +607,7 @@
       case OP_QUERY:
       case OP_MINQUERY:
       case OP_POSQUERY:
-      set_bit(start_bits, tcode[1], caseless, cd);
+      set_table_bit(start_bits, tcode[1], caseless, cd);
       tcode += 2;
 #ifdef SUPPORT_UTF8
       if (utf8 && tcode[-1] >= 0xc0)
@@ -619,7 +620,7 @@
       case OP_UPTO:
       case OP_MINUPTO:
       case OP_POSUPTO:
-      set_bit(start_bits, tcode[3], caseless, cd);
+      set_table_bit(start_bits, tcode[3], caseless, cd);
       tcode += 4;
 #ifdef SUPPORT_UTF8
       if (utf8 && tcode[-1] >= 0xc0)
@@ -637,7 +638,7 @@
       case OP_PLUS:
       case OP_MINPLUS:
       case OP_POSPLUS:
-      set_bit(start_bits, tcode[1], caseless, cd);
+      set_table_bit(start_bits, tcode[1], caseless, cd);
       try_next = FALSE;
       break;