Re: [pcre-dev] Yet Another RC

Top Page
Delete this message
Author: Craig Silverstein
Date:  
To: silvermoonwoman
CC: pcre-dev
Subject: Re: [pcre-dev] Yet Another RC
} When building RC3 using CMake as configurer for Visual Studio
} Express, I get these warnings and errors.

I have a patch that fixes most of the warnings. I've fixed the
easy-to-fix warnings in pcretest.c; even though those fixes aren't
strictly necessary, it seems nice to clean up the make output when possible.

The warnings for pcrecpp pointed out a flaw in the old code: it didn't
deal with arrays of type unsigned char (as opposed to just char). At
Google we use -funsigned-char to make chars unsigned by default, which
is why this hadn't come up before.

I added explicit support for unsigned char in pcrecpp, and modified
the unittest to make use of it for some of the utf8 tests. I'm sure
pcre itself is careful about signedness, so it doesn't really matter
whether the input arrays use type signed char or unsigned char.
All I did, therefore, was cast unsigned char pointers to char pointers
in the user-facing functions. All tests still pass, so I guess that's
enough. :-)

That said, this is a relatively significant change to be going into an
-RC3 release. Philip, it's up to you whether to include it now or wait
until the next pcre version. The patch is below, against svn-head.

craig

--cut here--

Index: pcrecpp.h
===================================================================
--- pcrecpp.h    (revision 254)
+++ pcrecpp.h    (working copy)
@@ -491,10 +491,16 @@
  public:
   // We provide implicit conversions from strings so that users can
   // pass in a string or a "const char*" wherever an "RE" is expected.
-  RE(const char* pat) { Init(pat, NULL); }
-  RE(const char *pat, const RE_Options& option) { Init(pat, &option); }
   RE(const string& pat) { Init(pat, NULL); }
   RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
+  RE(const char* pat) { Init(pat, NULL); }
+  RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
+  RE(const unsigned char* pat) {
+    Init(reinterpret_cast<const char*>(pat), NULL);
+  }
+  RE(const unsigned char* pat, const RE_Options& option) {
+    Init(reinterpret_cast<const char*>(pat), &option);
+  }


   // Copy constructor & assignment - note that these are expensive
   // because they recompile the expression.
Index: pcrecpp.cc
===================================================================
--- pcrecpp.cc    (revision 254)
+++ pcrecpp.cc    (working copy)
@@ -717,7 +717,7 @@
   long r;
   if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
   if (r < SHRT_MIN || r > SHRT_MAX) return false;       // Out of range
-  *(reinterpret_cast<short*>(dest)) = r;
+  *(reinterpret_cast<short*>(dest)) = static_cast<short>(r);
   return true;
 }


@@ -728,7 +728,7 @@
   unsigned long r;
   if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
   if (r > USHRT_MAX) return false;                      // Out of range
-  *(reinterpret_cast<unsigned short*>(dest)) = r;
+  *(reinterpret_cast<unsigned short*>(dest)) = static_cast<unsigned short>(r);
   return true;
 }


Index: pcre_stringpiece.h.in
===================================================================
--- pcre_stringpiece.h.in    (revision 254)
+++ pcre_stringpiece.h.in    (working copy)
@@ -68,7 +68,10 @@
   StringPiece()
     : ptr_(NULL), length_(0) { }
   StringPiece(const char* str)
-    : ptr_(str), length_(static_cast<int>(strlen(str))) { }
+    : ptr_(str), length_(static_cast<int>(strlen(ptr_))) { }
+  StringPiece(const unsigned char* str)
+    : ptr_(reinterpret_cast<const char*>(str)),
+      length_(static_cast<int>(strlen(ptr_))) { }
   StringPiece(const string& str)
     : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
   StringPiece(const char* offset, int len)
Index: pcretest.c
===================================================================
--- pcretest.c    (revision 254)
+++ pcretest.c    (working copy)
@@ -1596,15 +1596,15 @@
       else
         {
         uschar sbuf[8];
-        sbuf[0] = (true_size >> 24)  & 255;
-        sbuf[1] = (true_size >> 16)  & 255;
-        sbuf[2] = (true_size >>  8)  & 255;
-        sbuf[3] = (true_size)  & 255;
+        sbuf[0] = (uschar)((true_size >> 24)  & 255);
+        sbuf[1] = (uschar)((true_size >> 16)  & 255);
+        sbuf[2] = (uschar)((true_size >>  8)  & 255);
+        sbuf[3] = (uschar)((true_size)  & 255);


-        sbuf[4] = (true_study_size >> 24)  & 255;
-        sbuf[5] = (true_study_size >> 16)  & 255;
-        sbuf[6] = (true_study_size >>  8)  & 255;
-        sbuf[7] = (true_study_size)  & 255;
+        sbuf[4] = (uschar)((true_study_size >> 24)  & 255);
+        sbuf[5] = (uschar)((true_study_size >> 16)  & 255);
+        sbuf[6] = (uschar)((true_study_size >>  8)  & 255);
+        sbuf[7] = (uschar)((true_study_size)  & 255);


         if (fwrite(sbuf, 1, 8, f) < 8 ||
             fwrite(re, 1, true_size, f) < true_size)
Index: pcrecpp_unittest.cc
===================================================================
--- pcrecpp_unittest.cc    (revision 254)
+++ pcrecpp_unittest.cc    (working copy)
@@ -1136,13 +1136,13 @@
     printf("Testing UTF-8 handling\n");


     // Three Japanese characters (nihongo)
-    const char utf8_string[] = {
+    const unsigned char utf8_string[] = {
          0xe6, 0x97, 0xa5, // 65e5
          0xe6, 0x9c, 0xac, // 627c
          0xe8, 0xaa, 0x9e, // 8a9e
          0
     };
-    const char utf8_pattern[] = {
+    const unsigned char utf8_pattern[] = {
          '.',
          0xe6, 0x9c, 0xac, // 627c
          '.',