[Pcre-svn] [802] code/branches/pcre16: Minor issues after merge.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [802] code/branches/pcre16: Minor issues after merge.
Revision: 802
          http://vcs.pcre.org/viewvc?view=rev&revision=802
Author:   ph10
Date:     2011-12-13 09:52:20 +0000 (Tue, 13 Dec 2011)


Log Message:
-----------
Minor issues after merge.

Modified Paths:
--------------
    code/branches/pcre16/pcre_compile.c
    code/branches/pcre16/pcre_study.c
    code/branches/pcre16/testdata/testinput13
    code/branches/pcre16/testdata/testoutput13


Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c    2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/pcre_compile.c    2011-12-13 09:52:20 UTC (rev 802)
@@ -3762,7 +3762,7 @@


     /* For optimization purposes, we track some properties of the class.
     class_has_8bitchar will be non-zero, if the class contains at least one
-    < 256 character. class_single_char will be 1, if the class only contains
+    < 256 character. class_single_char will be 1 if the class contains only
     a single character. */


     class_has_8bitchar = 0;
@@ -3933,7 +3933,7 @@
       of the specials, which just set a flag. The sequence \b is a special
       case. Inside a class (and only there) it is treated as backspace. We
       assume that other escapes have more than one character in them, so
-      speculatively set both class_has_8bitchar class_single_char bigger
+      speculatively set both class_has_8bitchar and class_single_char bigger
       than one. Unrecognized escapes fall through and are either treated
       as literal characters (by default), or are faulted if
       PCRE_EXTRA is set. */
@@ -4420,6 +4420,7 @@
       class_lastchar = c;


       /* Handle a character that cannot go in the bit map */
+       
 #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
       if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
 #elif defined SUPPORT_UTF
@@ -4427,15 +4428,15 @@
 #elif !(defined COMPILE_PCRE8)
       if (c > 255)
 #endif
+
 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
         {
         xclass = TRUE;
         *class_uchardata++ = XCL_SINGLE;
 #ifdef SUPPORT_UTF
 #ifndef COMPILE_PCRE8
-        /* In non 8 bit mode, we can get here even
-        if we are not in UTF mode. */
-        if (!utf)
+        /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
+        if (!utf) 
           *class_uchardata++ = c;
         else
 #endif
@@ -4448,8 +4449,7 @@
 #ifdef COMPILE_PCRE8
         if ((options & PCRE_CASELESS) != 0)
 #else
-        /* In non 8 bit mode, we can get here even
-        if we are not in UTF mode. */
+        /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
         if (utf && (options & PCRE_CASELESS) != 0)
 #endif
           {
@@ -4465,7 +4465,7 @@
             However, that uses less memory, and so if this happens to be at the
             end of the regex, there will not be enough memory in the real
             compile for this temporary storage. */
-              
+            
             if (lengthptr != NULL)
               {
               *lengthptr += class_uchardata - class_uchardata_base;
@@ -4478,6 +4478,7 @@
         }
       else
 #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
+
       /* Handle a single-byte character */
         {
         class_has_8bitchar = 1;
@@ -4488,7 +4489,6 @@
           classbits[c/8] |= (1 << (c&7));
           }
         }
-
       }


     /* Loop until ']' reached. This "while" is the end of the "do" far above.
@@ -4508,11 +4508,9 @@
       goto FAILED;
       }


-    /* COMMENT NEEDS FIXING - no longer true.
-    If class_charcount is 1, we saw precisely one character whose value is
-    less than 256. As long as there were no characters >= 128 and there was no
-    use of \p or \P, in other words, no use of any XCLASS features, we can
-    optimize.
+    /* If class_charcount is 1, we saw precisely one character. As long as
+    there were no negated characters >= 128 and there was no use of \p or \P,
+    in other words, no use of any XCLASS features, we can optimize.


     In UTF-8 mode, we can optimize the negative case only if there were no
     characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR


Modified: code/branches/pcre16/pcre_study.c
===================================================================
--- code/branches/pcre16/pcre_study.c    2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/pcre_study.c    2011-12-13 09:52:20 UTC (rev 802)
@@ -1433,7 +1433,7 @@
     study->flags |= PCRE_STUDY_MAPPED;
     memcpy(study->start_bits, start_bits, sizeof(start_bits));
     }
-  else memset(study->start_bits, 0, 32 * sizeof(pcre_uchar));
+  else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));


#ifdef PCRE_DEBUG
if (bits_set)

Modified: code/branches/pcre16/testdata/testinput13
===================================================================
--- code/branches/pcre16/testdata/testinput13    2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/testdata/testinput13    2011-12-13 09:52:20 UTC (rev 802)
@@ -580,4 +580,8 @@


 /(?<=ab\Cde)X/8

+/[ⱥ]/8iBZ
+
+/[^ⱥ]/8iBZ
+
 /-- End of testinput13 --/

Modified: code/branches/pcre16/testdata/testoutput13
===================================================================
--- code/branches/pcre16/testdata/testoutput13    2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/testdata/testoutput13    2011-12-13 09:52:20 UTC (rev 802)
@@ -1289,4 +1289,20 @@
 /(?<=ab\Cde)X/8
 Failed: \C not allowed in lookbehind assertion at offset 10


+/[ⱥ]/8iBZ
+------------------------------------------------------------------
+        Bra
+     /i \x{2c65}
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^ⱥ]/8iBZ
+------------------------------------------------------------------
+        Bra
+        [^\x{2c65}\x{23a}]
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput13 --/