[Pcre-svn] [1431] code/trunk: Revert RAWUCHAR macros, renaming them as UCHAR21 and adding an explanatory comment.

Author: Subversion repository
Date:
To: pcre-svn
Subject: [Pcre-svn] [1431] code/trunk: Revert RAWUCHAR macros, renaming them as UCHAR21 and adding an explanatory

Revision: 1431

          http://vcs.pcre.org/viewvc?view=rev&revision=1431
Author:   ph10
Date:     2014-01-02 17:41:28 +0000 (Thu, 02 Jan 2014)

Log Message:
-----------
Revert RAWUCHAR macros, renaming them as UCHAR21 and adding an explanatory
comment.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_dfa_exec.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/pcre_string_utils.c

Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2014-01-01 17:11:54 UTC (rev 1430)
+++ code/trunk/ChangeLog    2014-01-02 17:41:28 UTC (rev 1431)
@@ -31,10 +31,11 @@
     must be bigger than the threshold as well. This function is useful, when
     the characters above the threshold are handled in the same way.

-7.  The macros RAWUCHAR and RAWUCHARTEST were identical (and the latter was not
-    testing anything, contrary to its name and comment) and were not really 
-    fulfilling any useful function, so I have replaced their use by plain code. 
-    Similarly for RAWUCHARINC and RAWUCHARINCTEST.  
+7.  The macros whose names start with RAWUCHAR are placeholders for a future 
+    mode in which only the bottom 21 bits of 32-bit data items are used. To 
+    make this more memorable for those maintaining the code, the names have 
+    been changed to start with UCHAR21, and an extensive comment has been added 
+    to their definition.

Version 8.34 15-December-2013

Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c    2014-01-01 17:11:54 UTC (rev 1430)
+++ code/trunk/pcre_dfa_exec.c    2014-01-02 17:41:28 UTC (rev 1431)
@@ -7,7 +7,7 @@
 below for why this module is different).

                        Written by Philip Hazel
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -1473,7 +1473,7 @@
           goto ANYNL01;

           case CHAR_CR:
-          if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+          if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */

           ANYNL01:
@@ -1742,7 +1742,7 @@
           goto ANYNL02;

           case CHAR_CR:
-          if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+          if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */

           ANYNL02:
@@ -2012,7 +2012,7 @@
           goto ANYNL03;

           case CHAR_CR:
-          if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+          if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
           /* Fall through */

           ANYNL03:
@@ -2210,7 +2210,7 @@
           if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
             reset_could_continue = TRUE;
           }
-        else if (ptr[1] == CHAR_LF)
+        else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
           {
           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
           }
@@ -3474,12 +3474,12 @@
           {
           pcre_uchar csc;
           while (current_subject < end_subject &&
-                 (csc = *current_subject) != first_char && csc != first_char2)
+                 (csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
             current_subject++;
           }
         else
           while (current_subject < end_subject &&
-                 *current_subject != first_char)
+                 UCHAR21TEST(current_subject) != first_char)
             current_subject++;
         }

@@ -3509,9 +3509,10 @@
           ANYCRLF, and we are now at a LF, advance the match position by one
           more character. */

-          if (current_subject[-1] == CHAR_CR &&
+          if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
                (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
-               current_subject < end_subject && *current_subject == CHAR_NL)
+               current_subject < end_subject &&
+               UCHAR21TEST(current_subject) == CHAR_NL)
             current_subject++;
           }
         }
@@ -3522,7 +3523,7 @@
         {
         while (current_subject < end_subject)
           {
-          register pcre_uint32 c = *current_subject;
+          register pcre_uint32 c = UCHAR21TEST(current_subject);
 #ifndef COMPILE_PCRE8
           if (c > 255) c = 255;
 #endif
@@ -3579,7 +3580,7 @@
             {
             while (p < end_subject)
               {
-              register pcre_uint32 pp = *p++;
+              register pcre_uint32 pp = UCHAR21INCTEST(p);
               if (pp == req_char || pp == req_char2) { p--; break; }
               }
             }
@@ -3587,7 +3588,7 @@
             {
             while (p < end_subject)
               {
-              if (*p++ == req_char) { p--; break; }
+              if (UCHAR21INCTEST(p) == req_char) { p--; break; }
               }
             }

@@ -3655,9 +3656,9 @@
not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. */

-  if (current_subject[-1] == CHAR_CR &&
+  if (UCHAR21TEST(current_subject - 1) == CHAR_CR &&
       current_subject < end_subject &&
-      *current_subject == CHAR_NL &&
+      UCHAR21TEST(current_subject) == CHAR_NL &&
       (re->flags & PCRE_HASCRORLF) == 0 &&
         (md->nltype == NLTYPE_ANY ||
          md->nltype == NLTYPE_ANYCRLF ||

Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2014-01-01 17:11:54 UTC (rev 1430)
+++ code/trunk/pcre_exec.c    2014-01-02 17:41:28 UTC (rev 1431)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                        Written by Philip Hazel
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -134,7 +134,7 @@
BOOL utf = md->utf;
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
while (length-- > 0)
- if (isprint(c = *p++)) printf("%c", (char)c); else printf("\\x{%02x}", c);
+ if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
}
#endif

@@ -237,8 +237,8 @@
       {
       pcre_uint32 cc, cp;
       if (eptr >= md->end_subject) return -2;   /* Partial match */
-      cc = *eptr;
-      cp = *p;
+      cc = UCHAR21TEST(eptr);
+      cp = UCHAR21TEST(p);
       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
       p++;
       eptr++;
@@ -254,7 +254,7 @@
   while (length-- > 0)
     {
     if (eptr >= md->end_subject) return -2;   /* Partial match */
-    if (*p++ != *eptr++) return -1;
+    if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
     }
   }

@@ -2103,7 +2103,7 @@
             eptr + 1 >= md->end_subject &&
             NLBLOCK->nltype == NLTYPE_FIXED &&
             NLBLOCK->nllen == 2 &&
-            *eptr == NLBLOCK->nl[0])
+            UCHAR21TEST(eptr) == NLBLOCK->nl[0])
           {
           md->hitend = TRUE;
           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2147,7 +2147,7 @@
           eptr + 1 >= md->end_subject &&
           NLBLOCK->nltype == NLTYPE_FIXED &&
           NLBLOCK->nllen == 2 &&
-          *eptr == NLBLOCK->nl[0])
+          UCHAR21TEST(eptr) == NLBLOCK->nl[0])
         {
         md->hitend = TRUE;
         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2290,7 +2290,7 @@
         eptr + 1 >= md->end_subject &&
         NLBLOCK->nltype == NLTYPE_FIXED &&
         NLBLOCK->nllen == 2 &&
-        *eptr == NLBLOCK->nl[0])
+        UCHAR21TEST(eptr) == NLBLOCK->nl[0])
       {
       md->hitend = TRUE;
       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2444,7 +2444,7 @@
         {
         SCHECK_PARTIAL();
         }
-      else if (*eptr == CHAR_LF) eptr++;
+      else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
       break;

       case CHAR_LF:
@@ -3218,7 +3218,7 @@
         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
         RRETURN(MATCH_NOMATCH);
         }
-      while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
+      while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
       }
     else
 #endif
@@ -3258,7 +3258,7 @@

       if (fc < 128)
         {
-        pcre_uint32 cc = *eptr;
+        pcre_uint32 cc = UCHAR21(eptr);
         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
         ecode++;
         eptr++;
@@ -3527,7 +3527,7 @@
           SCHECK_PARTIAL();
           RRETURN(MATCH_NOMATCH);
           }
-        cc = *eptr;
+        cc = UCHAR21TEST(eptr);
         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
         eptr++;
         }
@@ -3545,7 +3545,7 @@
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = *eptr;
+          cc = UCHAR21TEST(eptr);
           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
           eptr++;
           }
@@ -3562,7 +3562,7 @@
             SCHECK_PARTIAL();
             break;
             }
-          cc = *eptr;
+          cc = UCHAR21TEST(eptr);
           if (fc != cc && foc != cc) break;
           eptr++;
           }
@@ -3589,7 +3589,7 @@
           SCHECK_PARTIAL();
           RRETURN(MATCH_NOMATCH);
           }
-        if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+        if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
         }

       if (min == max) continue;
@@ -3606,7 +3606,7 @@
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+          if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
         }
@@ -3620,7 +3620,7 @@
             SCHECK_PARTIAL();
             break;
             }
-          if (fc != *eptr) break;
+          if (fc != UCHAR21TEST(eptr)) break;
           eptr++;
           }
         if (possessive) continue;    /* No backtracking */
@@ -4375,7 +4375,7 @@
               eptr + 1 >= md->end_subject &&
               NLBLOCK->nltype == NLTYPE_FIXED &&
               NLBLOCK->nllen == 2 &&
-              *eptr == NLBLOCK->nl[0])
+              UCHAR21(eptr) == NLBLOCK->nl[0])
             {
             md->hitend = TRUE;
             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -4417,7 +4417,7 @@
             default: RRETURN(MATCH_NOMATCH);

             case CHAR_CR:
-            if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
+            if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
             break;

             case CHAR_LF:
@@ -4527,7 +4527,7 @@
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = *eptr;
+          cc = UCHAR21(eptr);
           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4544,7 +4544,7 @@
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = *eptr;
+          cc = UCHAR21(eptr);
           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4561,7 +4561,7 @@
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = *eptr;
+          cc = UCHAR21(eptr);
           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4578,7 +4578,7 @@
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = *eptr;
+          cc = UCHAR21(eptr);
           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -4595,7 +4595,7 @@
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          cc = *eptr;
+          cc = UCHAR21(eptr);
           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
           eptr++;
@@ -5156,7 +5156,7 @@
               {
               default: RRETURN(MATCH_NOMATCH);
               case CHAR_CR:
-              if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
+              if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
               break;

               case CHAR_LF:
@@ -5695,7 +5695,7 @@
                   eptr + 1 >= md->end_subject &&
                   NLBLOCK->nltype == NLTYPE_FIXED &&
                   NLBLOCK->nllen == 2 &&
-                  *eptr == NLBLOCK->nl[0])
+                  UCHAR21(eptr) == NLBLOCK->nl[0])
                 {
                 md->hitend = TRUE;
                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5721,7 +5721,7 @@
                   eptr + 1 >= md->end_subject &&
                   NLBLOCK->nltype == NLTYPE_FIXED &&
                   NLBLOCK->nllen == 2 &&
-                  *eptr == NLBLOCK->nl[0])
+                  UCHAR21(eptr) == NLBLOCK->nl[0])
                 {
                 md->hitend = TRUE;
                 if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5778,7 +5778,7 @@
             if (c == CHAR_CR)
               {
               if (++eptr >= md->end_subject) break;
-              if (*eptr == CHAR_LF) eptr++;
+              if (UCHAR21(eptr) == CHAR_LF) eptr++;
               }
             else
               {
@@ -5941,8 +5941,8 @@
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           eptr--;
           BACKCHAR(eptr);
-          if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_NL &&
-              eptr[-1] == CHAR_CR) eptr--;
+          if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
+              UCHAR21(eptr - 1) == CHAR_CR) eptr--;
           }
         }
       else
@@ -6789,10 +6789,10 @@

       if (first_char != first_char2)
         while (start_match < end_subject &&
-          (smc = *start_match) != first_char && smc != first_char2)
+          (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
           start_match++;
       else
-        while (start_match < end_subject && *start_match != first_char)
+        while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
           start_match++;
       }

@@ -6824,7 +6824,7 @@
         if (start_match[-1] == CHAR_CR &&
              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
              start_match < end_subject &&
-             *start_match == CHAR_NL)
+             UCHAR21TEST(start_match) == CHAR_NL)
           start_match++;
         }
       }
@@ -6835,7 +6835,7 @@
       {
       while (start_match < end_subject)
         {
-        register pcre_uint32 c = *start_match;
+        register pcre_uint32 c = UCHAR21TEST(start_match);
 #ifndef COMPILE_PCRE8
         if (c > 255) c = 255;
 #endif
@@ -6893,7 +6893,7 @@
           {
           while (p < end_subject)
             {
-            register pcre_uint32 pp = *p++;
+            register pcre_uint32 pp = UCHAR21INCTEST(p);
             if (pp == req_char || pp == req_char2) { p--; break; }
             }
           }
@@ -6901,7 +6901,7 @@
           {
           while (p < end_subject)
             {
-            if (*p++ == req_char) { p--; break; }
+            if (UCHAR21INCTEST(p) == req_char) { p--; break; }
             }
           }

Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2014-01-01 17:11:54 UTC (rev 1430)
+++ code/trunk/pcre_internal.h    2014-01-02 17:41:28 UTC (rev 1431)
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                        Written by Philip Hazel
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -316,7 +316,8 @@
        &(NLBLOCK->nllen), utf)) \
     : \
     ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
-     *p == NLBLOCK->nl[0] && (NLBLOCK->nllen == 1 || p[1] == NLBLOCK->nl[1]) \
+     UCHAR21TEST(p) == NLBLOCK->nl[0] && \
+     (NLBLOCK->nllen == 1 || UCHAR21TEST(p+1) == NLBLOCK->nl[1])       \
     ) \
   )

@@ -329,8 +330,8 @@
        &(NLBLOCK->nllen), utf)) \
     : \
     ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
-     *(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
-     (NLBLOCK->nllen == 1 || *(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
+     UCHAR21TEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] &&              \
+     (NLBLOCK->nllen == 1 || UCHAR21TEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
     ) \
   )

@@ -581,12 +582,27 @@
#define MAX_MARK ((1u << 8) - 1)
#endif

+/* There is a proposed future special "UTF-21" mode, in which only the lowest 
+21 bits of a 32-bit character are interpreted as UTF, with the remaining 11 
+high-order bits available to the application for other uses. In preparation for 
+the future implementation of this mode, there are macros that load a data item
+and, if in this special mode, mask it to 21 bits. These macros all have names
+starting with UCHAR21. In all other modes, including the normal 32-bit
+library, the macros all have the same simple definitions. When the new mode is
+implemented, it is expected that these definitions will be varied appropriately
+using #ifdef when compiling the library that supports the special mode. */
+
+#define UCHAR21(eptr)        (*(eptr))
+#define UCHAR21TEST(eptr)    (*(eptr))
+#define UCHAR21INC(eptr)     (*(eptr)++)
+#define UCHAR21INCTEST(eptr) (*(eptr)++)
+
 /* When UTF encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
-and other macros are not used when UTF is not supported, so they are not
-defined. To make sure they can never even appear when UTF support is omitted,
-we don't even define them. */
+byte in 8-bit mode or a single short in 16-bit mode. The macros for character
+handling generate simple sequences when used in the basic mode, and more
+complicated ones for UTF characters. GETCHARLENTEST and other macros are not
+used when UTF is not supported. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */

#ifndef SUPPORT_UTF

Modified: code/trunk/pcre_string_utils.c
===================================================================
--- code/trunk/pcre_string_utils.c    2014-01-01 17:11:54 UTC (rev 1430)
+++ code/trunk/pcre_string_utils.c    2014-01-02 17:41:28 UTC (rev 1431)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                        Written by Philip Hazel
-           Copyright (c) 1997-2013 University of Cambridge
+           Copyright (c) 1997-2014 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -91,8 +91,8 @@

 while (*str1 != '\0' || *str2 != '\0')
   {
-  c1 = *str1++;
-  c2 = *str2++;
+  c1 = UCHAR21INC(str1);
+  c2 = UCHAR21INC(str2);
   if (c1 != c2)
     return ((c1 > c2) << 1) - 1;
   }
@@ -131,7 +131,7 @@

 while (*str1 != '\0' || *ustr2 != '\0')
   {
-  c1 = *str1++;
+  c1 = UCHAR21INC(str1);
   c2 = (pcre_uchar)*ustr2++;
   if (c1 != c2)
     return ((c1 > c2) << 1) - 1;

This message is part of the following thread:
	the complete thread tree sorted by date

[Pcre-svn] [1431] code/trunk: Revert RAWUCHAR macros, renam…