[Pcre-svn] [1041] code/trunk: Turn case lists for horizontal…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1041] code/trunk: Turn case lists for horizontal and vertical white space into macros so they are defined only once.
Revision: 1041
          http://vcs.pcre.org/viewvc?view=rev&revision=1041
Author:   ph10
Date:     2012-09-16 11:16:27 +0100 (Sun, 16 Sep 2012)


Log Message:
-----------
Turn case lists for horizontal and vertical white space into macros so they are
defined only once.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_dfa_exec.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2012-09-16 06:52:27 UTC (rev 1040)
+++ code/trunk/ChangeLog    2012-09-16 10:16:27 UTC (rev 1041)
@@ -83,7 +83,10 @@


19. Improving the first n character searches.

+20. Turn case lists for horizontal and vertical white space into macros so that
+    they are defined only once.


+
Version 8.31 06-July-2012
-------------------------


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2012-09-16 06:52:27 UTC (rev 1040)
+++ code/trunk/pcre_compile.c    2012-09-16 10:16:27 UTC (rev 1041)
@@ -3168,28 +3168,9 @@
   case OP_NOT_HSPACE:
   switch(next)
     {
-    case CHAR_HT:
-    case CHAR_SPACE:
-#ifndef EBCDIC     
-    case 0xa0:
-    case 0x1680:
-    case 0x180e:
-    case 0x2000:
-    case 0x2001:
-    case 0x2002:
-    case 0x2003:
-    case 0x2004:
-    case 0x2005:
-    case 0x2006:
-    case 0x2007:
-    case 0x2008:
-    case 0x2009:
-    case 0x200A:
-    case 0x202f:
-    case 0x205f:
-    case 0x3000:
-#endif  /* Not EBCDIC */ 
+    HSPACE_CASES: 
     return op_code == OP_NOT_HSPACE;
+     
     default:
     return op_code != OP_NOT_HSPACE;
     }
@@ -3199,16 +3180,9 @@
   case OP_NOT_VSPACE:
   switch(next)
     {
-    case CHAR_LF:
-    case CHAR_VT:
-    case CHAR_FF:
-    case CHAR_CR:
-    case CHAR_NEL:
-#ifndef EBCDIC 
-    case 0x2028:
-    case 0x2029:
-#endif 
+    VSPACE_CASES: 
     return op_code == OP_NOT_VSPACE;
+     
     default:
     return op_code != OP_NOT_VSPACE;
     }
@@ -3265,28 +3239,9 @@
     case ESC_H:
     switch(c)
       {
-      case CHAR_HT:
-      case CHAR_SPACE:
-#ifndef EBCDIC       
-      case 0xa0:
-      case 0x1680:
-      case 0x180e:
-      case 0x2000:
-      case 0x2001:
-      case 0x2002:
-      case 0x2003:
-      case 0x2004:
-      case 0x2005:
-      case 0x2006:
-      case 0x2007:
-      case 0x2008:
-      case 0x2009:
-      case 0x200A:
-      case 0x202f:
-      case 0x205f:
-      case 0x3000:
-#endif  /* Not EBCDIC */ 
+      HSPACE_CASES: 
       return -next != ESC_h;
+       
       default:
       return -next == ESC_h;
       }
@@ -3295,16 +3250,9 @@
     case ESC_V:
     switch(c)
       {
-      case CHAR_LF:
-      case CHAR_VT:
-      case CHAR_FF:
-      case CHAR_CR:
-      case CHAR_NEL:
-#ifndef EBCDIC 
-      case 0x2028:
-      case 0x2029:
-#endif  /* Not EBCDIC */ 
+      VSPACE_CASES: 
       return -next != ESC_v;
+       
       default:
       return -next == ESC_v;
       }


Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c    2012-09-16 06:52:27 UTC (rev 1040)
+++ code/trunk/pcre_dfa_exec.c    2012-09-16 10:16:27 UTC (rev 1041)
@@ -1448,15 +1448,7 @@
         BOOL OK;
         switch (c)
           {
-          case CHAR_LF:
-          case CHAR_VT:
-          case CHAR_FF:
-          case CHAR_CR:
-          case CHAR_NEL:
-#ifndef EBCDIC
-          case 0x2028:
-          case 0x2029:
-#endif  /* Not EBCDIC */
+          VSPACE_CASES: 
           OK = TRUE;
           break;


@@ -1489,27 +1481,7 @@
         BOOL OK;
         switch (c)
           {
-          case CHAR_HT:
-          case CHAR_SPACE:
-#ifndef EBCDIC
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */
+          HSPACE_CASES: 
           OK = TRUE;
           break;


@@ -1729,15 +1701,7 @@
         BOOL OK;
         switch (c)
           {
-          case CHAR_LF:
-          case CHAR_VT:
-          case CHAR_FF:
-          case CHAR_CR:
-          case CHAR_NEL:
-#ifndef EBCDIC
-          case 0x2028:
-          case 0x2029:
-#endif  /* Not EBCDIC */
+          VSPACE_CASES: 
           OK = TRUE;
           break;


@@ -1777,27 +1741,7 @@
         BOOL OK;
         switch (c)
           {
-          case CHAR_HT:
-          case CHAR_SPACE:
-#ifndef EBCDIC 
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */           
+          HSPACE_CASES: 
           OK = TRUE;
           break;


@@ -1999,15 +1943,7 @@
         BOOL OK;
         switch (c)
           {
-          case CHAR_LF:
-          case CHAR_VT:
-          case CHAR_FF:
-          case CHAR_CR:
-          case CHAR_NEL:
-#ifndef EBCDIC
-          case 0x2028:
-          case 0x2029:
-#endif  /* Not EBCDIC */
+          VSPACE_CASES: 
           OK = TRUE;
           break;


@@ -2043,27 +1979,7 @@
         BOOL OK;
         switch (c)
           {
-          case CHAR_HT:
-          case CHAR_SPACE:
-#ifndef EBCDIC 
-          case 0xa0:      /* NBSP */
-          case 0x1680:    /* OGHAM SPACE MARK */
-          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-          case 0x2000:    /* EN QUAD */
-          case 0x2001:    /* EM QUAD */
-          case 0x2002:    /* EN SPACE */
-          case 0x2003:    /* EM SPACE */
-          case 0x2004:    /* THREE-PER-EM SPACE */
-          case 0x2005:    /* FOUR-PER-EM SPACE */
-          case 0x2006:    /* SIX-PER-EM SPACE */
-          case 0x2007:    /* FIGURE SPACE */
-          case 0x2008:    /* PUNCTUATION SPACE */
-          case 0x2009:    /* THIN SPACE */
-          case 0x200A:    /* HAIR SPACE */
-          case 0x202f:    /* NARROW NO-BREAK SPACE */
-          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-          case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */           
+          HSPACE_CASES: 
           OK = TRUE;
           break;


@@ -2206,15 +2122,7 @@
       case OP_NOT_VSPACE:
       if (clen > 0) switch(c)
         {
-        case CHAR_LF:
-        case CHAR_VT:
-        case CHAR_FF:
-        case CHAR_CR:
-        case CHAR_NEL:
-#ifndef EBCDIC
-        case 0x2028:
-        case 0x2029:
-#endif  /* Not EBCDIC */
+        VSPACE_CASES: 
         break;


         default:
@@ -2227,19 +2135,12 @@
       case OP_VSPACE:
       if (clen > 0) switch(c)
         {
-        case CHAR_LF:
-        case CHAR_VT:
-        case CHAR_FF:
-        case CHAR_CR:
-        case CHAR_NEL:
-#ifndef EBCDIC
-        case 0x2028:
-        case 0x2029:
-#endif  /* Not EBCDIC */
+        VSPACE_CASES: 
         ADD_NEW(state_offset + 1, 0);
         break;


-        default: break;
+        default: 
+        break;
         }
       break;


@@ -2247,27 +2148,7 @@
       case OP_NOT_HSPACE:
       if (clen > 0) switch(c)
         {
-        case CHAR_HT:
-        case CHAR_SPACE:
-#ifndef EBCDIC 
-        case 0xa0:      /* NBSP */
-        case 0x1680:    /* OGHAM SPACE MARK */
-        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-        case 0x2000:    /* EN QUAD */
-        case 0x2001:    /* EM QUAD */
-        case 0x2002:    /* EN SPACE */
-        case 0x2003:    /* EM SPACE */
-        case 0x2004:    /* THREE-PER-EM SPACE */
-        case 0x2005:    /* FOUR-PER-EM SPACE */
-        case 0x2006:    /* SIX-PER-EM SPACE */
-        case 0x2007:    /* FIGURE SPACE */
-        case 0x2008:    /* PUNCTUATION SPACE */
-        case 0x2009:    /* THIN SPACE */
-        case 0x200A:    /* HAIR SPACE */
-        case 0x202f:    /* NARROW NO-BREAK SPACE */
-        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-        case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */           
+        HSPACE_CASES: 
         break;


         default:
@@ -2280,29 +2161,12 @@
       case OP_HSPACE:
       if (clen > 0) switch(c)
         {
-        case CHAR_HT:
-        case CHAR_SPACE:
-#ifndef EBCDIC 
-        case 0xa0:      /* NBSP */
-        case 0x1680:    /* OGHAM SPACE MARK */
-        case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-        case 0x2000:    /* EN QUAD */
-        case 0x2001:    /* EM QUAD */
-        case 0x2002:    /* EN SPACE */
-        case 0x2003:    /* EM SPACE */
-        case 0x2004:    /* THREE-PER-EM SPACE */
-        case 0x2005:    /* FOUR-PER-EM SPACE */
-        case 0x2006:    /* SIX-PER-EM SPACE */
-        case 0x2007:    /* FIGURE SPACE */
-        case 0x2008:    /* PUNCTUATION SPACE */
-        case 0x2009:    /* THIN SPACE */
-        case 0x200A:    /* HAIR SPACE */
-        case 0x202f:    /* NARROW NO-BREAK SPACE */
-        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-        case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */           
+        HSPACE_CASES: 
         ADD_NEW(state_offset + 1, 0);
         break;
+        
+        default:
+        break;  
         }
       break;



Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2012-09-16 06:52:27 UTC (rev 1040)
+++ code/trunk/pcre_exec.c    2012-09-16 10:16:27 UTC (rev 1041)
@@ -2429,10 +2429,10 @@
       case CHAR_VT:
       case CHAR_FF:
       case CHAR_NEL:
-#ifndef EBCDIC       
+#ifndef EBCDIC
       case 0x2028:
       case 0x2029:
-#endif  /* Not EBCDIC */       
+#endif  /* Not EBCDIC */
       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
       break;
       }
@@ -2448,29 +2448,8 @@
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
+      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
       default: break;
-      case CHAR_HT:
-      case CHAR_SPACE:
-#ifndef EBCDIC       
-      case 0xa0:      /* NBSP */
-      case 0x1680:    /* OGHAM SPACE MARK */
-      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-      case 0x2000:    /* EN QUAD */
-      case 0x2001:    /* EM QUAD */
-      case 0x2002:    /* EN SPACE */
-      case 0x2003:    /* EM SPACE */
-      case 0x2004:    /* THREE-PER-EM SPACE */
-      case 0x2005:    /* FOUR-PER-EM SPACE */
-      case 0x2006:    /* SIX-PER-EM SPACE */
-      case 0x2007:    /* FIGURE SPACE */
-      case 0x2008:    /* PUNCTUATION SPACE */
-      case 0x2009:    /* THIN SPACE */
-      case 0x200A:    /* HAIR SPACE */
-      case 0x202f:    /* NARROW NO-BREAK SPACE */
-      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-      case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */     
-      RRETURN(MATCH_NOMATCH);
       }
     ecode++;
     break;
@@ -2484,29 +2463,8 @@
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
+      HSPACE_CASES: break;  /* Byte and multibyte cases */
       default: RRETURN(MATCH_NOMATCH);
-      case CHAR_HT:
-      case CHAR_SPACE:
-#ifndef EBCDIC       
-      case 0xa0:      /* NBSP */
-      case 0x1680:    /* OGHAM SPACE MARK */
-      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-      case 0x2000:    /* EN QUAD */
-      case 0x2001:    /* EM QUAD */
-      case 0x2002:    /* EN SPACE */
-      case 0x2003:    /* EM SPACE */
-      case 0x2004:    /* THREE-PER-EM SPACE */
-      case 0x2005:    /* FOUR-PER-EM SPACE */
-      case 0x2006:    /* SIX-PER-EM SPACE */
-      case 0x2007:    /* FIGURE SPACE */
-      case 0x2008:    /* PUNCTUATION SPACE */
-      case 0x2009:    /* THIN SPACE */
-      case 0x200A:    /* HAIR SPACE */
-      case 0x202f:    /* NARROW NO-BREAK SPACE */
-      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-      case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */       
-      break;
       }
     ecode++;
     break;
@@ -2520,17 +2478,8 @@
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
+      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
       default: break;
-      case CHAR_LF:
-      case CHAR_VT:
-      case CHAR_FF:
-      case CHAR_CR:
-      case CHAR_NEL:
-#ifndef EBCDIC       
-      case 0x2028:    /* LINE SEPARATOR */
-      case 0x2029:    /* PARAGRAPH SEPARATOR */
-#endif  /* Not EBCDIC */       
-      RRETURN(MATCH_NOMATCH);
       }
     ecode++;
     break;
@@ -2544,17 +2493,8 @@
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
+      VSPACE_CASES: break;
       default: RRETURN(MATCH_NOMATCH);
-      case CHAR_LF:
-      case CHAR_VT:
-      case CHAR_FF:
-      case CHAR_CR:
-      case CHAR_NEL:
-#ifndef EBCDIC
-      case 0x2028:    /* LINE SEPARATOR */
-      case 0x2029:    /* PARAGRAPH SEPARATOR */
-#endif  /* Not EBCDIC */
-      break;
       }
     ecode++;
     break;
@@ -2652,19 +2592,19 @@
       RRETURN(MATCH_NOMATCH);
       }
     else
-      { 
-      int lgb, rgb; 
+      {
+      int lgb, rgb;
       GETCHARINCTEST(c, eptr);
-      lgb = UCD_GRAPHBREAK(c); 
+      lgb = UCD_GRAPHBREAK(c);
       while (eptr < md->end_subject)
         {
         int len = 1;
         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
-        rgb = UCD_GRAPHBREAK(c); 
+        rgb = UCD_GRAPHBREAK(c);
         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-        lgb = rgb; 
+        lgb = rgb;
         eptr += len;
-        } 
+        }
       }
     CHECK_PARTIAL();
     ecode++;
@@ -4243,19 +4183,19 @@
             RRETURN(MATCH_NOMATCH);
             }
           else
-            { 
-            int lgb, rgb; 
+            {
+            int lgb, rgb;
             GETCHARINCTEST(c, eptr);
-            lgb = UCD_GRAPHBREAK(c); 
+            lgb = UCD_GRAPHBREAK(c);
            while (eptr < md->end_subject)
               {
               int len = 1;
               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
-              rgb = UCD_GRAPHBREAK(c); 
+              rgb = UCD_GRAPHBREAK(c);
               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-              lgb = rgb; 
+              lgb = rgb;
               eptr += len;
-              } 
+              }
             }
           CHECK_PARTIAL();
           }
@@ -4333,10 +4273,10 @@
             case CHAR_VT:
             case CHAR_FF:
             case CHAR_NEL:
-#ifndef EBCDIC 
+#ifndef EBCDIC
             case 0x2028:
             case 0x2029:
-#endif  /* Not EBCDIC */ 
+#endif  /* Not EBCDIC */
             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
             break;
             }
@@ -4354,29 +4294,8 @@
           GETCHARINC(c, eptr);
           switch(c)
             {
+            HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
             default: break;
-            case CHAR_HT:
-            case CHAR_SPACE:
-#ifndef EBCDIC             
-            case 0xa0:      /* NBSP */
-            case 0x1680:    /* OGHAM SPACE MARK */
-            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-            case 0x2000:    /* EN QUAD */
-            case 0x2001:    /* EM QUAD */
-            case 0x2002:    /* EN SPACE */
-            case 0x2003:    /* EM SPACE */
-            case 0x2004:    /* THREE-PER-EM SPACE */
-            case 0x2005:    /* FOUR-PER-EM SPACE */
-            case 0x2006:    /* SIX-PER-EM SPACE */
-            case 0x2007:    /* FIGURE SPACE */
-            case 0x2008:    /* PUNCTUATION SPACE */
-            case 0x2009:    /* THIN SPACE */
-            case 0x200A:    /* HAIR SPACE */
-            case 0x202f:    /* NARROW NO-BREAK SPACE */
-            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-            case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */             
-            RRETURN(MATCH_NOMATCH);
             }
           }
         break;
@@ -4392,29 +4311,8 @@
           GETCHARINC(c, eptr);
           switch(c)
             {
+            HSPACE_CASES: break;  /* Byte and multibyte cases */
             default: RRETURN(MATCH_NOMATCH);
-            case CHAR_HT:
-            case CHAR_SPACE:
-#ifndef EBCDIC 
-            case 0xa0:      /* NBSP */
-            case 0x1680:    /* OGHAM SPACE MARK */
-            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-            case 0x2000:    /* EN QUAD */
-            case 0x2001:    /* EM QUAD */
-            case 0x2002:    /* EN SPACE */
-            case 0x2003:    /* EM SPACE */
-            case 0x2004:    /* THREE-PER-EM SPACE */
-            case 0x2005:    /* FOUR-PER-EM SPACE */
-            case 0x2006:    /* SIX-PER-EM SPACE */
-            case 0x2007:    /* FIGURE SPACE */
-            case 0x2008:    /* PUNCTUATION SPACE */
-            case 0x2009:    /* THIN SPACE */
-            case 0x200A:    /* HAIR SPACE */
-            case 0x202f:    /* NARROW NO-BREAK SPACE */
-            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-            case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif             
-            break;
             }
           }
         break;
@@ -4430,17 +4328,8 @@
           GETCHARINC(c, eptr);
           switch(c)
             {
+            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
             default: break;
-            case CHAR_LF:
-            case CHAR_VT:
-            case CHAR_FF:
-            case CHAR_CR:
-            case CHAR_NEL:
-#ifndef EBCDIC 
-            case 0x2028:    /* LINE SEPARATOR */
-            case 0x2029:    /* PARAGRAPH SEPARATOR */
-#endif             
-            RRETURN(MATCH_NOMATCH);
             }
           }
         break;
@@ -4456,17 +4345,8 @@
           GETCHARINC(c, eptr);
           switch(c)
             {
+            VSPACE_CASES: break;
             default: RRETURN(MATCH_NOMATCH);
-            case CHAR_LF:
-            case CHAR_VT:
-            case CHAR_FF:
-            case CHAR_CR:
-            case CHAR_NEL:
-#ifndef EBCDIC             
-            case 0x2028:    /* LINE SEPARATOR */
-            case 0x2029:    /* PARAGRAPH SEPARATOR */
-#endif             
-            break;
             }
           }
         break;
@@ -4655,29 +4535,10 @@
           switch(*eptr++)
             {
             default: break;
-            case CHAR_HT:
-            case CHAR_SPACE:
-#ifndef EBCDIC             
-            case 0xa0:      /* NBSP */
+            HSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-            case 0x1680:    /* OGHAM SPACE MARK */
-            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-            case 0x2000:    /* EN QUAD */
-            case 0x2001:    /* EM QUAD */
-            case 0x2002:    /* EN SPACE */
-            case 0x2003:    /* EM SPACE */
-            case 0x2004:    /* THREE-PER-EM SPACE */
-            case 0x2005:    /* FOUR-PER-EM SPACE */
-            case 0x2006:    /* SIX-PER-EM SPACE */
-            case 0x2007:    /* FIGURE SPACE */
-            case 0x2008:    /* PUNCTUATION SPACE */
-            case 0x2009:    /* THIN SPACE */
-            case 0x200A:    /* HAIR SPACE */
-            case 0x202f:    /* NARROW NO-BREAK SPACE */
-            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-            case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* COMPILE_PCRE16 */
-#endif  /* Not EBCDIC */
+            HSPACE_MULTIBYTE_CASES:
+#endif
             RRETURN(MATCH_NOMATCH);
             }
           }
@@ -4694,29 +4555,10 @@
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
-            case CHAR_HT:
-            case CHAR_SPACE:
-#ifndef EBCDIC             
-            case 0xa0:      /* NBSP */
+            HSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-            case 0x1680:    /* OGHAM SPACE MARK */
-            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-            case 0x2000:    /* EN QUAD */
-            case 0x2001:    /* EM QUAD */
-            case 0x2002:    /* EN SPACE */
-            case 0x2003:    /* EM SPACE */
-            case 0x2004:    /* THREE-PER-EM SPACE */
-            case 0x2005:    /* FOUR-PER-EM SPACE */
-            case 0x2006:    /* SIX-PER-EM SPACE */
-            case 0x2007:    /* FIGURE SPACE */
-            case 0x2008:    /* PUNCTUATION SPACE */
-            case 0x2009:    /* THIN SPACE */
-            case 0x200A:    /* HAIR SPACE */
-            case 0x202f:    /* NARROW NO-BREAK SPACE */
-            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-            case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* COMPILE_PCRE16 */
-#endif  /* Not EBCDIC */
+            HSPACE_MULTIBYTE_CASES:
+#endif
             break;
             }
           }
@@ -4732,17 +4574,12 @@
             }
           switch(*eptr++)
             {
-            default: break;
-            case CHAR_LF:
-            case CHAR_VT:
-            case CHAR_FF:
-            case CHAR_CR:
-            case CHAR_NEL:
+            VSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-            case 0x2028:    /* LINE SEPARATOR */
-            case 0x2029:    /* PARAGRAPH SEPARATOR */
+            VSPACE_MULTIBYTE_CASES:
 #endif
             RRETURN(MATCH_NOMATCH);
+            default: break;
             }
           }
         break;
@@ -4758,14 +4595,9 @@
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
-            case CHAR_LF:
-            case CHAR_VT:
-            case CHAR_FF:
-            case CHAR_CR:
-            case CHAR_NEL:
+            VSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-            case 0x2028:    /* LINE SEPARATOR */
-            case 0x2029:    /* PARAGRAPH SEPARATOR */
+            VSPACE_MULTIBYTE_CASES:
 #endif
             break;
             }
@@ -5066,19 +4898,19 @@
             RRETURN(MATCH_NOMATCH);
             }
           else
-            { 
-            int lgb, rgb; 
+            {
+            int lgb, rgb;
             GETCHARINCTEST(c, eptr);
-            lgb = UCD_GRAPHBREAK(c); 
+            lgb = UCD_GRAPHBREAK(c);
             while (eptr < md->end_subject)
               {
               int len = 1;
               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
-              rgb = UCD_GRAPHBREAK(c); 
+              rgb = UCD_GRAPHBREAK(c);
               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-              lgb = rgb; 
+              lgb = rgb;
               eptr += len;
-              } 
+              }
             }
           CHECK_PARTIAL();
           }
@@ -5127,17 +4959,17 @@
               case CHAR_CR:
               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
               break;
-               
+
               case CHAR_LF:
               break;


               case CHAR_VT:
               case CHAR_FF:
               case CHAR_NEL:
-#ifndef EBCDIC               
+#ifndef EBCDIC
               case 0x2028:
               case 0x2029:
-#endif  /* Not EBCDIC */ 
+#endif  /* Not EBCDIC */
               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
               break;
               }
@@ -5146,92 +4978,32 @@
             case OP_NOT_HSPACE:
             switch(c)
               {
+              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
               default: break;
-              case CHAR_HT:
-              case CHAR_SPACE:
-#ifndef EBCDIC 
-              case 0xa0:      /* NBSP */
-              case 0x1680:    /* OGHAM SPACE MARK */
-              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-              case 0x2000:    /* EN QUAD */
-              case 0x2001:    /* EM QUAD */
-              case 0x2002:    /* EN SPACE */
-              case 0x2003:    /* EM SPACE */
-              case 0x2004:    /* THREE-PER-EM SPACE */
-              case 0x2005:    /* FOUR-PER-EM SPACE */
-              case 0x2006:    /* SIX-PER-EM SPACE */
-              case 0x2007:    /* FIGURE SPACE */
-              case 0x2008:    /* PUNCTUATION SPACE */
-              case 0x2009:    /* THIN SPACE */
-              case 0x200A:    /* HAIR SPACE */
-              case 0x202f:    /* NARROW NO-BREAK SPACE */
-              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-              case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */ 
-              RRETURN(MATCH_NOMATCH);
               }
             break;


             case OP_HSPACE:
             switch(c)
               {
+              HSPACE_CASES: break;
               default: RRETURN(MATCH_NOMATCH);
-              case CHAR_HT:
-              case CHAR_SPACE:
-#ifndef EBCDIC 
-              case 0xa0:      /* NBSP */
-              case 0x1680:    /* OGHAM SPACE MARK */
-              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-              case 0x2000:    /* EN QUAD */
-              case 0x2001:    /* EM QUAD */
-              case 0x2002:    /* EN SPACE */
-              case 0x2003:    /* EM SPACE */
-              case 0x2004:    /* THREE-PER-EM SPACE */
-              case 0x2005:    /* FOUR-PER-EM SPACE */
-              case 0x2006:    /* SIX-PER-EM SPACE */
-              case 0x2007:    /* FIGURE SPACE */
-              case 0x2008:    /* PUNCTUATION SPACE */
-              case 0x2009:    /* THIN SPACE */
-              case 0x200A:    /* HAIR SPACE */
-              case 0x202f:    /* NARROW NO-BREAK SPACE */
-              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-              case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */ 
-              break;
               }
             break;


             case OP_NOT_VSPACE:
             switch(c)
               {
+              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
               default: break;
-              case CHAR_LF:
-              case CHAR_VT:
-              case CHAR_FF:
-              case CHAR_CR:
-              case CHAR_NEL:
-#ifndef EBCDIC               
-              case 0x2028:    /* LINE SEPARATOR */
-              case 0x2029:    /* PARAGRAPH SEPARATOR */
-#endif  /* Not EBCDIC */ 
-              RRETURN(MATCH_NOMATCH);
               }
             break;


             case OP_VSPACE:
             switch(c)
               {
+              VSPACE_CASES: break;
               default: RRETURN(MATCH_NOMATCH);
-              case CHAR_LF:
-              case CHAR_VT:
-              case CHAR_FF:
-              case CHAR_CR:
-              case CHAR_NEL:
-#ifndef EBCDIC               
-              case 0x2028:    /* LINE SEPARATOR */
-              case 0x2029:    /* PARAGRAPH SEPARATOR */
-#endif  /* Not EBCDIC */               
-              break;
               }
             break;


@@ -5332,29 +5104,10 @@
             switch(c)
               {
               default: break;
-              case CHAR_HT:
-              case CHAR_SPACE:
-#ifndef EBCDIC 
-              case 0xa0:      /* NBSP */
+              HSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              case 0x1680:    /* OGHAM SPACE MARK */
-              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-              case 0x2000:    /* EN QUAD */
-              case 0x2001:    /* EM QUAD */
-              case 0x2002:    /* EN SPACE */
-              case 0x2003:    /* EM SPACE */
-              case 0x2004:    /* THREE-PER-EM SPACE */
-              case 0x2005:    /* FOUR-PER-EM SPACE */
-              case 0x2006:    /* SIX-PER-EM SPACE */
-              case 0x2007:    /* FIGURE SPACE */
-              case 0x2008:    /* PUNCTUATION SPACE */
-              case 0x2009:    /* THIN SPACE */
-              case 0x200A:    /* HAIR SPACE */
-              case 0x202f:    /* NARROW NO-BREAK SPACE */
-              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-              case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* COMPILE_PCRE16 */
-#endif  /* Not EBCDIC */
+              HSPACE_MULTIBYTE_CASES:
+#endif
               RRETURN(MATCH_NOMATCH);
               }
             break;
@@ -5363,29 +5116,10 @@
             switch(c)
               {
               default: RRETURN(MATCH_NOMATCH);
-              case CHAR_HT:
-              case CHAR_SPACE:
-#ifndef EBCDIC               
-              case 0xa0:      /* NBSP */
+              HSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              case 0x1680:    /* OGHAM SPACE MARK */
-              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-              case 0x2000:    /* EN QUAD */
-              case 0x2001:    /* EM QUAD */
-              case 0x2002:    /* EN SPACE */
-              case 0x2003:    /* EM SPACE */
-              case 0x2004:    /* THREE-PER-EM SPACE */
-              case 0x2005:    /* FOUR-PER-EM SPACE */
-              case 0x2006:    /* SIX-PER-EM SPACE */
-              case 0x2007:    /* FIGURE SPACE */
-              case 0x2008:    /* PUNCTUATION SPACE */
-              case 0x2009:    /* THIN SPACE */
-              case 0x200A:    /* HAIR SPACE */
-              case 0x202f:    /* NARROW NO-BREAK SPACE */
-              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-              case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* COMPILE_PCRE16 */
-#endif  /* Not EBCDIC */
+              HSPACE_MULTIBYTE_CASES:
+#endif
               break;
               }
             break;
@@ -5394,14 +5128,9 @@
             switch(c)
               {
               default: break;
-              case CHAR_LF:
-              case CHAR_VT:
-              case CHAR_FF:
-              case CHAR_CR:
-              case CHAR_NEL:
+              VSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              case 0x2028:    /* LINE SEPARATOR */
-              case 0x2029:    /* PARAGRAPH SEPARATOR */
+              VSPACE_MULTIBYTE_CASES:
 #endif
               RRETURN(MATCH_NOMATCH);
               }
@@ -5411,14 +5140,9 @@
             switch(c)
               {
               default: RRETURN(MATCH_NOMATCH);
-              case CHAR_LF:
-              case CHAR_VT:
-              case CHAR_FF:
-              case CHAR_CR:
-              case CHAR_NEL:
+              VSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              case 0x2028:    /* LINE SEPARATOR */
-              case 0x2029:    /* PARAGRAPH SEPARATOR */
+              VSPACE_MULTIBYTE_CASES:
 #endif
               break;
               }
@@ -5651,19 +5375,19 @@
             break;
             }
           else
-            { 
-            int lgb, rgb; 
+            {
+            int lgb, rgb;
             GETCHARINCTEST(c, eptr);
-            lgb = UCD_GRAPHBREAK(c); 
+            lgb = UCD_GRAPHBREAK(c);
             while (eptr < md->end_subject)
               {
               int len = 1;
               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
-              rgb = UCD_GRAPHBREAK(c); 
+              rgb = UCD_GRAPHBREAK(c);
               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-              lgb = rgb; 
+              lgb = rgb;
               eptr += len;
-              } 
+              }
             }
           CHECK_PARTIAL();
           }
@@ -5802,10 +5526,10 @@
               {
               if (c != CHAR_LF &&
                   (md->bsr_anycrlf ||
-                   (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL 
-#ifndef EBCDIC                    
+                   (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
+#ifndef EBCDIC
                     && c != 0x2028 && c != 0x2029
-#endif  /* Not EBCDIC */                      
+#endif  /* Not EBCDIC */
                     )))
                 break;
               eptr += len;
@@ -5827,30 +5551,8 @@
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
+              HSPACE_CASES: gotspace = TRUE; break;
               default: gotspace = FALSE; break;
-              case CHAR_HT:
-              case CHAR_SPACE:
-#ifndef EBCDIC               
-              case 0xa0:      /* NBSP */
-              case 0x1680:    /* OGHAM SPACE MARK */
-              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
-              case 0x2000:    /* EN QUAD */
-              case 0x2001:    /* EM QUAD */
-              case 0x2002:    /* EN SPACE */
-              case 0x2003:    /* EM SPACE */
-              case 0x2004:    /* THREE-PER-EM SPACE */
-              case 0x2005:    /* FOUR-PER-EM SPACE */
-              case 0x2006:    /* SIX-PER-EM SPACE */
-              case 0x2007:    /* FIGURE SPACE */
-              case 0x2008:    /* PUNCTUATION SPACE */
-              case 0x2009:    /* THIN SPACE */
-              case 0x200A:    /* HAIR SPACE */
-              case 0x202f:    /* NARROW NO-BREAK SPACE */
-              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
-              case 0x3000:    /* IDEOGRAPHIC SPACE */
-#endif  /* Not EBCDIC */
-              gotspace = TRUE;
-              break;
               }
             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
             eptr += len;
@@ -5871,18 +5573,8 @@
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
+              VSPACE_CASES: gotspace = TRUE; break;
               default: gotspace = FALSE; break;
-              case CHAR_LF:
-              case CHAR_VT:
-              case CHAR_FF:
-              case CHAR_CR:
-              case CHAR_NEL:
-#ifndef EBCDIC               
-              case 0x2028:    /* LINE SEPARATOR */
-              case 0x2029:    /* PARAGRAPH SEPARATOR */
-#endif  /* Not EBCDIC */ 
-              gotspace = TRUE;
-              break;
               }
             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
             eptr += len;
@@ -6074,18 +5766,17 @@
               SCHECK_PARTIAL();
               break;
               }
-            c = *eptr;
-            if (c == CHAR_HT || c == CHAR_SPACE
-#ifndef EBCDIC             
-              || c == 0xa0
+            switch(*eptr)
+              {
+              default: eptr++; break;
+              HSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
-              || c == 0x202f || c == 0x205f || c == 0x3000
-#endif  /* COMPILE_PCRE16 */
-#endif  /* Not EBCDIC */
-              ) break;
-            eptr++;
+              HSPACE_MULTIBYTE_CASES:
+#endif
+              goto ENDLOOP00;
+              }
             }
+          ENDLOOP00:
           break;


           case OP_HSPACE:
@@ -6096,18 +5787,17 @@
               SCHECK_PARTIAL();
               break;
               }
-            c = *eptr;
-            if (c != CHAR_HT && c != CHAR_SPACE 
-#ifndef EBCDIC             
-              && c != 0xa0
+            switch(*eptr)
+              {
+              default: goto ENDLOOP01;
+              HSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
-              && c != 0x202f && c != 0x205f && c != 0x3000
-#endif  /* COMPILE_PCRE16 */
-#endif  /* Not EBCDIC */
-              ) break;
-            eptr++;
+              HSPACE_MULTIBYTE_CASES:
+#endif
+              eptr++; break;
+              }
             }
+          ENDLOOP01:
           break;


           case OP_NOT_VSPACE:
@@ -6118,15 +5808,17 @@
               SCHECK_PARTIAL();
               break;
               }
-            c = *eptr;
-            if (c == CHAR_LF || c == CHAR_VT || c == CHAR_FF || 
-                c == CHAR_CR || c == CHAR_NEL
+            switch(*eptr)
+              {
+              default: eptr++; break;
+              VSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              || c == 0x2028 || c == 0x2029
+              VSPACE_MULTIBYTE_CASES:
 #endif
-              ) break;
-            eptr++;
+              goto ENDLOOP02;
+              }
             }
+          ENDLOOP02:
           break;


           case OP_VSPACE:
@@ -6137,15 +5829,17 @@
               SCHECK_PARTIAL();
               break;
               }
-            c = *eptr;
-            if (c != CHAR_LF && c != CHAR_VT && c != CHAR_FF && 
-                c != CHAR_CR && c != CHAR_NEL
+            switch(*eptr)
+              {
+              default: goto ENDLOOP03;
+              VSPACE_BYTE_CASES:
 #ifdef COMPILE_PCRE16
-              && c != 0x2028 && c != 0x2029
+              VSPACE_MULTIBYTE_CASES:
 #endif
-              ) break;
-            eptr++;
+              eptr++; break;
+              }
             }
+          ENDLOOP03:
           break;


           case OP_NOT_DIGIT:


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2012-09-16 06:52:27 UTC (rev 1040)
+++ code/trunk/pcre_internal.h    2012-09-16 10:16:27 UTC (rev 1041)
@@ -529,11 +529,11 @@
 #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)


/* When UTF encoding is being used, a character is no longer just a single
-character. The macros for character handling generate simple sequences when
-used in character-mode, and more complicated ones for UTF characters.
-GETCHARLENTEST and other macros are not used when UTF is not supported,
-so they are not defined. To make sure they can never even appear when
-UTF support is omitted, we don't even define them. */
+byte. The macros for character handling generate simple sequences when used in
+character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
+and other macros are not used when UTF is not supported, so they are not
+defined. To make sure they can never even appear when UTF support is omitted,
+we don't even define them. */

#ifndef SUPPORT_UTF

@@ -832,6 +832,68 @@
#endif /* SUPPORT_UTF */


+/* Tests for Unicode horizontal and vertical whitespace characters must check a
+number of different values. Using a switch statement for this generates the
+fastest code (no loop, no memory access), and there are several places where
+this happens. In order to ensure that all the case lists remain in step, we use
+macros so that there is only one place where the lists are defined. Each macro
+omits the colon of its last case label, so it is used as "HSPACE_CASES:". In
+EBCDIC environments the MULTIBYTE lists are empty and cannot be used that way,
+so there the xSPACE_CASES lists are just the xSPACE_BYTE_CASES lists.
+
+NOTE: These values are also used explicitly in pcre_compile.c when processing
+\h, \H, \v and \V in a character class, so any changes here should be
+duplicated there as well. They also appear in pcre_jit_compile.c. */
+
+#ifndef EBCDIC
+#define HSPACE_MULTIBYTE_CASES \
+      case 0x1680:    /* OGHAM SPACE MARK */ \
+      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */ \
+      case 0x2000:    /* EN QUAD */ \
+      case 0x2001:    /* EM QUAD */ \
+      case 0x2002:    /* EN SPACE */ \
+      case 0x2003:    /* EM SPACE */ \
+      case 0x2004:    /* THREE-PER-EM SPACE */ \
+      case 0x2005:    /* FOUR-PER-EM SPACE */ \
+      case 0x2006:    /* SIX-PER-EM SPACE */ \
+      case 0x2007:    /* FIGURE SPACE */ \
+      case 0x2008:    /* PUNCTUATION SPACE */ \
+      case 0x2009:    /* THIN SPACE */ \
+      case 0x200A:    /* HAIR SPACE */ \
+      case 0x202f:    /* NARROW NO-BREAK SPACE */ \
+      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */ \
+      case 0x3000     /* IDEOGRAPHIC SPACE */
+
+#define HSPACE_BYTE_CASES \
+      case CHAR_HT: \
+      case CHAR_SPACE: \
+      case 0xa0       /* NBSP */
+
+#define VSPACE_MULTIBYTE_CASES \
+      case 0x2028:    /* LINE SEPARATOR */ \
+      case 0x2029     /* PARAGRAPH SEPARATOR */
+
+#define HSPACE_CASES HSPACE_BYTE_CASES: HSPACE_MULTIBYTE_CASES
+#define VSPACE_CASES VSPACE_BYTE_CASES: VSPACE_MULTIBYTE_CASES
+
+#else   /* EBCDIC */
+#define HSPACE_MULTIBYTE_CASES
+#define VSPACE_MULTIBYTE_CASES
+
+#define HSPACE_BYTE_CASES \
+      case CHAR_HT: \
+      case CHAR_SPACE
+#define HSPACE_CASES HSPACE_BYTE_CASES
+#define VSPACE_CASES VSPACE_BYTE_CASES
+#endif  /* EBCDIC */
+
+#define VSPACE_BYTE_CASES \
+      case CHAR_LF: \
+      case CHAR_VT: \
+      case CHAR_FF: \
+      case CHAR_CR: \
+      case CHAR_NEL
+
 /* In case there is no definition of offsetof() provided - though any proper
 Standard C system should have one. */


@@ -946,15 +1008,15 @@

/* UTF-8 support is not enabled; use the platform-dependent character literals
so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
-mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
+mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
characters, a common practice has been to use its NL (0x15) character as the
-line terminator in C-like processing environments. However, sometimes the LF
+line terminator in C-like processing environments. However, sometimes the LF
(0x25) character is used instead, according to this Unicode document:

http://unicode.org/standard/reports/tr13/tr13-5.html

-PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
-instead. Whichever is *not* chosen is defined as NEL.
+PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
+instead. Whichever is *not* chosen is defined as NEL.

In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
same code point. */
@@ -983,7 +1045,7 @@

#else /* Not EBCDIC */

-/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
+/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
compatibility. NEL is the Unicode newline character; make sure it is
a positive value. */

@@ -2083,7 +2145,7 @@
   int  external_flags;              /* External flag bits to be set */
   int  req_varyopt;                 /* "After variable item" flag for reqbyte */
   BOOL had_accept;                  /* (*ACCEPT) encountered */
-  BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */ 
+  BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
   BOOL check_lookbehind;            /* Lookbehinds need later checking */
   int  nltype;                      /* Newline type */
   int  nllen;                       /* Newline string length */