[Pcre-svn] [1569] code/trunk: Recognize EBCDIC non-breaking …

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1569] code/trunk: Recognize EBCDIC non-breaking space and give error for \p in a class when no UCP support
Revision: 1569
          http://vcs.pcre.org/viewvc?view=rev&revision=1569
Author:   ph10
Date:     2015-06-19 17:10:07 +0100 (Fri, 19 Jun 2015)
Log Message:
-----------
Recognize the EBCDIC non-breaking space, and give an error for \p in a class
when there is no UCP support.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_internal.h
    code/trunk/testdata/testinputEBC
    code/trunk/testdata/testoutputEBC


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/ChangeLog    2015-06-19 16:10:07 UTC (rev 1569)
@@ -60,6 +60,14 @@


 14. The handling of \c in an EBCDIC environment has been revised so that it is
     now compatible with the specification in Perl's perlebcdic page.
+    
+15. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in
+    ASCII/Unicode. This has now been added to the list of characters that are
+    recognized as white space in EBCDIC.  
+    
+16. When PCRE was compiled without UCP support, the use of \p and \P gave an 
+    error (correctly) when used outside a class, but did not give an error 
+    within a class.



Version 8.37 28-April-2015

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/pcre_compile.c    2015-06-19 16:10:07 UTC (rev 1569)
@@ -5218,9 +5218,9 @@
               cd, PRIV(vspace_list));
             continue;


-#ifdef SUPPORT_UCP
             case ESC_p:
             case ESC_P:
+#ifdef SUPPORT_UCP
               {
               BOOL negated;
               unsigned int ptype = 0, pdata = 0;
@@ -5234,6 +5234,9 @@
               class_has_8bitchar--;                /* Undo! */
               continue;
               }
+#else
+            *errorcodeptr = ERR45;
+            goto FAILED;
 #endif
             /* Unrecognized escapes are faulted if PCRE is running in its
             strict mode. By default, for compatibility with Perl, they are


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/pcre_internal.h    2015-06-19 16:10:07 UTC (rev 1569)
@@ -984,7 +984,7 @@
 #ifndef EBCDIC


 #define HSPACE_LIST \
-  CHAR_HT, CHAR_SPACE, 0xa0, \
+  CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
   0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
   0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
   NOTACHAR
@@ -1010,7 +1010,7 @@
 #define HSPACE_BYTE_CASES \
   case CHAR_HT: \
   case CHAR_SPACE: \
-  case 0xa0     /* NBSP */
+  case CHAR_NBSP


 #define HSPACE_CASES \
   HSPACE_BYTE_CASES: \
@@ -1037,11 +1037,12 @@
 /* ------ EBCDIC environments ------ */

 #else
-#define HSPACE_LIST CHAR_HT, CHAR_SPACE
+#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP

 #define HSPACE_BYTE_CASES \
   case CHAR_HT: \
-  case CHAR_SPACE
+  case CHAR_SPACE: \
+  case CHAR_NBSP

 #define HSPACE_CASES HSPACE_BYTE_CASES

@@ -1215,6 +1216,7 @@

 #define CHAR_ESC                    '\047'
 #define CHAR_DEL                    '\007'
+#define CHAR_NBSP                   '\101'
 #define STR_ESC                     "\047"
 #define STR_DEL                     "\007"


@@ -1229,6 +1231,7 @@
 #define CHAR_NEL                    ((unsigned char)'\x85')
 #define CHAR_ESC                    '\033'
 #define CHAR_DEL                    '\177'
+#define CHAR_NBSP                   ((unsigned char)'\xa0')


 #define STR_LF                      "\n"
 #define STR_NL                      STR_LF
@@ -1606,6 +1609,7 @@
 #define CHAR_VERTICAL_LINE          '\174'
 #define CHAR_RIGHT_CURLY_BRACKET    '\175'
 #define CHAR_TILDE                  '\176'
+#define CHAR_NBSP                   ((unsigned char)'\xa0')


 #define STR_HT                      "\011"
 #define STR_VT                      "\013"


Modified: code/trunk/testdata/testinputEBC
===================================================================
--- code/trunk/testdata/testinputEBC    2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/testdata/testinputEBC    2015-06-19 16:10:07 UTC (rev 1569)
@@ -29,13 +29,16 @@


 /^A\\x88/
     A B
+    A\x41B


/-- Test \H --/

 /^A\\xC8/
     AB
+    A\x42B
     ** Fail
     A B
+    A\x41B


/-- Test \R --/


Modified: code/trunk/testdata/testoutputEBC
===================================================================
--- code/trunk/testdata/testoutputEBC    2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/testdata/testoutputEBC    2015-06-19 16:10:07 UTC (rev 1569)
@@ -41,6 +41,8 @@
 /^A\\x88/
     A B
  0: A\x20
+    A\x41B
+ 0: AA


/-- Test \H --/

@@ -47,10 +49,14 @@
 /^A\\xC8/
     AB
  0: AB
+    A\x42B
+ 0: AB
     ** Fail
 No match
     A B
 No match
+    A\x41B
+No match


/-- Test \R --/