Revision: 1569
http://vcs.pcre.org/viewvc?view=rev&revision=1569
Author: ph10
Date: 2015-06-19 17:10:07 +0100 (Fri, 19 Jun 2015)
Log Message:
-----------
Recognize the EBCDIC non-breaking space (0x41) as white space, and give an error
for \p or \P in a character class when PCRE is built without UCP support.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/pcre_internal.h
code/trunk/testdata/testinputEBC
code/trunk/testdata/testoutputEBC
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/ChangeLog 2015-06-19 16:10:07 UTC (rev 1569)
@@ -60,6 +60,14 @@
14. The handling of \c in an EBCDIC environment has been revised so that it is
now compatible with the specification in Perl's perlebcdic page.
+
+15. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in
+ ASCII/Unicode. This has now been added to the list of characters that are
+ recognized as white space in EBCDIC.
+
+16. When PCRE was compiled without UCP support, the use of \p and \P gave an
+ error (correctly) when used outside a class, but did not give an error
+ within a class.
Version 8.37 28-April-2015
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/pcre_compile.c 2015-06-19 16:10:07 UTC (rev 1569)
@@ -5218,9 +5218,9 @@
cd, PRIV(vspace_list));
continue;
-#ifdef SUPPORT_UCP
case ESC_p:
case ESC_P:
+#ifdef SUPPORT_UCP
{
BOOL negated;
unsigned int ptype = 0, pdata = 0;
@@ -5234,6 +5234,9 @@
class_has_8bitchar--; /* Undo! */
continue;
}
+#else
+ *errorcodeptr = ERR45;
+ goto FAILED;
#endif
/* Unrecognized escapes are faulted if PCRE is running in its
strict mode. By default, for compatibility with Perl, they are
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/pcre_internal.h 2015-06-19 16:10:07 UTC (rev 1569)
@@ -984,7 +984,7 @@
#ifndef EBCDIC
#define HSPACE_LIST \
- CHAR_HT, CHAR_SPACE, 0xa0, \
+ CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
NOTACHAR
@@ -1010,7 +1010,7 @@
#define HSPACE_BYTE_CASES \
case CHAR_HT: \
case CHAR_SPACE: \
- case 0xa0 /* NBSP */
+ case CHAR_NBSP
#define HSPACE_CASES \
HSPACE_BYTE_CASES: \
@@ -1037,11 +1037,12 @@
/* ------ EBCDIC environments ------ */
#else
-#define HSPACE_LIST CHAR_HT, CHAR_SPACE
+#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP
#define HSPACE_BYTE_CASES \
case CHAR_HT: \
- case CHAR_SPACE
+ case CHAR_SPACE: \
+ case CHAR_NBSP
#define HSPACE_CASES HSPACE_BYTE_CASES
@@ -1215,6 +1216,7 @@
#define CHAR_ESC '\047'
#define CHAR_DEL '\007'
+#define CHAR_NBSP '\101'
#define STR_ESC "\047"
#define STR_DEL "\007"
@@ -1229,6 +1231,7 @@
#define CHAR_NEL ((unsigned char)'\x85')
#define CHAR_ESC '\033'
#define CHAR_DEL '\177'
+#define CHAR_NBSP ((unsigned char)'\xa0')
#define STR_LF "\n"
#define STR_NL STR_LF
@@ -1606,6 +1609,7 @@
#define CHAR_VERTICAL_LINE '\174'
#define CHAR_RIGHT_CURLY_BRACKET '\175'
#define CHAR_TILDE '\176'
+#define CHAR_NBSP ((unsigned char)'\xa0')
#define STR_HT "\011"
#define STR_VT "\013"
Modified: code/trunk/testdata/testinputEBC
===================================================================
--- code/trunk/testdata/testinputEBC 2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/testdata/testinputEBC 2015-06-19 16:10:07 UTC (rev 1569)
@@ -29,13 +29,16 @@
/^A\\x88/
A B
+ A\x41B
/-- Test \H --/
/^A\\xC8/
AB
+ A\x42B
** Fail
A B
+ A\x41B
/-- Test \R --/
Modified: code/trunk/testdata/testoutputEBC
===================================================================
--- code/trunk/testdata/testoutputEBC 2015-06-14 15:53:41 UTC (rev 1568)
+++ code/trunk/testdata/testoutputEBC 2015-06-19 16:10:07 UTC (rev 1569)
@@ -41,6 +41,8 @@
/^A\\x88/
A B
0: A\x20
+ A\x41B
+ 0: AA
/-- Test \H --/
@@ -47,10 +49,14 @@
/^A\\xC8/
AB
0: AB
+ A\x42B
+ 0: AB
** Fail
No match
A B
No match
+ A\x41B
+No match
/-- Test \R --/