Revision: 284
http://www.exim.org/viewvc/pcre2?view=rev&revision=284
Author: ph10
Date: 2015-06-12 17:25:23 +0100 (Fri, 12 Jun 2015)
Log Message:
-----------
Fix \a and \e in pcre2test, and \a in pcre2_compile, on EBCDIC platforms.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_internal.h
code/trunk/src/pcre2test.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/ChangeLog 2015-06-12 16:25:23 UTC (rev 284)
@@ -155,7 +155,13 @@
an empty string was repeated, it was not identified as matching an empty string
itself. For example: /^(?:(?(1)x|)+)+$()/.
+40. In an EBCDIC environment, pcre2test was mishandling the escape sequences
+\a and \e in test subject lines.
+41. In an EBCDIC environment, \a in a pattern was converted to the ASCII value
+instead of the EBCDIC value.
+
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/src/pcre2_compile.c 2015-06-12 16:25:23 UTC (rev 284)
@@ -296,7 +296,7 @@
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
- CHAR_GRAVE_ACCENT, 7,
+ CHAR_GRAVE_ACCENT, ESC_a,
-ESC_b, 0,
-ESC_d, ESC_e,
ESC_f, 0,
@@ -328,7 +328,7 @@
#endif
static const short int escapes[] = {
-/* 80 */ 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
+/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h 2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/src/pcre2_internal.h 2015-06-12 16:25:23 UTC (rev 284)
@@ -1192,31 +1192,6 @@
/* -------------------- Definitions for compiled patterns -------------------*/
-/* Escape items that are just an encoding of a particular data value. */
-
-#ifndef ESC_e
-#define ESC_e CHAR_ESC
-#endif
-
-#ifndef ESC_f
-#define ESC_f CHAR_FF
-#endif
-
-#ifndef ESC_n
-#define ESC_n CHAR_LF
-#endif
-
-#ifndef ESC_r
-#define ESC_r CHAR_CR
-#endif
-
-/* We can't officially use ESC_t because it is a POSIX reserved identifier
-(presumably because of all the others like size_t). */
-
-#ifndef ESC_tee
-#define ESC_tee CHAR_HT
-#endif
-
/* Codes for different types of Unicode property */
#define PT_ANY 0 /* Any property - matches all chars */
@@ -1255,14 +1230,47 @@
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
+/* Escape items that are just an encoding of a particular data value. These
+appear in the escapes[] table in pcre2_compile.c as positive numbers. */
+
+#ifndef ESC_a
+#define ESC_a CHAR_BEL
+#endif
+
+#ifndef ESC_e
+#define ESC_e CHAR_ESC
+#endif
+
+#ifndef ESC_f
+#define ESC_f CHAR_FF
+#endif
+
+#ifndef ESC_n
+#define ESC_n CHAR_LF
+#endif
+
+#ifndef ESC_r
+#define ESC_r CHAR_CR
+#endif
+
+/* We can't officially use ESC_t because it is a POSIX reserved identifier
+(presumably because of all the others like size_t). */
+
+#ifndef ESC_tee
+#define ESC_tee CHAR_HT
+#endif
+
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0
-for a data character. Also, they must appear in the same order as in the
-opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
-corresponds to "." in DOTALL mode rather than an escape sequence. It is also
-used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
-non-DOTALL mode, "." behaves like \N.
+for a data character. In the escapes[] table in pcre2_compile.c their values
+are negated in order to distinguish them from data values.
+They must appear here in the same order as in the opcode definitions below, up
+to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
+mode rather than an escape sequence. It is also used for [^] in JavaScript
+compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
+like \N.
+
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
They must be contiguous, and remain in order so that the replacements can be
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/src/pcre2test.c 2015-06-12 16:25:23 UTC (rev 284)
@@ -5181,9 +5181,9 @@
else switch ((c = *p++))
{
case '\\': break;
- case 'a': c = 7; break;
+ case 'a': c = CHAR_BEL; break;
case 'b': c = '\b'; break;
- case 'e': c = 27; break;
+ case 'e': c = CHAR_ESC; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;