[Pcre-svn] [284] code/trunk: Fix \a and \e in pcre2test, an…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [284] code/trunk: Fix \a and \e in pcre2test, and \a in pcre2_compile, on EBCDIC platforms.
Revision: 284
          http://www.exim.org/viewvc/pcre2?view=rev&revision=284
Author:   ph10
Date:     2015-06-12 17:25:23 +0100 (Fri, 12 Jun 2015)
Log Message:
-----------
Fix \a and \e in pcre2test, and \a in pcre2_compile, on EBCDIC platforms.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/src/pcre2_internal.h
    code/trunk/src/pcre2test.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/ChangeLog    2015-06-12 16:25:23 UTC (rev 284)
@@ -155,7 +155,13 @@
 an empty string was repeated, it was not identified as matching an empty string 
 itself. For example: /^(?:(?(1)x|)+)+$()/.


+40. In an EBCDIC environment, pcre2test was mishandling the escape sequences
+\a and \e in test subject lines.

+41. In an EBCDIC environment, \a in a pattern was converted to the ASCII
+value instead of the EBCDIC value.
+
+
Version 10.10 06-March-2015
---------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/src/pcre2_compile.c    2015-06-12 16:25:23 UTC (rev 284)
@@ -296,7 +296,7 @@
      -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
      CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
      CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
-     CHAR_GRAVE_ACCENT,       7,
+     CHAR_GRAVE_ACCENT,       ESC_a,
      -ESC_b,                  0,
      -ESC_d,                  ESC_e,
      ESC_f,                   0,
@@ -328,7 +328,7 @@
 #endif


 static const short int escapes[] = {
-/*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
+/*  80 */     0, ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
 /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
 /*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
 /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,


Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h    2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/src/pcre2_internal.h    2015-06-12 16:25:23 UTC (rev 284)
@@ -1192,31 +1192,6 @@


/* -------------------- Definitions for compiled patterns -------------------*/

-/* Escape items that are just an encoding of a particular data value. */
-
-#ifndef ESC_e
-#define ESC_e CHAR_ESC
-#endif
-
-#ifndef ESC_f
-#define ESC_f CHAR_FF
-#endif
-
-#ifndef ESC_n
-#define ESC_n CHAR_LF
-#endif
-
-#ifndef ESC_r
-#define ESC_r CHAR_CR
-#endif
-
-/* We can't officially use ESC_t because it is a POSIX reserved identifier
-(presumably because of all the others like size_t). */
-
-#ifndef ESC_tee
-#define ESC_tee CHAR_HT
-#endif
-
/* Codes for different types of Unicode property */

 #define PT_ANY        0    /* Any property - matches all chars */
@@ -1255,14 +1230,47 @@
 #define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
 #define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */


+/* Escape items that are just an encoding of a particular data value. These
+appear in the escapes[] table in pcre2_compile.c as positive numbers. */
+
+#ifndef ESC_a
+#define ESC_a CHAR_BEL
+#endif
+
+#ifndef ESC_e
+#define ESC_e CHAR_ESC
+#endif
+
+#ifndef ESC_f
+#define ESC_f CHAR_FF
+#endif
+
+#ifndef ESC_n
+#define ESC_n CHAR_LF
+#endif
+
+#ifndef ESC_r
+#define ESC_r CHAR_CR
+#endif
+
+/* We can't officially use ESC_t because it is a POSIX reserved identifier
+(presumably because of all the others like size_t). */
+
+#ifndef ESC_tee
+#define ESC_tee CHAR_HT
+#endif
+
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0
-for a data character. Also, they must appear in the same order as in the
-opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
-corresponds to "." in DOTALL mode rather than an escape sequence. It is also
-used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
-non-DOTALL mode, "." behaves like \N.
+for a data character. In the escapes[] table in pcre2_compile.c their values
+are negated in order to distinguish them from data values.

+They must appear here in the same order as in the opcode definitions below, up
+to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
+mode rather than an escape sequence. It is also used for [^] in JavaScript
+compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
+like \N.
+
The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
when PCRE_UCP is set and replacement of \d etc by \p sequences is required.
They must be contiguous, and remain in order so that the replacements can be

Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c    2015-06-09 17:41:45 UTC (rev 283)
+++ code/trunk/src/pcre2test.c    2015-06-12 16:25:23 UTC (rev 284)
@@ -5181,9 +5181,9 @@
   else switch ((c = *p++))
     {
     case '\\': break;
-    case 'a': c =    7; break;
+    case 'a': c = CHAR_BEL; break;
     case 'b': c = '\b'; break;
-    case 'e': c =   27; break;
+    case 'e': c = CHAR_ESC; break;
     case 'f': c = '\f'; break;
     case 'n': c = '\n'; break;
     case 'r': c = '\r'; break;