Revision: 125
http://www.exim.org/viewvc/pcre2?view=rev&revision=125
Author: ph10
Date: 2014-10-26 18:00:19 +0000 (Sun, 26 Oct 2014)
Log Message:
-----------
Convert the special "EBCDIC on an ASCII system" test.
Modified Paths:
--------------
code/trunk/src/pcre2_compile.c
Added Paths:
-----------
code/trunk/testdata/testinputEBC
code/trunk/testdata/testoutputEBC
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2014-10-25 15:51:01 UTC (rev 124)
+++ code/trunk/src/pcre2_compile.c 2014-10-26 18:00:19 UTC (rev 125)
@@ -304,10 +304,17 @@
#else
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
-It runs from 'a' to '9'. */
+It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
+is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
+because it is defined as 'a', which of course picks up the ASCII value. */
+#if 'a' == 0x81 /* Check for a real EBCDIC environment */
#define ESCAPES_FIRST CHAR_a
#define ESCAPES_LAST CHAR_9
+#else /* Testing in an ASCII environment */
+#define ESCAPES_FIRST ((unsigned char)'\x81') /* EBCDIC 'a' */
+#define ESCAPES_LAST ((unsigned char)'\xf9') /* EBCDIC '9' */
+#endif
static const short int escapes[] = {
/* 80 */ 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
@@ -328,7 +335,7 @@
/* F8 */ 0, 0
};
-#endif
+#endif /* EBCDIC */
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
Added: code/trunk/testdata/testinputEBC
===================================================================
--- code/trunk/testdata/testinputEBC (rev 0)
+++ code/trunk/testdata/testinputEBC 2014-10-26 18:00:19 UTC (rev 125)
@@ -0,0 +1,121 @@
+# This is a specialized test for checking, when PCRE2 is compiled with the
+# EBCDIC option but in an ASCII environment, that newline and white space
+# functionality is working. It catches cases where explicit values such as 0x0a
+# have been used instead of names like CHAR_LF. Needless to say, it is not a
+# genuine EBCDIC test! In patterns, alphabetic characters that follow a
+# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be
+# in EBCDIC, but can of course be specified as escapes.
+
+# Test default newline and variations
+
+/^A/m
+ ABC
+ 12\x15ABC
+
+/^A/m,newline=any
+ 12\x15ABC
+ 12\x0dABC
+ 12\x0d\x15ABC
+ 12\x25ABC
+
+/^A/m,newline=anycrlf
+ 12\x15ABC
+ 12\x0dABC
+ 12\x0d\x15ABC
+ ** Fail
+ 12\x25ABC
+
+# Test \h
+
+/^A\\x88/
+ A B
+
+# Test \H
+
+/^A\\xC8/
+ AB
+ ** Fail
+ A B
+
+# Test \R
+
+/^A\\xD9/
+ A\x15B
+ A\x0dB
+ A\x25B
+ A\x0bB
+ A\x0cB
+ ** Fail
+ A B
+
+# Test \v
+
+/^A\\xA5/
+ A\x15B
+ A\x0dB
+ A\x25B
+ A\x0bB
+ A\x0cB
+ ** Fail
+ A B
+
+# Test \V
+
+/^A\\xE5/
+ A B
+ ** Fail
+ A\x15B
+ A\x0dB
+ A\x25B
+ A\x0bB
+ A\x0cB
+
+# For repeated items, use an atomic group so that the output is the same
+# for DFA matching (otherwise it may show multiple matches).
+
+# Test \h+
+
+/^A(?>\\x88+)/
+ A B
+
+# Test \H+
+
+/^A(?>\\xC8+)/
+ AB
+ ** Fail
+ A B
+
+# Test \R+
+
+/^A(?>\\xD9+)/
+ A\x15B
+ A\x0dB
+ A\x25B
+ A\x0bB
+ A\x0cB
+ ** Fail
+ A B
+
+# Test \v+
+
+/^A(?>\\xA5+)/
+ A\x15B
+ A\x0dB
+ A\x25B
+ A\x0bB
+ A\x0cB
+ ** Fail
+ A B
+
+# Test \V+
+
+/^A(?>\\xE5+)/
+ A B
+ ** Fail
+ A\x15B
+ A\x0dB
+ A\x25B
+ A\x0bB
+ A\x0cB
+
+# End
Added: code/trunk/testdata/testoutputEBC
===================================================================
--- code/trunk/testdata/testoutputEBC (rev 0)
+++ code/trunk/testdata/testoutputEBC 2014-10-26 18:00:19 UTC (rev 125)
@@ -0,0 +1,182 @@
+# This is a specialized test for checking, when PCRE2 is compiled with the
+# EBCDIC option but in an ASCII environment, that newline and white space
+# functionality is working. It catches cases where explicit values such as 0x0a
+# have been used instead of names like CHAR_LF. Needless to say, it is not a
+# genuine EBCDIC test! In patterns, alphabetic characters that follow a
+# backslash must be in EBCDIC code. In data, NL, NEL, LF, ESC, and DEL must be
+# in EBCDIC, but can of course be specified as escapes.
+
+# Test default newline and variations
+
+/^A/m
+ ABC
+ 0: A
+ 12\x15ABC
+ 0: A
+
+/^A/m,newline=any
+ 12\x15ABC
+ 0: A
+ 12\x0dABC
+ 0: A
+ 12\x0d\x15ABC
+ 0: A
+ 12\x25ABC
+ 0: A
+
+/^A/m,newline=anycrlf
+ 12\x15ABC
+ 0: A
+ 12\x0dABC
+ 0: A
+ 12\x0d\x15ABC
+ 0: A
+ ** Fail
+No match
+ 12\x25ABC
+No match
+
+# Test \h
+
+/^A\\x88/
+ A B
+ 0: A\x20
+
+# Test \H
+
+/^A\\xC8/
+ AB
+ 0: AB
+ ** Fail
+No match
+ A B
+No match
+
+# Test \R
+
+/^A\\xD9/
+ A\x15B
+ 0: A\x15
+ A\x0dB
+ 0: A\x0d
+ A\x25B
+ 0: A\x25
+ A\x0bB
+ 0: A\x0b
+ A\x0cB
+ 0: A\x0c
+ ** Fail
+No match
+ A B
+No match
+
+# Test \v
+
+/^A\\xA5/
+ A\x15B
+ 0: A\x15
+ A\x0dB
+ 0: A\x0d
+ A\x25B
+ 0: A\x25
+ A\x0bB
+ 0: A\x0b
+ A\x0cB
+ 0: A\x0c
+ ** Fail
+No match
+ A B
+No match
+
+# Test \V
+
+/^A\\xE5/
+ A B
+ 0: A\x20
+ ** Fail
+No match
+ A\x15B
+No match
+ A\x0dB
+No match
+ A\x25B
+No match
+ A\x0bB
+No match
+ A\x0cB
+No match
+
+# For repeated items, use an atomic group so that the output is the same
+# for DFA matching (otherwise it may show multiple matches).
+
+# Test \h+
+
+/^A(?>\\x88+)/
+ A B
+ 0: A\x20
+
+# Test \H+
+
+/^A(?>\\xC8+)/
+ AB
+ 0: AB
+ ** Fail
+No match
+ A B
+No match
+
+# Test \R+
+
+/^A(?>\\xD9+)/
+ A\x15B
+ 0: A\x15
+ A\x0dB
+ 0: A\x0d
+ A\x25B
+ 0: A\x25
+ A\x0bB
+ 0: A\x0b
+ A\x0cB
+ 0: A\x0c
+ ** Fail
+No match
+ A B
+No match
+
+# Test \v+
+
+/^A(?>\\xA5+)/
+ A\x15B
+ 0: A\x15
+ A\x0dB
+ 0: A\x0d
+ A\x25B
+ 0: A\x25
+ A\x0bB
+ 0: A\x0b
+ A\x0cB
+ 0: A\x0c
+ ** Fail
+No match
+ A B
+No match
+
+# Test \V+
+
+/^A(?>\\xE5+)/
+ A B
+ 0: A\x20B
+ ** Fail
+No match
+ A\x15B
+No match
+ A\x0dB
+No match
+ A\x25B
+No match
+ A\x0bB
+No match
+ A\x0cB
+No match
+
+# End