Revision: 274
http://www.exim.org/viewvc/pcre2?view=rev&revision=274
Author: ph10
Date: 2015-05-20 18:44:17 +0100 (Wed, 20 May 2015)
Log Message:
-----------
Make pcre2test give an error for \P, \p, and \X after #forbid_utf.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcre2test.1
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_internal.h
code/trunk/src/pcre2test.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-05-19 16:56:39 UTC (rev 273)
+++ code/trunk/ChangeLog 2015-05-20 17:44:17 UTC (rev 274)
@@ -134,7 +134,10 @@
34. Give an error for an empty subpattern name such as (?'').
+35. Make pcre2test give an error if a pattern that follows #forbid_utf contains
+\P, \p, or \X.
+
Version 10.10 06-March-2015
---------------------------
Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1 2015-05-19 16:56:39 UTC (rev 273)
+++ code/trunk/doc/pcre2test.1 2015-05-20 17:44:17 UTC (rev 274)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "22 April 2015" "PCRE 10.20"
+.TH PCRE2TEST 1 "20 May 2015" "PCRE 10.20"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -237,13 +237,19 @@
#forbid_utf
.sp
Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP
-options set, which locks out the use of UTF and Unicode property features. This
-is a trigger guard that is used in test files to ensure that UTF or Unicode
-property tests are not accidentally added to files that are used when Unicode
-support is not included in the library. This effect can also be obtained by the
-use of \fB#pattern\fP; the difference is that \fB#forbid_utf\fP cannot be
-unset, and the automatic options are not displayed in pattern information, to
-avoid cluttering up test output.
+options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and
+the use of (*UTF) and (*UCP) at the start of patterns. This command also forces
+an error if a subsequent pattern contains any occurrences of \eP, \ep, or \eX,
+which are still supported when PCRE2_UTF is not set, but which require Unicode
+property support to be included in the library.
+.P
+This is a trigger guard that is used in test files to ensure that UTF or
+Unicode property tests are not accidentally added to files that are used when
+Unicode support is not included in the library. Setting PCRE2_NEVER_UTF and
+PCRE2_NEVER_UCP as a default can also be achieved by the use of \fB#pattern\fP;
+the difference is that \fB#forbid_utf\fP cannot be unset, and the automatic
+options are not displayed in pattern information, to avoid cluttering up test
+output.
.sp
#load <filename>
.sp
@@ -1445,6 +1451,6 @@
.rs
.sp
.nf
-Last updated: 22 April 2015
+Last updated: 20 May 2015
Copyright (c) 1997-2015 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-05-19 16:56:39 UTC (rev 273)
+++ code/trunk/src/pcre2_compile.c 2015-05-20 17:44:17 UTC (rev 274)
@@ -1729,8 +1729,12 @@
else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
{
- if (i > 0) c = (uint32_t)i; /* Positive is a data character */
- else escape = -i; /* Else return a special escape */
+ if (i > 0) c = (uint32_t)i; else /* Positive is a data character */
+ {
+ escape = -i; /* Else return a special escape */
+ if (escape == ESC_P || escape == ESC_p || escape == ESC_X)
+ cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
+ }
}
/* Escapes that need further processing, including those that are unknown. */
Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h 2015-05-19 16:56:39 UTC (rev 273)
+++ code/trunk/src/pcre2_internal.h 2015-05-20 17:44:17 UTC (rev 274)
@@ -525,6 +525,7 @@
#define PCRE2_NE_ATST_SET 0x00020000 /* (*NOTEMPTY_ATSTART) used) together */
#define PCRE2_DEREF_TABLES 0x00040000 /* Release character tables. */
#define PCRE2_NOJIT 0x00080000 /* (*NOJIT) used */
+#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2015-05-19 16:56:39 UTC (rev 273)
+++ code/trunk/src/pcre2test.c 2015-05-20 17:44:17 UTC (rev 274)
@@ -4492,7 +4492,21 @@
fprintf(outfile, "\n");
return PR_SKIP;
}
+
+/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
+locked out at compile time, but we must also check for occurrences of \P, \p,
+and \X, which are only supported when Unicode is supported. */
+if (forbid_utf != 0)
+ {
+ if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
+ {
+ fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
+ "#forbid_utf command\n");
+ return PR_SKIP;
+ }
+ }
+
/* Remember the maximum lookbehind, for partial matching. */
if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)