[Pcre-svn] [594] code/trunk: Add use_length to pcre2test.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [594] code/trunk: Add use_length to pcre2test.
Revision: 594
          http://www.exim.org/viewvc/pcre2?view=rev&revision=594
Author:   ph10
Date:     2016-11-04 10:53:43 +0000 (Fri, 04 Nov 2016)
Log Message:
-----------
Add use_length to pcre2test.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcre2test.1
    code/trunk/src/pcre2test.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-11-03 18:25:51 UTC (rev 593)
+++ code/trunk/ChangeLog    2016-11-04 10:53:43 UTC (rev 594)
@@ -147,7 +147,9 @@


21. Make pcre2test -C list valgrind support when it is enabled.

+22. Add the use_length modifier to pcre2test.

+
Version 10.22 29-July-2016
--------------------------


Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1    2016-11-03 18:25:51 UTC (rev 593)
+++ code/trunk/doc/pcre2test.1    2016-11-04 10:53:43 UTC (rev 594)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "02 August 2016" "PCRE 10.23"
+.TH PCRE2TEST 1 "04 November 2016" "PCRE 10.23"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -580,6 +580,7 @@
       pushcopy                  push a copy onto the stack
       stackguard=<number>       test the stackguard feature
       tables=[0|1|2]            select internal tables
+      use_length                do not zero-terminate the pattern 
       utf8_input                treat input as UTF-8 
 .sp
 The effects of these modifiers are described in the following sections.
@@ -658,6 +659,18 @@
 default values).
 .
 .
+.SS "Specifying the pattern's length"
+.rs
+.sp
+By default, patterns are passed to the compiling functions as zero-terminated
+strings. When using the POSIX wrapper API, there is no other option. However,
+when using PCRE2's native API, patterns can be passed by length instead of
+being zero-terminated. The \fBuse_length\fP modifier causes this to happen. 
+Using a length happens automatically (whether or not \fBuse_length\fP is set)
+when \fBhex\fP is set, because patterns specified in hexadecimal may contain
+binary zeros.
+.
+.
 .SS "Specifying pattern characters in hexadecimal"
 .rs
 .sp
@@ -679,10 +692,10 @@
 the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are
 mutually exclusive.
 .P
-By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
-\fBpcre2_compile()\fP, giving the length as PCRE2_ZERO_TERMINATED. However, for
-patterns specified with the \fBhex\fP modifier, the actual length of the
-pattern is passed.
+The POSIX API cannot be used with patterns specified in hexadecimal because 
+they may contain binary zeros, which conflicts with \fBregcomp()\fP's 
+requirement for a zero-terminated string. Such patterns are always passed to 
+\fBpcre2_compile()\fP as a string with a length, not as zero-terminated.
 .
 .
 .SS "Specifying wide characters in 16-bit and 32-bit modes"
@@ -1734,6 +1747,6 @@
 .rs
 .sp
 .nf
-Last updated: 02 August 2016
+Last updated: 04 November 2016
 Copyright (c) 1997-2016 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c    2016-11-03 18:25:51 UTC (rev 593)
+++ code/trunk/src/pcre2test.c    2016-11-04 10:53:43 UTC (rev 594)
@@ -418,7 +418,7 @@
 #define CTL_FULLBINCODE                  0x00001000u
 #define CTL_GETALL                       0x00002000u
 #define CTL_GLOBAL                       0x00004000u
-#define CTL_HEXPAT                       0x00008000u
+#define CTL_HEXPAT                       0x00008000u  /* Same word as USE_LENGTH */
 #define CTL_INFO                         0x00010000u
 #define CTL_JITFAST                      0x00020000u
 #define CTL_JITVERIFY                    0x00040000u
@@ -430,9 +430,10 @@
 #define CTL_PUSH                         0x01000000u
 #define CTL_PUSHCOPY                     0x02000000u
 #define CTL_STARTCHAR                    0x04000000u
-#define CTL_UTF8_INPUT                   0x08000000u
-#define CTL_ZERO_TERMINATE               0x10000000u
-/* Spare                                 0x20000000u  */
+#define CTL_USE_LENGTH                   0x08000000u  /* Same word as HEXPAT */
+#define CTL_UTF8_INPUT                   0x10000000u
+#define CTL_ZERO_TERMINATE               0x20000000u
+
 #define CTL_NL_SET                       0x40000000u  /* Informational */
 #define CTL_BSR_SET                      0x80000000u  /* Informational */


@@ -620,6 +621,7 @@
   { "tables",                     MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
   { "ucp",                        MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
   { "ungreedy",                   MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
+  { "use_length",                 MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
   { "use_offset_limit",           MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
   { "utf",                        MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
   { "utf8_input",                 MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
@@ -649,7 +651,8 @@


#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
- CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET)
+ CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET| \
+ CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)

@@ -661,7 +664,7 @@
/* Controls that are forbidden with #pop or #popcopy. */

#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
- CTL_PUSHCOPY)
+ CTL_PUSHCOPY|CTL_USE_LENGTH)

/* Pattern controls that are mutually exclusive. At present these are all in
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
@@ -671,6 +674,7 @@
CTL_POSIX | CTL_HEXPAT,
CTL_POSIX | CTL_PUSH,
CTL_POSIX | CTL_PUSHCOPY,
+ CTL_POSIX | CTL_USE_LENGTH,
CTL_EXPAND | CTL_HEXPAT };

/* Data controls that are mutually exclusive. At present these are all in the
@@ -3681,7 +3685,7 @@
static void
show_controls(uint32_t controls, uint32_t controls2, const char *before)
{
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@@ -3716,6 +3720,7 @@
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
+ ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
}
@@ -4976,12 +4981,13 @@
}

/* The pattern is now in pbuffer[8|16|32], with the length in code units in
-patlen. By default, however, we pass a zero-terminated pattern. The length is
-passed only if we had a hex pattern. When valgrind is supported, arrange for
-the unused part of the buffer to be marked as no access. */
+patlen. By default we pass a zero-terminated pattern, but a length is passed if
+"use_length" was specified or this is a hex pattern (which might contain binary
+zeros). When valgrind is supported, arrange for the unused part of the buffer
+to be marked as no access. */

valgrind_access_length = patlen;
-if ((pat_patctl.control & CTL_HEXPAT) == 0)
+if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
{
patlen = PCRE2_ZERO_TERMINATED;
valgrind_access_length += 1; /* For the terminating zero */