Revision: 840
http://www.exim.org/viewvc/pcre2?view=rev&revision=840
Author: ph10
Date: 2017-07-12 17:34:49 +0100 (Wed, 12 Jul 2017)
Log Message:
-----------
Document experimental pattern conversion functions and remove unimplemented
features.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/Makefile.am
code/trunk/doc/html/index.html
code/trunk/doc/html/pcre2api.html
code/trunk/doc/html/pcre2test.html
code/trunk/doc/index.html.src
code/trunk/doc/pcre2.txt
code/trunk/doc/pcre2api.3
code/trunk/doc/pcre2test.1
code/trunk/doc/pcre2test.txt
code/trunk/src/pcre2.h
code/trunk/src/pcre2.h.in
code/trunk/src/pcre2_convert.c
code/trunk/src/pcre2test.c
code/trunk/testdata/testinput25
code/trunk/testdata/testoutput25
Added Paths:
-----------
code/trunk/doc/html/pcre2_convert_context_copy.html
code/trunk/doc/html/pcre2_convert_context_create.html
code/trunk/doc/html/pcre2_convert_context_free.html
code/trunk/doc/html/pcre2_converted_pattern_free.html
code/trunk/doc/html/pcre2_pattern_convert.html
code/trunk/doc/html/pcre2_set_glob_escape.html
code/trunk/doc/html/pcre2_set_glob_separator.html
code/trunk/doc/html/pcre2convert.html
code/trunk/doc/pcre2_convert_context_copy.3
code/trunk/doc/pcre2_convert_context_create.3
code/trunk/doc/pcre2_convert_context_free.3
code/trunk/doc/pcre2_converted_pattern_free.3
code/trunk/doc/pcre2_pattern_convert.3
code/trunk/doc/pcre2_set_glob_escape.3
code/trunk/doc/pcre2_set_glob_separator.3
code/trunk/doc/pcre2convert.3
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/ChangeLog 2017-07-12 16:34:49 UTC (rev 840)
@@ -216,7 +216,10 @@
49. Update extended grapheme breaking rules to the latest set that are in
Unicode Standard Annex #29.
+50. Added experimental foreign pattern conversion facilities
+(pcre2_pattern_convert() and friends).
+
Version 10.23 14-February-2017
------------------------------
Modified: code/trunk/Makefile.am
===================================================================
--- code/trunk/Makefile.am 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/Makefile.am 2017-07-12 16:34:49 UTC (rev 840)
@@ -36,6 +36,10 @@
doc/html/pcre2_compile_context_create.html \
doc/html/pcre2_compile_context_free.html \
doc/html/pcre2_config.html \
+ doc/html/pcre2_convert_context_copy.html \
+ doc/html/pcre2_convert_context_create.html \
+ doc/html/pcre2_convert_context_free.html \
+ doc/html/pcre2_converted_pattern_free.html \
doc/html/pcre2_dfa_match.html \
doc/html/pcre2_general_context_copy.html \
doc/html/pcre2_general_context_create.html \
@@ -59,6 +63,7 @@
doc/html/pcre2_match_data_create.html \
doc/html/pcre2_match_data_create_from_pattern.html \
doc/html/pcre2_match_data_free.html \
+ doc/html/pcre2_pattern_convert.html \
doc/html/pcre2_pattern_info.html \
doc/html/pcre2_serialize_decode.html \
doc/html/pcre2_serialize_encode.html \
@@ -70,6 +75,8 @@
doc/html/pcre2_set_compile_extra_options.html \
doc/html/pcre2_set_compile_recursion_guard.html \
doc/html/pcre2_set_depth_limit.html \
+ doc/html/pcre2_set_glob_escape.html \
+ doc/html/pcre2_set_glob_separator.html \
doc/html/pcre2_set_heap_limit.html \
doc/html/pcre2_set_match_limit.html \
doc/html/pcre2_set_max_pattern_length.html \
@@ -94,6 +101,7 @@
doc/html/pcre2build.html \
doc/html/pcre2callout.html \
doc/html/pcre2compat.html \
+ doc/html/pcre2convert.html \
doc/html/pcre2demo.html \
doc/html/pcre2grep.html \
doc/html/pcre2jit.html \
@@ -121,6 +129,10 @@
doc/pcre2_compile_context_create.3 \
doc/pcre2_compile_context_free.3 \
doc/pcre2_config.3 \
+ doc/pcre2_convert_context_copy.3 \
+ doc/pcre2_convert_context_create.3 \
+ doc/pcre2_convert_context_free.3 \
+ doc/pcre2_converted_pattern_free.3 \
doc/pcre2_dfa_match.3 \
doc/pcre2_general_context_copy.3 \
doc/pcre2_general_context_create.3 \
@@ -144,6 +156,7 @@
doc/pcre2_match_data_create.3 \
doc/pcre2_match_data_create_from_pattern.3 \
doc/pcre2_match_data_free.3 \
+ doc/pcre2_pattern_convert.3 \
doc/pcre2_pattern_info.3 \
doc/pcre2_serialize_decode.3 \
doc/pcre2_serialize_encode.3 \
@@ -155,6 +168,8 @@
doc/pcre2_set_compile_extra_options.3 \
doc/pcre2_set_compile_recursion_guard.3 \
doc/pcre2_set_depth_limit.3 \
+ doc/pcre2_set_glob_escape.3 \
+ doc/pcre2_set_glob_separator.3 \
doc/pcre2_set_heap_limit.3 \
doc/pcre2_set_match_limit.3 \
doc/pcre2_set_max_pattern_length.3 \
@@ -179,6 +194,7 @@
doc/pcre2build.3 \
doc/pcre2callout.3 \
doc/pcre2compat.3 \
+ doc/pcre2convert.3 \
doc/pcre2demo.3 \
doc/pcre2grep.1 \
doc/pcre2jit.3 \
Modified: code/trunk/doc/html/index.html
===================================================================
--- code/trunk/doc/html/index.html 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/html/index.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -35,6 +35,9 @@
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
<td> Compatibility with Perl</td></tr>
+<tr><td><a href="pcre2convert.html">pcre2convert</a></td>
+ <td> Experimental foreign pattern conversion functions</td></tr>
+
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
<td> A demonstration C program that uses the PCRE2 library</td></tr>
@@ -112,6 +115,18 @@
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
<td> Show build-time configuration options</td></tr>
+<tr><td><a href="pcre2_convert_context_copy.html">pcre2_convert_context_copy</a></td>
+ <td> Copy a convert context</td></tr>
+
+<tr><td><a href="pcre2_convert_context_create.html">pcre2_convert_context_create</a></td>
+ <td> Create a convert context</td></tr>
+
+<tr><td><a href="pcre2_convert_context_free.html">pcre2_convert_context_free</a></td>
+ <td> Free a convert context</td></tr>
+
+<tr><td><a href="pcre2_converted_pattern_free.html">pcre2_converted_pattern_free</a></td>
+ <td> Free converted foreign pattern</td></tr>
+
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
<td> Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
@@ -183,6 +198,9 @@
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
<td> Free a match data block</td></tr>
+<tr><td><a href="pcre2_pattern_convert.html">pcre2_pattern_convert</a></td>
+ <td> Experimental foreign pattern converter</td></tr>
+
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
<td> Extract information about a pattern</td></tr>
@@ -216,6 +234,12 @@
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td> Set the match backtracking depth limit</td></tr>
+<tr><td><a href="pcre2_set_glob_escape.html">pcre2_set_glob_escape</a></td>
+ <td> Set glob escape character</td></tr>
+
+<tr><td><a href="pcre2_set_glob_separator.html">pcre2_set_glob_separator</a></td>
+ <td> Set glob separator character</td></tr>
+
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td> Set the match backtracking heap limit</td></tr>
Added: code/trunk/doc/html/pcre2_convert_context_copy.html
===================================================================
--- code/trunk/doc/html/pcre2_convert_context_copy.html (rev 0)
+++ code/trunk/doc/html/pcre2_convert_context_copy.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,40 @@
+<html>
+<head>
+<title>pcre2_convert_context_copy specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_convert_context_copy man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
+<b> pcre2_convert_context *<i>cvcontext</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function is part of an experimental set of pattern conversion functions.
+It makes a new copy of a convert context, using the memory allocation function
+that was used for the original context. The result is NULL if the memory cannot
+be obtained.
+</P>
+<P>
+The pattern conversion functions are described in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Added: code/trunk/doc/html/pcre2_convert_context_create.html
===================================================================
--- code/trunk/doc/html/pcre2_convert_context_create.html (rev 0)
+++ code/trunk/doc/html/pcre2_convert_context_create.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,41 @@
+<html>
+<head>
+<title>pcre2_convert_context_create specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_convert_context_create man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>pcre2_convert_context *pcre2_convert_context_create(</b>
+<b> pcre2_general_context *<i>gcontext</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function is part of an experimental set of pattern conversion functions.
+It creates and initializes a new convert context. If its argument is
+NULL, <b>malloc()</b> is used to get the necessary memory; otherwise the memory
+allocation function within the general context is used. The result is NULL if
+the memory could not be obtained.
+</P>
+<P>
+The pattern conversion functions are described in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Added: code/trunk/doc/html/pcre2_convert_context_free.html
===================================================================
--- code/trunk/doc/html/pcre2_convert_context_free.html (rev 0)
+++ code/trunk/doc/html/pcre2_convert_context_free.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,39 @@
+<html>
+<head>
+<title>pcre2_convert_context_free specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_convert_context_free man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function is part of an experimental set of pattern conversion functions.
+It frees the memory occupied by a convert context, using the memory
+freeing function from the general context with which it was created, or
+<b>free()</b> if that was not set.
+</P>
+<P>
+The pattern conversion functions are described in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Added: code/trunk/doc/html/pcre2_converted_pattern_free.html
===================================================================
--- code/trunk/doc/html/pcre2_converted_pattern_free.html (rev 0)
+++ code/trunk/doc/html/pcre2_converted_pattern_free.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,39 @@
+<html>
+<head>
+<title>pcre2_converted_pattern_free specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_converted_pattern_free man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function is part of an experimental set of pattern conversion functions.
+It frees the memory occupied by a converted pattern that was obtained by
+calling <b>pcre2_pattern_convert()</b> with arguments that caused it to place
+the converted pattern into newly obtained heap memory.
+</P>
+<P>
+The pattern conversion functions are described in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Added: code/trunk/doc/html/pcre2_pattern_convert.html
===================================================================
--- code/trunk/doc/html/pcre2_pattern_convert.html (rev 0)
+++ code/trunk/doc/html/pcre2_pattern_convert.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,70 @@
+<html>
+<head>
+<title>pcre2_pattern_convert specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_pattern_convert man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
+<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
+<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function is part of an experimental set of pattern conversion functions.
+It converts a foreign pattern (for example, a glob) into a PCRE2 regular
+expression pattern. Its arguments are:
+<pre>
+ <i>pattern</i> The foreign pattern
+ <i>length</i> The length of the input pattern or PCRE2_ZERO_TERMINATED
+ <i>options</i> Option bits
+ <i>buffer</i> Pointer to pointer to output buffer, or NULL
+ <i>blength</i> Pointer to output length field
+ <i>cvcontext</i> Pointer to a convert context or NULL
+</pre>
+The length of the converted pattern (excluding the terminating zero) is
+returned via <i>blength</i>. If <i>buffer</i> is NULL, the function just returns
+the output length. If <i>buffer</i> points to a NULL pointer, heap memory is
+obtained for the converted pattern, using the allocator in the context if
+present (or else <b>malloc()</b>), and the field pointed to by <i>buffer</i> is
+updated. If <i>buffer</i> points to a non-NULL field, that must point to a
+buffer whose size is in the variable pointed to by <i>blength</i>. This value is
+updated.
+</P>
+<P>
+The option bits are:
+<pre>
+ PCRE2_CONVERT_UTF Input is UTF
+ PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity
+ PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern
+ PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern
+ PCRE2_CONVERT_GLOB ) Convert
+ PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types
+ PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob
+</pre>
+The return value from <b>pcre2_pattern_convert()</b> is zero on success or a
+non-zero PCRE2 error code.
+</P>
+<P>
+The pattern conversion functions are described in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Added: code/trunk/doc/html/pcre2_set_glob_escape.html
===================================================================
--- code/trunk/doc/html/pcre2_set_glob_escape.html (rev 0)
+++ code/trunk/doc/html/pcre2_set_glob_escape.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,43 @@
+<html>
+<head>
+<title>pcre2_set_glob_escape specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_set_glob_escape man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
+<b> uint32_t <i>escape_char</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function is part of an experimental set of pattern conversion functions.
+It sets the escape character that is used when converting globs. The second
+argument must either be zero (meaning there is no escape character) or a
+punctuation character whose code point is less than 256. The default is grave
+accent if running under Windows, otherwise backslash. The result of the
+function is zero for success or PCRE2_ERROR_BADDATA if the second argument is
+invalid.
+</P>
+<P>
+The pattern conversion functions are described in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Added: code/trunk/doc/html/pcre2_set_glob_separator.html
===================================================================
--- code/trunk/doc/html/pcre2_set_glob_separator.html (rev 0)
+++ code/trunk/doc/html/pcre2_set_glob_separator.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,42 @@
+<html>
+<head>
+<title>pcre2_set_glob_separator specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_set_glob_separator man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
+<b> uint32_t <i>separator_char</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function is part of an experimental set of pattern conversion functions.
+It sets the component separator character that is used when converting globs.
+The second argument must be one of the characters forward slash, backslash, or
+dot. The default is backslash when running under Windows, otherwise forward
+slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
+the second argument is invalid.
+</P>
+<P>
+The pattern conversion functions are described in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Modified: code/trunk/doc/html/pcre2api.html
===================================================================
--- code/trunk/doc/html/pcre2api.html 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/html/pcre2api.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -24,37 +24,38 @@
<li><a name="TOC9" href="#SEC9">PCRE2 NATIVE API SERIALIZATION FUNCTIONS</a>
<li><a name="TOC10" href="#SEC10">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
<li><a name="TOC11" href="#SEC11">PCRE2 NATIVE API OBSOLETE FUNCTIONS</a>
-<li><a name="TOC12" href="#SEC12">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
-<li><a name="TOC13" href="#SEC13">PCRE2 API OVERVIEW</a>
-<li><a name="TOC14" href="#SEC14">STRING LENGTHS AND OFFSETS</a>
-<li><a name="TOC15" href="#SEC15">NEWLINES</a>
-<li><a name="TOC16" href="#SEC16">MULTITHREADING</a>
-<li><a name="TOC17" href="#SEC17">PCRE2 CONTEXTS</a>
-<li><a name="TOC18" href="#SEC18">CHECKING BUILD-TIME OPTIONS</a>
-<li><a name="TOC19" href="#SEC19">COMPILING A PATTERN</a>
-<li><a name="TOC20" href="#SEC20">COMPILATION ERROR CODES</a>
-<li><a name="TOC21" href="#SEC21">JUST-IN-TIME (JIT) COMPILATION</a>
-<li><a name="TOC22" href="#SEC22">LOCALE SUPPORT</a>
-<li><a name="TOC23" href="#SEC23">INFORMATION ABOUT A COMPILED PATTERN</a>
-<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
-<li><a name="TOC25" href="#SEC25">SERIALIZATION AND PRECOMPILING</a>
-<li><a name="TOC26" href="#SEC26">THE MATCH DATA BLOCK</a>
-<li><a name="TOC27" href="#SEC27">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
-<li><a name="TOC28" href="#SEC28">NEWLINE HANDLING WHEN MATCHING</a>
-<li><a name="TOC29" href="#SEC29">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
-<li><a name="TOC30" href="#SEC30">OTHER INFORMATION ABOUT A MATCH</a>
-<li><a name="TOC31" href="#SEC31">ERROR RETURNS FROM <b>pcre2_match()</b></a>
-<li><a name="TOC32" href="#SEC32">OBTAINING A TEXTUAL ERROR MESSAGE</a>
-<li><a name="TOC33" href="#SEC33">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
-<li><a name="TOC34" href="#SEC34">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
-<li><a name="TOC35" href="#SEC35">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
-<li><a name="TOC36" href="#SEC36">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
-<li><a name="TOC37" href="#SEC37">DUPLICATE SUBPATTERN NAMES</a>
-<li><a name="TOC38" href="#SEC38">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
-<li><a name="TOC39" href="#SEC39">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
-<li><a name="TOC40" href="#SEC40">SEE ALSO</a>
-<li><a name="TOC41" href="#SEC41">AUTHOR</a>
-<li><a name="TOC42" href="#SEC42">REVISION</a>
+<li><a name="TOC12" href="#SEC12">PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a>
+<li><a name="TOC13" href="#SEC13">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
+<li><a name="TOC14" href="#SEC14">PCRE2 API OVERVIEW</a>
+<li><a name="TOC15" href="#SEC15">STRING LENGTHS AND OFFSETS</a>
+<li><a name="TOC16" href="#SEC16">NEWLINES</a>
+<li><a name="TOC17" href="#SEC17">MULTITHREADING</a>
+<li><a name="TOC18" href="#SEC18">PCRE2 CONTEXTS</a>
+<li><a name="TOC19" href="#SEC19">CHECKING BUILD-TIME OPTIONS</a>
+<li><a name="TOC20" href="#SEC20">COMPILING A PATTERN</a>
+<li><a name="TOC21" href="#SEC21">COMPILATION ERROR CODES</a>
+<li><a name="TOC22" href="#SEC22">JUST-IN-TIME (JIT) COMPILATION</a>
+<li><a name="TOC23" href="#SEC23">LOCALE SUPPORT</a>
+<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A COMPILED PATTERN</a>
+<li><a name="TOC25" href="#SEC25">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
+<li><a name="TOC26" href="#SEC26">SERIALIZATION AND PRECOMPILING</a>
+<li><a name="TOC27" href="#SEC27">THE MATCH DATA BLOCK</a>
+<li><a name="TOC28" href="#SEC28">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
+<li><a name="TOC29" href="#SEC29">NEWLINE HANDLING WHEN MATCHING</a>
+<li><a name="TOC30" href="#SEC30">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
+<li><a name="TOC31" href="#SEC31">OTHER INFORMATION ABOUT A MATCH</a>
+<li><a name="TOC32" href="#SEC32">ERROR RETURNS FROM <b>pcre2_match()</b></a>
+<li><a name="TOC33" href="#SEC33">OBTAINING A TEXTUAL ERROR MESSAGE</a>
+<li><a name="TOC34" href="#SEC34">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
+<li><a name="TOC35" href="#SEC35">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
+<li><a name="TOC36" href="#SEC36">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
+<li><a name="TOC37" href="#SEC37">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
+<li><a name="TOC38" href="#SEC38">DUPLICATE SUBPATTERN NAMES</a>
+<li><a name="TOC39" href="#SEC39">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
+<li><a name="TOC40" href="#SEC40">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
+<li><a name="TOC41" href="#SEC41">SEE ALSO</a>
+<li><a name="TOC42" href="#SEC42">AUTHOR</a>
+<li><a name="TOC43" href="#SEC43">REVISION</a>
</ul>
<P>
<b>#include &#60;pcre2.h&#62;</b>
@@ -334,8 +335,44 @@
replaced by <b>pcre2_set_depth_limit()</b>; the second is no longer needed and
has no effect (it always returns zero).
</P>
-<br><a name="SEC12" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
+<br><a name="SEC12" href="#TOC1">PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a><br>
<P>
+<b>pcre2_convert_context *pcre2_convert_context_create(</b>
+<b> pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
+<b> pcre2_convert_context *<i>cvcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
+<b> uint32_t <i>escape_char</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
+<b> uint32_t <i>separator_char</i>);</b>
+<br>
+<br>
+<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
+<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
+<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
+<br>
+<br>
+These functions provide a way of converting non-PCRE2 patterns into
+patterns that can be processed by <b>pcre2_compile()</b>. This facility is
+experimental and may be changed in future releases. At present, "globs" and
+POSIX basic and extended patterns can be converted. Details are given in the
+<a href="pcre2convert.html"><b>pcre2convert</b></a>
+documentation.
+</P>
+<br><a name="SEC13" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
+<P>
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code
units, respectively. However, there is just one header file, <b>pcre2.h</b>.
This contains the function prototypes and other definitions for all three
@@ -395,7 +432,7 @@
PCRE2 documents, functions and data types are described using their generic
names, without the _8, _16, or _32 suffix.
</P>
-<br><a name="SEC13" href="#TOC1">PCRE2 API OVERVIEW</a><br>
+<br><a name="SEC14" href="#TOC1">PCRE2 API OVERVIEW</a><br>
<P>
PCRE2 has its own native API, which is described in this document. There are
also some wrapper functions for the 8-bit library that correspond to the
@@ -503,7 +540,7 @@
blocks of various sorts. In all cases, if one of these functions is called with
a NULL argument, it does nothing.
</P>
-<br><a name="SEC14" href="#TOC1">STRING LENGTHS AND OFFSETS</a><br>
+<br><a name="SEC15" href="#TOC1">STRING LENGTHS AND OFFSETS</a><br>
<P>
The PCRE2 API uses string lengths and offsets into strings of code units in
several places. These values are always of type PCRE2_SIZE, which is an
@@ -513,7 +550,7 @@
Therefore, the longest string that can be handled is one less than this
maximum.
<a name="newlines"></a></P>
-<br><a name="SEC15" href="#TOC1">NEWLINES</a><br>
+<br><a name="SEC16" href="#TOC1">NEWLINES</a><br>
<P>
PCRE2 supports five different conventions for indicating line breaks in
strings: a single CR (carriage return) character, a single LF (linefeed)
@@ -548,7 +585,7 @@
the \n or \r escape sequences, nor does it affect what \R matches; this has
its own separate convention.
</P>
-<br><a name="SEC16" href="#TOC1">MULTITHREADING</a><br>
+<br><a name="SEC17" href="#TOC1">MULTITHREADING</a><br>
<P>
In a multithreaded application it is important to keep thread-specific data
separate from data that can be shared between threads. The PCRE2 library code
@@ -628,7 +665,7 @@
information such as the name of a (*MARK) setting. Each thread must provide its
own copy of this memory.
</P>
-<br><a name="SEC17" href="#TOC1">PCRE2 CONTEXTS</a><br>
+<br><a name="SEC18" href="#TOC1">PCRE2 CONTEXTS</a><br>
<P>
Some PCRE2 functions have a lot of parameters, many of which are used only by
specialist applications, for example, those that use custom memory management
@@ -1013,7 +1050,7 @@
less than the limit set by the caller of <b>pcre2_match()</b> or
<b>pcre2_dfa_match()</b> or, if no such limit is set, less than the default.
</P>
-<br><a name="SEC18" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
+<br><a name="SEC19" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
<P>
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
@@ -1150,7 +1187,7 @@
returned. This is the length of the string plus one unit for the terminating
zero.
<a name="compiling"></a></P>
-<br><a name="SEC19" href="#TOC1">COMPILING A PATTERN</a><br>
+<br><a name="SEC20" href="#TOC1">COMPILING A PATTERN</a><br>
<P>
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
@@ -1741,7 +1778,7 @@
PCRE2_EXTRA_MATCH_LINE
</pre>
This option is provided for use by the <b>-x</b> option of <b>pcre2grep</b>. It
-causes the pattern only to match complete lines. This is achieved by
+causes the pattern only to match complete lines. This is achieved by
automatically inserting the code for "^(?:" at the start of the compiled
pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched
line may be in the middle of the subject string. This option can be used with
@@ -1756,7 +1793,7 @@
used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is
also set.
</P>
-<br><a name="SEC20" href="#TOC1">COMPILATION ERROR CODES</a><br>
+<br><a name="SEC21" href="#TOC1">COMPILATION ERROR CODES</a><br>
<P>
There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
(via <i>errorcode</i>) if it finds an error in the pattern. There are also some
@@ -1769,7 +1806,7 @@
<a href="#geterrormessage">below)</a>
can be called to obtain a textual error message from any error code.
<a name="jitcompiling"></a></P>
-<br><a name="SEC21" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
+<br><a name="SEC22" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
<P>
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
<br>
@@ -1807,7 +1844,7 @@
benefit of faster execution might be offset by a much slower compilation time.
Most (but not all) patterns can be optimized by the JIT compiler.
<a name="localesupport"></a></P>
-<br><a name="SEC22" href="#TOC1">LOCALE SUPPORT</a><br>
+<br><a name="SEC23" href="#TOC1">LOCALE SUPPORT</a><br>
<P>
PCRE2 handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character code
@@ -1863,7 +1900,7 @@
compilation and matching both happen in the same locale, but different patterns
can be processed in different locales.
<a name="infoaboutpattern"></a></P>
-<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
+<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
<P>
<b>int pcre2_pattern_info(const pcre2 *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
@@ -2188,7 +2225,7 @@
calculates the size has to over-estimate. Processing a pattern with the JIT
compiler does not alter the value returned by this option.
<a name="infoaboutcallouts"></a></P>
-<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
+<br><a name="SEC25" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
<P>
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
@@ -2207,7 +2244,7 @@
<a href="pcre2callout.html"><b>pcre2callout</b></a>
documentation, which also gives further details about callouts.
</P>
-<br><a name="SEC25" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
+<br><a name="SEC26" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
<P>
It is possible to save compiled patterns on disc or elsewhere, and reload them
later, subject to a number of restrictions. The functions whose names begin
@@ -2216,7 +2253,7 @@
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
documentation.
<a name="matchdatablock"></a></P>
-<br><a name="SEC26" href="#TOC1">THE MATCH DATA BLOCK</a><br>
+<br><a name="SEC27" href="#TOC1">THE MATCH DATA BLOCK</a><br>
<P>
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
<b> pcre2_general_context *<i>gcontext</i>);</b>
@@ -2287,7 +2324,7 @@
When a match data block itself is no longer needed, it should be freed by
calling <b>pcre2_match_data_free()</b>.
</P>
-<br><a name="SEC27" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
+<br><a name="SEC28" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
<P>
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@@ -2525,7 +2562,7 @@
<a href="pcre2partial.html"><b>pcre2partial</b></a>
documentation.
</P>
-<br><a name="SEC28" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
+<br><a name="SEC29" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
<P>
When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in
@@ -2565,7 +2602,7 @@
Notwithstanding the above, anomalous effects may still occur when CRLF is a
valid newline sequence and explicit \r or \n escapes appear in the pattern.
<a name="matchedstrings"></a></P>
-<br><a name="SEC29" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
+<br><a name="SEC30" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
<P>
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
<br>
@@ -2664,7 +2701,7 @@
<b>pcre2_match()</b>. The other elements retain whatever values they previously
had.
<a name="matchotherdata"></a></P>
-<br><a name="SEC30" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
+<br><a name="SEC31" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
<P>
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
<br>
@@ -2714,7 +2751,7 @@
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
page.
<a name="errorlist"></a></P>
-<br><a name="SEC31" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
+<br><a name="SEC32" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
<P>
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
converted to a text string by calling the <b>pcre2_get_error_message()</b>
@@ -2820,7 +2857,7 @@
recursions between two different subpatterns, cannot be detected until matching
is attempted.
<a name="geterrormessage"></a></P>
-<br><a name="SEC32" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
+<br><a name="SEC33" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
<P>
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
<b> PCRE2_SIZE <i>bufflen</i>);</b>
@@ -2841,7 +2878,7 @@
a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned.
None of the messages are very long; a buffer size of 120 code units is ample.
<a name="extractbynumber"></a></P>
-<br><a name="SEC33" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
+<br><a name="SEC34" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
<P>
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
@@ -2938,7 +2975,7 @@
(abc)|(def) and the subject is "def", and the ovector contains at least two
capturing slots, substring number 1 is unset.
</P>
-<br><a name="SEC34" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
+<br><a name="SEC35" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
<P>
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
@@ -2977,7 +3014,7 @@
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
<a name="extractbyname"></a></P>
-<br><a name="SEC35" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
+<br><a name="SEC36" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
<P>
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
<b> PCRE2_SPTR <i>name</i>);</b>
@@ -3037,7 +3074,7 @@
numbers. For this reason, the use of different names for subpatterns of the
same number causes an error at compile time.
</P>
-<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
+<br><a name="SEC37" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
<P>
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@@ -3244,7 +3281,7 @@
"Obtaining a textual error message"
<a href="#geterrormessage">above).</a>
</P>
-<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
+<br><a name="SEC38" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
<P>
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
@@ -3289,7 +3326,7 @@
relevant entries for the name, you can extract each of their numbers, and hence
the captured data.
</P>
-<br><a name="SEC38" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
+<br><a name="SEC39" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
<P>
The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match at a given point in the subject. If you want to
@@ -3307,7 +3344,7 @@
other alternatives. Ultimately, when it runs out of matches,
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
<a name="dfamatch"></a></P>
-<br><a name="SEC39" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
+<br><a name="SEC40" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
<P>
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@@ -3503,13 +3540,13 @@
should contain data about the previous partial match. If any of these checks
fail, this error is given.
</P>
-<br><a name="SEC40" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC41" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo(3)</b>,
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
<b>pcre2sample</b>(3), <b>pcre2unicode</b>(3).
</P>
-<br><a name="SEC41" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC42" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@@ -3518,9 +3555,9 @@
Cambridge, England.
<br>
</P>
-<br><a name="SEC42" href="#TOC1">REVISION</a><br>
+<br><a name="SEC43" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 16 June 2017
+Last updated: 10 July 2017
<br>
Copyright © 1997-2017 University of Cambridge.
<br>
Added: code/trunk/doc/html/pcre2convert.html
===================================================================
--- code/trunk/doc/html/pcre2convert.html (rev 0)
+++ code/trunk/doc/html/pcre2convert.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,190 @@
+<html>
+<head>
+<title>pcre2convert specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2convert man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<ul>
+<li><a name="TOC1" href="#SEC1">EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a>
+<li><a name="TOC2" href="#SEC2">THE CONVERT CONTEXT</a>
+<li><a name="TOC3" href="#SEC3">THE CONVERSION FUNCTION</a>
+<li><a name="TOC4" href="#SEC4">CONVERTING GLOBS</a>
+<li><a name="TOC5" href="#SEC5">CONVERTING POSIX PATTERNS</a>
+<li><a name="TOC6" href="#SEC6">AUTHOR</a>
+<li><a name="TOC7" href="#SEC7">REVISION</a>
+</ul>
+<br><a name="SEC1" href="#TOC1">EXPERIMENTAL PATTERN CONVERSION FUNCTIONS</a><br>
+<P>
+This document describes a set of functions that can be used to convert
+"foreign" patterns into PCRE2 regular expressions. This facility is currently
+experimental, and may be changed in future releases. Two kinds of pattern,
+globs and POSIX patterns, are supported.
+</P>
+<br><a name="SEC2" href="#TOC1">THE CONVERT CONTEXT</a><br>
+<P>
+<b>pcre2_convert_context *pcre2_convert_context_create(</b>
+<b> pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+<b>pcre2_convert_context *pcre2_convert_context_copy(</b>
+<b> pcre2_convert_context *<i>cvcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_convert_context_free(pcre2_convert_context *<i>cvcontext</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_glob_escape(pcre2_convert_context *<i>cvcontext</i>,</b>
+<b> uint32_t <i>escape_char</i>);</b>
+<br>
+<br>
+<b>int pcre2_set_glob_separator(pcre2_convert_context *<i>cvcontext</i>,</b>
+<b> uint32_t <i>separator_char</i>);</b>
+<br>
+<br>
+A convert context is used to hold parameters that affect the way that pattern
+conversion works. Like all PCRE2 contexts, you need to use a context only if
+you want to override the defaults. There are the usual create, copy, and free
+functions. If custom memory management functions are set in a general context
+that is passed to <b>pcre2_convert_context_create()</b>, they are used for all
+memory management within the conversion functions.
+</P>
+<P>
+There are only two parameters in the convert context at present. Both apply
+only to glob conversions. The escape character defaults to grave accent under
+Windows, otherwise backslash. It can be set to zero, meaning no escape
+character, or to any punctuation character with a code point less than 256.
+The separator character defaults to backslash under Windows, otherwise forward
+slash. It can be set to forward slash, backslash, or dot.
+</P>
+<P>
+The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if
+their second argument is invalid.
+</P>
+<br><a name="SEC3" href="#TOC1">THE CONVERSION FUNCTION</a><br>
+<P>
+<b>int pcre2_pattern_convert(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
+<b> uint32_t <i>options</i>, PCRE2_UCHAR **<i>buffer</i>,</b>
+<b> PCRE2_SIZE *<i>blength</i>, pcre2_convert_context *<i>cvcontext</i>);</b>
+<br>
+<br>
+<b>void pcre2_converted_pattern_free(PCRE2_UCHAR *<i>converted_pattern</i>);</b>
+<br>
+<br>
+The first two arguments of <b>pcre2_pattern_convert()</b> define the foreign
+pattern that is to be converted. The length may be given as
+PCRE2_ZERO_TERMINATED. The <b>options</b> argument defines how the pattern is to
+be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set.
+PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid.
+One or more of the glob options, or one of the following POSIX options must be
+set to define the type of conversion that is required:
+<pre>
+ PCRE2_CONVERT_GLOB
+ PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+ PCRE2_CONVERT_GLOB_NO_STARSTAR
+ PCRE2_CONVERT_POSIX_BASIC
+ PCRE2_CONVERT_POSIX_EXTENDED
+</pre>
+Details of the conversions are given below. The <b>buffer</b> and <b>blength</b>
+arguments define how the output is handled:
+</P>
+<P>
+If <b>buffer</b> is NULL, the function just returns the length of the converted
+pattern via <b>blength</b>. This is one less than the length of buffer needed,
+because a terminating zero is always added to the output.
+</P>
+<P>
+If <b>buffer</b> points to a NULL pointer, an output buffer is obtained using
+the allocator in the context or <b>malloc()</b> if no context is supplied. A
+pointer to this buffer is placed in the variable to which <b>buffer</b> points.
+When no longer needed the output buffer must be freed by calling
+<b>pcre2_converted_pattern_free()</b>.
+</P>
+<P>
+If <b>buffer</b> points to a non-NULL pointer, <b>blength</b> must be set to the
+actual length of the buffer provided (in code units).
+</P>
+<P>
+In all cases, after successful conversion, the variable pointed to by
+<b>blength</b> is updated to the length actually used (in code units), excluding
+the terminating zero that is always added.
+</P>
+<P>
+If an error occurs, the length (via <b>blength</b>) is set to the offset
+within the input pattern where the error was detected. Only gross syntax errors
+are caught; there are plenty of errors that will get passed on for
+<b>pcre2_compile()</b> to discover.
+</P>
+<P>
+The return from <b>pcre2_pattern_convert()</b> is zero on success or a non-zero
+PCRE2 error code. Note that PCRE2 error codes may be positive or negative:
+<b>pcre2_compile()</b> uses mostly positive codes and <b>pcre2_match()</b>
+negative ones; <b>pcre2_pattern_convert()</b> uses existing codes of both kinds. A
+textual error message can be obtained by calling
+<b>pcre2_get_error_message()</b>.
+</P>
+<br><a name="SEC4" href="#TOC1">CONVERTING GLOBS</a><br>
+<P>
+Globs are used to match file names, and consequently have the concept of a
+"path separator", which defaults to backslash under Windows and forward slash
+otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not
+permitted to match separator characters, but the double-star (**) feature
+(which does match separators) is supported.
+</P>
+<P>
+PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
+match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with the
+double-star feature disabled. These options may be given together.
+</P>
+<br><a name="SEC5" href="#TOC1">CONVERTING POSIX PATTERNS</a><br>
+<P>
+POSIX defines two kinds of regular expression pattern: basic and extended.
+These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or
+PCRE2_CONVERT_POSIX_EXTENDED, respectively.
+</P>
+<P>
+In POSIX patterns, backslash is not special in a character class. Unmatched
+closing parentheses are treated as literals.
+</P>
+<P>
+In basic patterns, ? + | {} and () must be escaped to be recognized
+as metacharacters outside a character class. If the first character in the
+pattern is * it is treated as a literal. ^ is a metacharacter only at the start
+of a branch.
+</P>
+<P>
+In extended patterns, a backslash not in a character class always
+makes the next character literal, whatever it is. There are no backreferences.
+</P>
+<P>
+Note: POSIX mandates that the longest possible match at the first matching
+position must be found. This is not what <b>pcre2_match()</b> does; it yields
+the first match that is found. An application can use <b>pcre2_dfa_match()</b>
+to find the longest match, but that does not support backreferences (but then
+neither do POSIX extended patterns).
+</P>
+<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge, England.
+<br>
+</P>
+<br><a name="SEC7" href="#TOC1">REVISION</a><br>
+<P>
+Last updated: 12 July 2017
+<br>
+Copyright © 1997-2017 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
Modified: code/trunk/doc/html/pcre2test.html
===================================================================
--- code/trunk/doc/html/pcre2test.html 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/html/pcre2test.html 2017-07-12 16:34:49 UTC (rev 840)
@@ -630,6 +630,10 @@
bsr=[anycrlf|unicode] specify \R handling
/B bincode show binary code without lengths
callout_info show callout information
+ convert=<options> request foreign pattern conversion
+ convert_glob_escape=c set glob escape character
+ convert_glob_separator=c set glob separator character
+ convert_length set convert buffer length
debug same as info,fullbincode
framesize show matching frame size
fullbincode show binary code with lengths
@@ -1065,6 +1069,41 @@
<b>replace</b>, which causes an error. Note that <b>jitverify</b>, which is
allowed, does not carry through to any subsequent matching that uses a stacked
pattern.
+</P>
+<br><b>
+Testing foreign pattern conversion
+</b><br>
+<P>
+The experimental foreign pattern conversion functions in PCRE2 can be tested by
+setting the <b>convert</b> modifier. Its argument is a colon-separated list of
+options, which set the equivalent option for the <b>pcre2_pattern_convert()</b>
+function:
+<pre>
+ glob PCRE2_CONVERT_GLOB
+ glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
+ glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+ posix_basic PCRE2_CONVERT_POSIX_BASIC
+ posix_extended PCRE2_CONVERT_POSIX_EXTENDED
+ unset Unset all options
+</pre>
+The "unset" value is useful for turning off a default that has been set by a
+<b>#pattern</b> command. When one of these options is set, the input pattern is
+passed to <b>pcre2_pattern_convert()</b>. If the conversion is successful, the
+result is reflected in the output and then passed to <b>pcre2_compile()</b>. The
+normal <b>utf</b> and <b>no_utf_check</b> options, if set, cause the
+PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to
+<b>pcre2_pattern_convert()</b>.
+</P>
+<P>
+By default, the conversion function is allowed to allocate a buffer for its
+output. However, if the <b>convert_length</b> modifier is set to a value greater
+than zero, <b>pcre2test</b> passes a buffer of the given length. This makes it
+possible to test the length check.
+</P>
+<P>
+The <b>convert_glob_escape</b> and <b>convert_glob_separator</b> modifiers can be
+used to specify the escape and separator characters for glob processing,
+overriding the defaults, which are operating-system dependent.
<a name="subjectmodifiers"></a></P>
<br><a name="SEC11" href="#TOC1">SUBJECT MODIFIERS</a><br>
<P>
@@ -1866,7 +1905,7 @@
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 02 July 2017
+Last updated: 12 July 2017
<br>
Copyright © 1997-2017 University of Cambridge.
<br>
Modified: code/trunk/doc/index.html.src
===================================================================
--- code/trunk/doc/index.html.src 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/index.html.src 2017-07-12 16:34:49 UTC (rev 840)
@@ -35,6 +35,9 @@
<tr><td><a href="pcre2compat.html">pcre2compat</a></td>
<td> Compatibility with Perl</td></tr>
+<tr><td><a href="pcre2convert.html">pcre2convert</a></td>
+ <td> Experimental foreign pattern conversion functions</td></tr>
+
<tr><td><a href="pcre2demo.html">pcre2demo</a></td>
<td> A demonstration C program that uses the PCRE2 library</td></tr>
@@ -112,6 +115,18 @@
<tr><td><a href="pcre2_config.html">pcre2_config</a></td>
<td> Show build-time configuration options</td></tr>
+<tr><td><a href="pcre2_convert_context_copy.html">pcre2_convert_context_copy</a></td>
+ <td> Copy a convert context</td></tr>
+
+<tr><td><a href="pcre2_convert_context_create.html">pcre2_convert_context_create</a></td>
+ <td> Create a convert context</td></tr>
+
+<tr><td><a href="pcre2_convert_context_free.html">pcre2_convert_context_free</a></td>
+ <td> Free a convert context</td></tr>
+
+<tr><td><a href="pcre2_converted_pattern_free.html">pcre2_converted_pattern_free</a></td>
+ <td> Free converted foreign pattern</td></tr>
+
<tr><td><a href="pcre2_dfa_match.html">pcre2_dfa_match</a></td>
<td> Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr>
@@ -183,6 +198,9 @@
<tr><td><a href="pcre2_match_data_free.html">pcre2_match_data_free</a></td>
<td> Free a match data block</td></tr>
+<tr><td><a href="pcre2_pattern_convert.html">pcre2_pattern_convert</a></td>
+ <td> Experimental foreign pattern converter</td></tr>
+
<tr><td><a href="pcre2_pattern_info.html">pcre2_pattern_info</a></td>
<td> Extract information about a pattern</td></tr>
@@ -216,6 +234,12 @@
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td> Set the match backtracking depth limit</td></tr>
+<tr><td><a href="pcre2_set_glob_escape.html">pcre2_set_glob_escape</a></td>
+ <td> Set glob escape character</td></tr>
+
+<tr><td><a href="pcre2_set_glob_separator.html">pcre2_set_glob_separator</a></td>
+ <td> Set glob separator character</td></tr>
+
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td> Set the match backtracking heap limit</td></tr>
Modified: code/trunk/doc/pcre2.txt
===================================================================
--- code/trunk/doc/pcre2.txt 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/pcre2.txt 2017-07-12 16:34:49 UTC (rev 840)
@@ -413,6 +413,35 @@
needed and has no effect (it always returns zero).
+PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS
+
+ pcre2_convert_context *pcre2_convert_context_create(
+ pcre2_general_context *gcontext);
+
+ pcre2_convert_context *pcre2_convert_context_copy(
+ pcre2_convert_context *cvcontext);
+
+ void pcre2_convert_context_free(pcre2_convert_context *cvcontext);
+
+ int pcre2_set_glob_escape(pcre2_convert_context *cvcontext,
+ uint32_t escape_char);
+
+ int pcre2_set_glob_separator(pcre2_convert_context *cvcontext,
+ uint32_t separator_char);
+
+ int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length,
+ uint32_t options, PCRE2_UCHAR **buffer,
+ PCRE2_SIZE *blength, pcre2_convert_context *cvcontext);
+
+ void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern);
+
+ These functions provide a way of converting non-PCRE2 patterns into
+ patterns that can be processed by pcre2_compile(). This facility is
+ experimental and may be changed in future releases. At present, "globs"
+ and POSIX basic and extended patterns can be converted. Details are
+ given in the pcre2convert documentation.
+
+
PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit
@@ -3400,7 +3429,7 @@
REVISION
- Last updated: 16 June 2017
+ Last updated: 10 July 2017
Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------
Added: code/trunk/doc/pcre2_convert_context_copy.3
===================================================================
--- code/trunk/doc/pcre2_convert_context_copy.3 (rev 0)
+++ code/trunk/doc/pcre2_convert_context_copy.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,26 @@
+.TH PCRE2_CONVERT_CONTEXT_COPY 3 "10 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B pcre2_convert_context *pcre2_convert_context_copy(
+.B " pcre2_convert_context *\fIcvcontext\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is part of an experimental set of pattern conversion functions.
+It makes a new copy of a convert context, using the memory allocation function
+that was used for the original context. The result is NULL if the memory cannot
+be obtained.
+.P
+The pattern conversion functions are described in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
Added: code/trunk/doc/pcre2_convert_context_create.3
===================================================================
--- code/trunk/doc/pcre2_convert_context_create.3 (rev 0)
+++ code/trunk/doc/pcre2_convert_context_create.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,27 @@
+.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "10 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B pcre2_convert_context *pcre2_convert_context_create(
+.B " pcre2_general_context *\fIgcontext\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is part of an experimental set of pattern conversion functions.
+It creates and initializes a new convert context. If its argument is
+NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory
+allocation function within the general context is used. The result is NULL if
+the memory could not be obtained.
+.P
+The pattern conversion functions are described in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
Added: code/trunk/doc/pcre2_convert_context_free.3
===================================================================
--- code/trunk/doc/pcre2_convert_context_free.3 (rev 0)
+++ code/trunk/doc/pcre2_convert_context_free.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,25 @@
+.TH PCRE2_CONVERT_CONTEXT_FREE 3 "10 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is part of an experimental set of pattern conversion functions.
+It frees the memory occupied by a convert context, using the memory
+freeing function from the general context with which it was created, or
+\fBfree()\fP if that was not set.
+.P
+The pattern conversion functions are described in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
Added: code/trunk/doc/pcre2_converted_pattern_free.3
===================================================================
--- code/trunk/doc/pcre2_converted_pattern_free.3 (rev 0)
+++ code/trunk/doc/pcre2_converted_pattern_free.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,25 @@
+.TH PCRE2_CONVERTED_PATTERN_FREE 3 "11 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is part of an experimental set of pattern conversion functions.
+It frees the memory occupied by a converted pattern that was obtained by
+calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place
+the converted pattern into newly obtained heap memory.
+.P
+The pattern conversion functions are described in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
Added: code/trunk/doc/pcre2_pattern_convert.3
===================================================================
--- code/trunk/doc/pcre2_pattern_convert.3 (rev 0)
+++ code/trunk/doc/pcre2_pattern_convert.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,55 @@
+.TH PCRE2_PATTERN_CONVERT 3 "11 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
+.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
+.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is part of an experimental set of pattern conversion functions.
+It converts a foreign pattern (for example, a glob) into a PCRE2 regular
+expression pattern. Its arguments are:
+.sp
+ \fIpattern\fP The foreign pattern
+ \fIlength\fP The length of the input pattern or PCRE2_ZERO_TERMINATED
+ \fIoptions\fP Option bits
+ \fIbuffer\fP Pointer to pointer to output buffer, or NULL
+ \fIblength\fP Pointer to output length field
+ \fIcvcontext\fP Pointer to a convert context or NULL
+.sp
+The length of the converted pattern (excluding the terminating zero) is
+returned via \fIblength\fP. If \fIbuffer\fP is NULL, the function just returns
+the output length. If \fIbuffer\fP points to a NULL pointer, heap memory is
+obtained for the converted pattern, using the allocator in the context if
+present (or else \fBmalloc()\fP), and the field pointed to by \fIbuffer\fP is
+updated. If \fIbuffer\fP points to a non-NULL field, that must point to a
+buffer whose size is in the variable pointed to by \fIblength\fP. This value is
+updated.
+.P
+The option bits are:
+.sp
+ PCRE2_CONVERT_UTF Input is UTF
+ PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity
+ PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern
+ PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern
+ PCRE2_CONVERT_GLOB ) Convert
+ PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types
+ PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob
+.sp
+The return value from \fBpcre2_pattern_convert()\fP is zero on success or a
+non-zero PCRE2 error code.
+.P
+The pattern conversion functions are described in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
Added: code/trunk/doc/pcre2_set_glob_escape.3
===================================================================
--- code/trunk/doc/pcre2_set_glob_escape.3 (rev 0)
+++ code/trunk/doc/pcre2_set_glob_escape.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,29 @@
+.TH PCRE2_SET_GLOB_ESCAPE 3 "11 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
+.B " uint32_t \fIescape_char\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is part of an experimental set of pattern conversion functions.
+It sets the escape character that is used when converting globs. The second
+argument must either be zero (meaning there is no escape character) or a
+punctuation character whose code point is less than 256. The default is grave
+accent if running under Windows, otherwise backslash. The result of the
+function is zero for success or PCRE2_ERROR_BADDATA if the second argument is
+invalid.
+.P
+The pattern conversion functions are described in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
Added: code/trunk/doc/pcre2_set_glob_separator.3
===================================================================
--- code/trunk/doc/pcre2_set_glob_separator.3 (rev 0)
+++ code/trunk/doc/pcre2_set_glob_separator.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,28 @@
+.TH PCRE2_SET_GLOB_SEPARATOR 3 "11 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
+.B " uint32_t \fIseparator_char\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is part of an experimental set of pattern conversion functions.
+It sets the component separator character that is used when converting globs.
+The second argument must be one of the characters forward slash, backslash, or
+dot. The default is backslash when running under Windows, otherwise forward
+slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
+the second argument is invalid.
+.P
+The pattern conversion functions are described in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/pcre2api.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "16 June 2017" "PCRE2 10.30"
+.TH PCRE2API 3 "10 July 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -272,6 +272,41 @@
has no effect (it always returns zero).
.
.
+.SH "PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS"
+.rs
+.sp
+.nf
+.B pcre2_convert_context *pcre2_convert_context_create(
+.B " pcre2_general_context *\fIgcontext\fP);"
+.sp
+.B pcre2_convert_context *pcre2_convert_context_copy(
+.B " pcre2_convert_context *\fIcvcontext\fP);"
+.sp
+.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
+.sp
+.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
+.B " uint32_t \fIescape_char\fP);"
+.sp
+.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
+.B " uint32_t \fIseparator_char\fP);"
+.sp
+.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
+.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
+.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
+.sp
+.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
+.fi
+.sp
+These functions provide a way of converting non-PCRE2 patterns into
+patterns that can be processed by \fBpcre2_compile()\fP. This facility is
+experimental and may be changed in future releases. At present, "globs" and
+POSIX basic and extended patterns can be converted. Details are given in the
+.\" HREF
+\fBpcre2convert\fP
+.\"
+documentation.
+.
+.
.SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
.rs
.sp
@@ -1695,7 +1730,7 @@
PCRE2_EXTRA_MATCH_LINE
.sp
This option is provided for use by the \fB-x\fP option of \fBpcre2grep\fP. It
-causes the pattern only to match complete lines. This is achieved by
+causes the pattern only to match complete lines. This is achieved by
automatically inserting the code for "^(?:" at the start of the compiled
pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched
line may be in the middle of the subject string. This option can be used with
@@ -3539,6 +3574,6 @@
.rs
.sp
.nf
-Last updated: 16 June 2017
+Last updated: 10 July 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi
Added: code/trunk/doc/pcre2convert.3
===================================================================
--- code/trunk/doc/pcre2convert.3 (rev 0)
+++ code/trunk/doc/pcre2convert.3 2017-07-12 16:34:49 UTC (rev 840)
@@ -0,0 +1,163 @@
+.TH PCRE2CONVERT 3 "12 July 2017" "PCRE2 10.30"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH "EXPERIMENTAL PATTERN CONVERSION FUNCTIONS"
+.rs
+.sp
+This document describes a set of functions that can be used to convert
+"foreign" patterns into PCRE2 regular expressions. This facility is currently
+experimental, and may be changed in future releases. Two kinds of pattern,
+globs and POSIX patterns, are supported.
+.
+.
+.SH "THE CONVERT CONTEXT"
+.rs
+.sp
+.nf
+.B pcre2_convert_context *pcre2_convert_context_create(
+.B " pcre2_general_context *\fIgcontext\fP);"
+.sp
+.B pcre2_convert_context *pcre2_convert_context_copy(
+.B " pcre2_convert_context *\fIcvcontext\fP);"
+.sp
+.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP);
+.sp
+.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP,
+.B " uint32_t \fIescape_char\fP);"
+.sp
+.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP,
+.B " uint32_t \fIseparator_char\fP);"
+.fi
+.sp
+A convert context is used to hold parameters that affect the way that pattern
+conversion works. As with all PCRE2 contexts, you need to use one only if you
+want to override the defaults. There are the usual create, copy, and free
+functions. If custom memory management functions are set in a general context
+that is passed to \fBpcre2_convert_context_create()\fP, they are used for all
+memory management within the conversion functions.
+.P
+There are only two parameters in the convert context at present. Both apply
+only to glob conversions. The escape character defaults to grave accent under
+Windows, otherwise backslash. It can be set to zero, meaning no escape
+character, or to any punctuation character with a code point less than 256.
+The separator character defaults to backslash under Windows, otherwise forward
+slash. It can be set to forward slash, backslash, or dot.
+.P
+The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if
+their second argument is invalid.
+.
+.
+.SH "THE CONVERSION FUNCTION"
+.rs
+.sp
+.nf
+.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP,
+.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP,"
+.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);"
+.sp
+.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP);
+.fi
+.sp
+The first two arguments of \fBpcre2_pattern_convert()\fP define the foreign
+pattern that is to be converted. The length may be given as
+PCRE2_ZERO_TERMINATED. The \fBoptions\fP argument defines how the pattern is to
+be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set.
+PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid.
+One or more of the glob options, or one of the following POSIX options, must be
+set to define the type of conversion that is required:
+.sp
+ PCRE2_CONVERT_GLOB
+ PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+ PCRE2_CONVERT_GLOB_NO_STARSTAR
+ PCRE2_CONVERT_POSIX_BASIC
+ PCRE2_CONVERT_POSIX_EXTENDED
+.sp
+Details of the conversions are given below. The \fBbuffer\fP and \fBblength\fP
+arguments define how the output is handled:
+.P
+If \fBbuffer\fP is NULL, the function just returns the length of the converted
+pattern via \fBblength\fP. This is one less than the length of buffer needed,
+because a terminating zero is always added to the output.
+.P
+If \fBbuffer\fP points to a NULL pointer, an output buffer is obtained using
+the allocator in the context or \fBmalloc()\fP if no context is supplied. A
+pointer to this buffer is placed in the variable to which \fBbuffer\fP points.
+When no longer needed the output buffer must be freed by calling
+\fBpcre2_converted_pattern_free()\fP.
+.P
+If \fBbuffer\fP points to a non-NULL pointer, \fBblength\fP must be set to the
+actual length of the buffer provided (in code units).
+.P
+In all cases, after successful conversion, the variable pointed to by
+\fBblength\fP is updated to the length actually used (in code units), excluding
+the terminating zero that is always added.
+.P
+If an error occurs, the length (via \fBblength\fP) is set to the offset
+within the input pattern where the error was detected. Only gross syntax errors
+are caught; there are plenty of errors that will get passed on for
+\fBpcre2_compile()\fP to discover.
+.P
+The return from \fBpcre2_pattern_convert()\fP is zero on success or a non-zero
+PCRE2 error code. Note that PCRE2 error codes may be positive or negative:
+\fBpcre2_compile()\fP uses mostly positive codes and \fBpcre2_match()\fP
+negative ones; \fBpcre2_pattern_convert()\fP uses existing codes of both
+kinds. A textual error message can be obtained by calling
+\fBpcre2_get_error_message()\fP.
+.
+.
+.SH "CONVERTING GLOBS"
+.rs
+.sp
+Globs are used to match file names, and consequently have the concept of a
+"path separator", which defaults to backslash under Windows and forward slash
+otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not
+permitted to match separator characters, but the double-star (**) feature
+(which does match separators) is supported.
+.P
+PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to
+match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with
+the double-star feature disabled. These options may be given together.
+.
+.
+.SH "CONVERTING POSIX PATTERNS"
+.rs
+.sp
+POSIX defines two kinds of regular expression pattern: basic and extended.
+These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or
+PCRE2_CONVERT_POSIX_EXTENDED, respectively.
+.P
+In POSIX patterns, backslash is not special in a character class. Unmatched
+closing parentheses are treated as literals.
+.P
+In basic patterns, ? + | {} and () must be escaped to be recognized
+as metacharacters outside a character class. If the first character in the
+pattern is * it is treated as a literal. ^ is a metacharacter only at the start
+of a branch.
+.P
+In extended patterns, a backslash not in a character class always
+makes the next character literal, whatever it is. There are no backreferences.
+.P
+Note: POSIX mandates that the longest possible match at the first matching
+position must be found. This is not what \fBpcre2_match()\fP does; it yields
+the first match that is found. An application can use \fBpcre2_dfa_match()\fP
+to find the longest match, but that does not support backreferences (but then
+neither do POSIX extended patterns).
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 12 July 2017
+Copyright (c) 1997-2017 University of Cambridge.
+.fi
Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/pcre2test.1 2017-07-12 16:34:49 UTC (rev 840)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "02 July 2017" "PCRE 10.30"
+.TH PCRE2TEST 1 "12 July 2017" "PCRE 10.30"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -592,6 +592,10 @@
bsr=[anycrlf|unicode] specify \eR handling
/B bincode show binary code without lengths
callout_info show callout information
+ convert=<options> request foreign pattern conversion
+ convert_glob_escape=c set glob escape character
+ convert_glob_separator=c set glob separator character
+ convert_length=<n> set convert buffer length
debug same as info,fullbincode
framesize show matching frame size
fullbincode show binary code with lengths
@@ -1035,6 +1039,39 @@
pattern.
.
.
+.SS "Testing foreign pattern conversion"
+.rs
+.sp
+The experimental foreign pattern conversion functions in PCRE2 can be tested by
+setting the \fBconvert\fP modifier. Its argument is a colon-separated list of
+options, which set the equivalent option for the \fBpcre2_pattern_convert()\fP
+function:
+.sp
+ glob PCRE2_CONVERT_GLOB
+ glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
+ glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+ posix_basic PCRE2_CONVERT_POSIX_BASIC
+ posix_extended PCRE2_CONVERT_POSIX_EXTENDED
+ unset Unset all options
+.sp
+The "unset" value is useful for turning off a default that has been set by a
+\fB#pattern\fP command. When one of these options is set, the input pattern is
+passed to \fBpcre2_pattern_convert()\fP. If the conversion is successful, the
+result is reflected in the output and then passed to \fBpcre2_compile()\fP. The
+normal \fButf\fP and \fBno_utf_check\fP options, if set, cause the
+PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to
+\fBpcre2_pattern_convert()\fP.
+.P
+By default, the conversion function is allowed to allocate a buffer for its
+output. However, if the \fBconvert_length\fP modifier is set to a value greater
+than zero, \fBpcre2test\fP passes a buffer of the given length. This makes it
+possible to test the length check.
+.P
+The \fBconvert_glob_escape\fP and \fBconvert_glob_separator\fP modifiers can be
+used to specify the escape and separator characters for glob processing,
+overriding the defaults, which are operating-system dependent.
+.
+.
.\" HTML <a name="subjectmodifiers"></a>
.SH "SUBJECT MODIFIERS"
.rs
@@ -1850,6 +1887,6 @@
.rs
.sp
.nf
-Last updated: 02 July 2017
+Last updated: 12 July 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi
Modified: code/trunk/doc/pcre2test.txt
===================================================================
--- code/trunk/doc/pcre2test.txt 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/doc/pcre2test.txt 2017-07-12 16:34:49 UTC (rev 840)
@@ -570,6 +570,10 @@
bsr=[anycrlf|unicode] specify \R handling
/B bincode show binary code without lengths
callout_info show callout information
+ convert=<options> request foreign pattern conversion
+ convert_glob_escape=c set glob escape character
+ convert_glob_separator=c set glob separator character
+ convert_length=<n> set convert buffer length
debug same as info,fullbincode
framesize show matching frame size
fullbincode show binary code with lengths
@@ -953,7 +957,38 @@
that jitverify, which is allowed, does not carry through to any subse-
quent matching that uses a stacked pattern.
+ Testing foreign pattern conversion
+ The experimental foreign pattern conversion functions in PCRE2 can be
+ tested by setting the convert modifier. Its argument is a colon-sepa-
+ rated list of options, which set the equivalent option for the
+ pcre2_pattern_convert() function:
+
+ glob PCRE2_CONVERT_GLOB
+ glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR
+ glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+ posix_basic PCRE2_CONVERT_POSIX_BASIC
+ posix_extended PCRE2_CONVERT_POSIX_EXTENDED
+ unset Unset all options
+
+ The "unset" value is useful for turning off a default that has been set
+ by a #pattern command. When one of these options is set, the input pat-
+ tern is passed to pcre2_pattern_convert(). If the conversion is suc-
+ cessful, the result is reflected in the output and then passed to
+ pcre2_compile(). The normal utf and no_utf_check options, if set, cause
+ the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be
+ passed to pcre2_pattern_convert().
+
+ By default, the conversion function is allowed to allocate a buffer for
+ its output. However, if the convert_length modifier is set to a value
+ greater than zero, pcre2test passes a buffer of the given length. This
+ makes it possible to test the length check.
+
+ The convert_glob_escape and convert_glob_separator modifiers can be
+ used to specify the escape and separator characters for glob process-
+ ing, overriding the defaults, which are operating-system dependent.
+
+
SUBJECT MODIFIERS
The modifiers that can appear in subject lines and the #subject command
@@ -1692,5 +1727,5 @@
REVISION
- Last updated: 02 July 2017
+ Last updated: 12 July 2017
Copyright (c) 1997-2017 University of Cambridge.
Modified: code/trunk/src/pcre2.h
===================================================================
--- code/trunk/src/pcre2.h 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/src/pcre2.h 2017-07-12 16:34:49 UTC (rev 840)
@@ -193,8 +193,6 @@
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
-#define PCRE2_CONVERT_GLOB_BASIC 0x00000070u
-#define PCRE2_CONVERT_GLOB_IGNORE_DOT_START 0x00000080u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/src/pcre2.h.in 2017-07-12 16:34:49 UTC (rev 840)
@@ -193,8 +193,6 @@
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
-#define PCRE2_CONVERT_GLOB_BASIC 0x00000070u
-#define PCRE2_CONVERT_GLOB_IGNORE_DOT_START 0x00000080u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
Modified: code/trunk/src/pcre2_convert.c
===================================================================
--- code/trunk/src/pcre2_convert.c 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/src/pcre2_convert.c 2017-07-12 16:34:49 UTC (rev 840)
@@ -49,7 +49,6 @@
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
- PCRE2_CONVERT_GLOB_IGNORE_DOT_START| \
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
PCRE2_CONVERT_GLOB_NO_STARSTAR| \
TYPE_OPTIONS)
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/src/pcre2test.c 2017-07-12 16:34:49 UTC (rev 840)
@@ -401,8 +401,6 @@
static convertstruct convertlist[] = {
{ "glob", PCRE2_CONVERT_GLOB },
- { "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
- { "glob_ignore_dot_start", PCRE2_CONVERT_GLOB_IGNORE_DOT_START },
{ "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
{ "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
{ "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
Modified: code/trunk/testdata/testinput25
===================================================================
--- code/trunk/testdata/testinput25 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/testdata/testinput25 2017-07-12 16:34:49 UTC (rev 840)
@@ -8,7 +8,7 @@
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
-#pattern convert=glob_basic,convert_glob_escape=\,convert_glob_separator=/
+#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/
# The fact that this one works in 13 bytes in the 8-bit library shows that the
# output is in UTF-8, though pcre2test shows the character as an escape.
Modified: code/trunk/testdata/testoutput25
===================================================================
--- code/trunk/testdata/testoutput25 2017-07-05 08:55:49 UTC (rev 839)
+++ code/trunk/testdata/testoutput25 2017-07-12 16:34:49 UTC (rev 840)
@@ -8,7 +8,7 @@
# Set the glob separator explicitly so that different OS defaults are not a
# problem. Then test various errors.
-#pattern convert=glob_basic,convert_glob_escape=\,convert_glob_separator=/
+#pattern convert=glob,convert_glob_escape=\,convert_glob_separator=/
# The fact that this one works in 13 bytes in the 8-bit library shows that the
# output is in UTF-8, though pcre2test shows the character as an escape.