[Pcre-svn] [678] code/trunk: Documentation for JIT support.

著者: Subversion repository
日付:
To: pcre-svn
題目: [Pcre-svn] [678] code/trunk: Documentation for JIT support.

Revision: 678

          http://vcs.pcre.org/viewvc?view=rev&revision=678
Author:   ph10
Date:     2011-08-28 16:23:03 +0100 (Sun, 28 Aug 2011)

Log Message:
-----------
Documentation for JIT support.

Modified Paths:
--------------
    code/trunk/Makefile.am
    code/trunk/PrepareRelease
    code/trunk/README
    code/trunk/doc/html/index.html
    code/trunk/doc/html/pcre.html
    code/trunk/doc/html/pcreapi.html
    code/trunk/doc/html/pcrecompat.html
    code/trunk/doc/html/pcrepattern.html
    code/trunk/doc/index.html.src
    code/trunk/doc/pcre.3
    code/trunk/doc/pcre.txt
    code/trunk/doc/pcre_config.3
    code/trunk/doc/pcre_dfa_exec.3
    code/trunk/doc/pcre_exec.3
    code/trunk/doc/pcre_fullinfo.3
    code/trunk/doc/pcre_study.3
    code/trunk/doc/pcreapi.3
    code/trunk/doc/pcrebuild.3
    code/trunk/doc/pcrecallout.3
    code/trunk/doc/pcrecompat.3
    code/trunk/doc/pcrepartial.3
    code/trunk/doc/pcrepattern.3
    code/trunk/doc/pcreprecompile.3
    code/trunk/doc/pcrestack.3
    code/trunk/doc/pcretest.1
    code/trunk/doc/perltest.txt
    code/trunk/pcre_jit_compile.c

Added Paths:
-----------
    code/trunk/doc/html/pcre_assign_jit_stack.html
    code/trunk/doc/html/pcre_free_study.html
    code/trunk/doc/html/pcre_jit_stack_alloc.html
    code/trunk/doc/html/pcre_jit_stack_free.html
    code/trunk/doc/html/pcrejit.html
    code/trunk/doc/html/pcrelimits.html
    code/trunk/doc/html/pcreunicode.html
    code/trunk/doc/pcre_assign_jit_stack.3
    code/trunk/doc/pcre_free_study.3
    code/trunk/doc/pcre_jit_stack_alloc.3
    code/trunk/doc/pcre_jit_stack_free.3
    code/trunk/doc/pcrejit.3
    code/trunk/doc/pcrelimits.3
    code/trunk/doc/pcreunicode.3

Modified: code/trunk/Makefile.am
===================================================================
--- code/trunk/Makefile.am    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/Makefile.am    2011-08-28 15:23:03 UTC (rev 678)
@@ -18,6 +18,7 @@
   doc/html/index.html \
   doc/html/pcre.html \
   doc/html/pcre-config.html \
+  doc/html/pcre_assign_jit_stack.html \
   doc/html/pcre_compile.html \
   doc/html/pcre_compile2.html \
   doc/html/pcre_config.html \
@@ -25,6 +26,7 @@
   doc/html/pcre_copy_substring.html \
   doc/html/pcre_dfa_exec.html \
   doc/html/pcre_exec.html \
+  doc/html/pcre_free_study.html \
   doc/html/pcre_free_substring.html \
   doc/html/pcre_free_substring_list.html \
   doc/html/pcre_fullinfo.html \
@@ -34,6 +36,8 @@
   doc/html/pcre_get_substring.html \
   doc/html/pcre_get_substring_list.html \
   doc/html/pcre_info.html \
+  doc/html/pcre_jit_stack_alloc.html \
+  doc/html/pcre_jit_stack_free.html \
   doc/html/pcre_maketables.html \
   doc/html/pcre_refcount.html \
   doc/html/pcre_study.html \
@@ -44,6 +48,8 @@
   doc/html/pcrecompat.html \
   doc/html/pcredemo.html \
   doc/html/pcregrep.html \
+  doc/html/pcrejit.html \
+  doc/html/pcrelimits.html \
   doc/html/pcrematching.html \
   doc/html/pcrepartial.html \
   doc/html/pcrepattern.html \
@@ -53,7 +59,8 @@
   doc/html/pcresample.html \
   doc/html/pcrestack.html \
   doc/html/pcresyntax.html \
-  doc/html/pcretest.html
+  doc/html/pcretest.html \
+  doc/html/pcreunicode.html

pcrecpp_html = doc/html/pcrecpp.html
dist_noinst_DATA = $(pcrecpp_html)
@@ -378,6 +385,7 @@
dist_man_MANS = \
doc/pcre.3 \
doc/pcre-config.1 \
+ doc/pcre_assign_jit_stack.3 \
doc/pcre_compile.3 \
doc/pcre_compile2.3 \
doc/pcre_config.3 \
@@ -385,6 +393,7 @@
doc/pcre_copy_substring.3 \
doc/pcre_dfa_exec.3 \
doc/pcre_exec.3 \
+ doc/pcre_free_study.3 \
doc/pcre_free_substring.3 \
doc/pcre_free_substring_list.3 \
doc/pcre_fullinfo.3 \
@@ -394,6 +403,8 @@
doc/pcre_get_substring.3 \
doc/pcre_get_substring_list.3 \
doc/pcre_info.3 \
+ doc/pcre_jit_stack_alloc.3 \
+ doc/pcre_jit_stack_free.3 \
doc/pcre_maketables.3 \
doc/pcre_refcount.3 \
doc/pcre_study.3 \
@@ -403,6 +414,8 @@
doc/pcrecallout.3 \
doc/pcrecompat.3 \
doc/pcregrep.1 \
+ doc/pcrejit.3 \
+ doc/pcrelimits.3 \
doc/pcrematching.3 \
doc/pcrepartial.3 \
doc/pcrepattern.3 \
@@ -412,7 +425,8 @@
doc/pcresample.3 \
doc/pcrestack.3 \
doc/pcresyntax.3 \
- doc/pcretest.1
+ doc/pcretest.1 \
+ doc/pcreunicode.3

pcrecpp_man = doc/pcrecpp.3
EXTRA_DIST += $(pcrecpp_man)

Modified: code/trunk/PrepareRelease
===================================================================
--- code/trunk/PrepareRelease    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/PrepareRelease    2011-08-28 15:23:03 UTC (rev 678)
@@ -49,7 +49,7 @@
 This file contains a concatenation of the PCRE man pages, converted to plain
 text format for ease of searching with a text editor, or for use on systems
 that do not have a man page processor. The small individual files that give
-synopses of each function in the library have not been included. Neither has 
+synopses of each function in the library have not been included. Neither has
 the pcredemo program. There are separate text files for the pcregrep and
 pcretest commands.
 -----------------------------------------------------------------------------
@@ -59,8 +59,9 @@

 echo "Making pcre.txt"
 for file in pcre pcrebuild pcrematching pcreapi pcrecallout pcrecompat \
-            pcrepattern pcresyntax pcrepartial pcreprecompile \
-            pcreperform pcreposix pcrecpp pcresample pcrestack ; do
+            pcrepattern pcresyntax pcreunicode pcrejit pcrepartial \
+            pcreprecompile pcreperform pcreposix pcrecpp pcresample \
+            pcrelimits pcrestack ; do
   echo "  Processing $file.3"
   nroff -c -man $file.3 >$file.rawtxt
   ../CleanTxt <$file.rawtxt >>pcre.txt
@@ -103,7 +104,7 @@
             ".  hy \\\\n(HY\n" .
             "..\n" .
             ".\n" .
-            ".EX\n" ; 
+            ".EX\n" ;
   while (<IN>)
     {
     s/\\/\\e/g;
@@ -111,7 +112,7 @@
     }
   print OUT ".EE\n";
   close(IN);
-  close(OUT);    
+  close(OUT);
 END
 if [ $? != 0 ] ; then exit 1; fi

@@ -136,10 +137,12 @@
   base=`basename $file .3`
   toc=-toc
   if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
-  if [ "$base" = "pcresample" ] || \
-     [ "$base" = "pcrestack" ]  || \
-     [ "$base" = "pcrecompat" ] || \
-     [ "$base" = "pcreperform" ] ; then
+  if [ "$base" = "pcresample" ]  || \
+     [ "$base" = "pcrestack" ]   || \
+     [ "$base" = "pcrecompat" ]  || \
+     [ "$base" = "pcrelimits" ]  || \
+     [ "$base" = "pcreperform" ] || \
+     [ "$base" = "pcreunicode" ] ; then
     toc=""
   fi
   echo "  Making $base.html"

Modified: code/trunk/README
===================================================================
--- code/trunk/README    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/README    2011-08-28 15:23:03 UTC (rev 678)
@@ -176,7 +176,9 @@

. If you want to include support for just-in-time compiling, which can give
large performance improvements on certain platforms, add --enable-jit to the
- "configure" command.
+ "configure" command. This support is available only for certain hardware
+ architectures. If you try to enable it on an unsupported architecture, there
+ will be a compile-time error.

. If you want to make use of the support for UTF-8 Unicode character strings in
PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
@@ -837,4 +839,4 @@
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 23 August 2011
+Last updated: 27 August 2011

Modified: code/trunk/doc/html/index.html
===================================================================
--- code/trunk/doc/html/index.html    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/html/index.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -1,10 +1,10 @@
 <html>
-<!-- This is a manually maintained file that is the root of the HTML version of
-     the PCRE documentation. When the HTML documents are built from the man
-     page versions, the entire doc/html directory is emptied, this file is then
-     copied into doc/html/index.html, and the remaining files therein are
+<!-- This is a manually maintained file that is the root of the HTML version of 
+     the PCRE documentation. When the HTML documents are built from the man 
+     page versions, the entire doc/html directory is emptied, this file is then 
+     copied into doc/html/index.html, and the remaining files therein are 
      created by the 132html script.
--->
+-->      
 <head>
 <title>PCRE specification</title>
 </head>
@@ -42,6 +42,12 @@
 <tr><td><a href="pcregrep.html">pcregrep</a></td>
     <td>&nbsp;&nbsp;The <b>pcregrep</b> command</td></tr>

+<tr><td><a href="pcrejit.html">pcrejit</a></td>
+    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
+
+<tr><td><a href="pcrelimits.html">pcrelimits</a></td>
+    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
+
 <tr><td><a href="pcrematching.html">pcrematching</a></td>
     <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>

@@ -71,14 +77,17 @@

 <tr><td><a href="pcretest.html">pcretest</a></td>
     <td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr>
+
+<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
+    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8 support</td></tr>
 </table>

<p>
-There are also individual pages that summarize the interface for each function
+There are also individual pages that summarize the interface for each function
in the library:
</p>

-<table>
+<table>

 <tr><td><a href="pcre_compile.html">pcre_compile</a></td>
     <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
@@ -129,7 +138,7 @@

 <tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
     <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
-
+    
 <tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
     <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>

Modified: code/trunk/doc/html/pcre.html
===================================================================
--- code/trunk/doc/html/pcre.html    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/html/pcre.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -15,10 +15,8 @@
 <ul>
 <li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
 <li><a name="TOC2" href="#SEC2">USER DOCUMENTATION</a>
-<li><a name="TOC3" href="#SEC3">LIMITATIONS</a>
-<li><a name="TOC4" href="#SEC4">UTF-8 AND UNICODE PROPERTY SUPPORT</a>
-<li><a name="TOC5" href="#SEC5">AUTHOR</a>
-<li><a name="TOC6" href="#SEC6">REVISION</a>
+<li><a name="TOC3" href="#SEC3">AUTHOR</a>
+<li><a name="TOC4" href="#SEC4">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
 <P>
@@ -100,6 +98,8 @@
   pcrecpp           details of the C++ wrapper
   pcredemo          a demonstration C program that uses PCRE
   pcregrep          description of the <b>pcregrep</b> command
+  pcrejit           discussion of the just-in-time optimization support 
+  pcrelimits        details of size and other limits 
   pcrematching      discussion of the two matching algorithms
   pcrepartial       details of the partial matching facility
   pcrepattern       syntax and semantics of supported regular expressions
@@ -110,192 +110,13 @@
   pcrestack         discussion of stack usage
   pcresyntax        quick syntax reference
   pcretest          description of the <b>pcretest</b> testing command
+  pcreunicode       discussion of Unicode and UTF-8 support 
 </pre>
 In addition, in the "man" and HTML formats, there is a short page for each
 C library function, listing its arguments and results.
 </P>
-<br><a name="SEC3" href="#TOC1">LIMITATIONS</a><br>
+<br><a name="SEC3" href="#TOC1">AUTHOR</a><br>
 <P>
-There are some size limitations in PCRE but it is hoped that they will never in
-practice be relevant.
-</P>
-<P>
-The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
-compiled with the default internal linkage size of 2. If you want to process
-regular expressions that are truly enormous, you can compile PCRE with an
-internal linkage size of 3 or 4 (see the <b>README</b> file in the source
-distribution and the
-<a href="pcrebuild.html"><b>pcrebuild</b></a>
-documentation for details). In these cases the limit is substantially larger.
-However, the speed of execution is slower.
-</P>
-<P>
-All values in repeating quantifiers must be less than 65536.
-</P>
-<P>
-There is no limit to the number of parenthesized subpatterns, but there can be
-no more than 65535 capturing subpatterns.
-</P>
-<P>
-The maximum length of name for a named subpattern is 32 characters, and the
-maximum number of named subpatterns is 10000.
-</P>
-<P>
-The maximum length of a subject string is the largest positive number that an
-integer variable can hold. However, when using the traditional matching
-function, PCRE uses recursion to handle subpatterns and indefinite repetition.
-This means that the available stack space may limit the size of a subject
-string that can be processed by certain patterns. For a discussion of stack
-issues, see the
-<a href="pcrestack.html"><b>pcrestack</b></a>
-documentation.
-<a name="utf8support"></a></P>
-<br><a name="SEC4" href="#TOC1">UTF-8 AND UNICODE PROPERTY SUPPORT</a><br>
-<P>
-From release 3.3, PCRE has had some support for character strings encoded in
-the UTF-8 format. For release 4.0 this was greatly extended to cover most
-common requirements, and in release 5.0 additional support for Unicode general
-category properties was added.
-</P>
-<P>
-In order process UTF-8 strings, you must build PCRE to include UTF-8 support in
-the code, and, in addition, you must call
-<a href="pcre_compile.html"><b>pcre_compile()</b></a>
-with the PCRE_UTF8 option flag, or the pattern must start with the sequence
-(*UTF8). When either of these is the case, both the pattern and any subject
-strings that are matched against it are treated as UTF-8 strings instead of
-strings of 1-byte characters.
-</P>
-<P>
-If you compile PCRE with UTF-8 support, but do not use it at run time, the
-library will be a bit bigger, but the additional run time overhead is limited
-to testing the PCRE_UTF8 flag occasionally, so should not be very big.
-</P>
-<P>
-If PCRE is built with Unicode character property support (which implies UTF-8
-support), the escape sequences \p{..}, \P{..}, and \X are supported.
-The available properties that can be tested are limited to the general
-category properties such as Lu for an upper case letter or Nd for a decimal
-number, the Unicode script names such as Arabic or Han, and the derived
-properties Any and L&. A full list is given in the
-<a href="pcrepattern.html"><b>pcrepattern</b></a>
-documentation. Only the short names for properties are supported. For example,
-\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE does not support this.
-<a name="utf8strings"></a></P>
-<br><b>
-Validity of UTF-8 strings
-</b><br>
-<P>
-When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
-are (by default) checked for validity on entry to the relevant functions. From
-release 7.3 of PCRE, the check is according the rules of RFC 3629, which are
-themselves derived from the Unicode specification. Earlier releases of PCRE
-followed the rules of RFC 2279, which allows the full range of 31-bit values (0
-to 0x7FFFFFFF). The current check allows only values in the range U+0 to
-U+10FFFF, excluding U+D800 to U+DFFF.
-</P>
-<P>
-The excluded code points are the "Low Surrogate Area" of Unicode, of which the
-Unicode Standard says this: "The Low Surrogate Area does not contain any
-character assignments, consequently no character code charts or namelists are
-provided for this area. Surrogates are reserved for use with UTF-16 and then
-must be used in pairs." The code points that are encoded by UTF-16 pairs are
-available as independent code points in the UTF-8 encoding. (In other words,
-the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
-UTF-8.)
-</P>
-<P>
-If an invalid UTF-8 string is passed to PCRE, an error return is given. At
-compile time, the only additional information is the offset to the first byte
-of the failing character. The runtime functions (<b>pcre_exec()</b> and
-<b>pcre_dfa_exec()</b>), pass back this information as well as a more detailed
-reason code if the caller has provided memory in which to do this.
-</P>
-<P>
-In some situations, you may already know that your strings are valid, and
-therefore want to skip these checks in order to improve performance. If you set
-the PCRE_NO_UTF8_CHECK flag at compile time or at run time, PCRE assumes that
-the pattern or subject it is given (respectively) contains only valid UTF-8
-codes. In this case, it does not diagnose an invalid UTF-8 string.
-</P>
-<P>
-If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
-happens depends on why the string is invalid. If the string conforms to the
-"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
-in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
-test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
-rules of RFC 2279. However, if the string does not even conform to RFC 2279,
-the result is undefined. Your program may crash.
-</P>
-<P>
-If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
-encoded in a UTF-8-like manner as per the old RFC, you can set
-PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
-situation, you will have to apply your own validity check.
-</P>
-<br><b>
-General comments about UTF-8 mode
-</b><br>
-<P>
-1. An unbraced hexadecimal escape sequence (such as \xb3) matches a two-byte
-UTF-8 character if the value is greater than 127.
-</P>
-<P>
-2. Octal numbers up to \777 are recognized, and match two-byte UTF-8
-characters for values greater than \177.
-</P>
-<P>
-3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
-bytes, for example: \x{100}{3}.
-</P>
-<P>
-4. The dot metacharacter matches one UTF-8 character instead of a single byte.
-</P>
-<P>
-5. The escape sequence \C can be used to match a single byte in UTF-8 mode,
-but its use can lead to some strange effects. This facility is not available in
-the alternative matching function, <b>pcre_dfa_exec()</b>.
-</P>
-<P>
-6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
-test characters of any code value, but, by default, the characters that PCRE
-recognizes as digits, spaces, or word characters remain the same set as before,
-all with values less than 256. This remains true even when PCRE is built to
-include Unicode property support, because to do otherwise would slow down PCRE
-in many common cases. Note in particular that this applies to \b and \B,
-because they are defined in terms of \w and \W. If you really want to test
-for a wider sense of, say, "digit", you can use explicit Unicode property tests
-such as \p{Nd}. Alternatively, if you set the PCRE_UCP option, the way that
-the character escapes work is changed so that Unicode properties are used to
-determine which characters match. There are more details in the section on
-<a href="pcrepattern.html#genericchartypes">generic character types</a>
-in the
-<a href="pcrepattern.html"><b>pcrepattern</b></a>
-documentation.
-</P>
-<P>
-7. Similarly, characters that match the POSIX named character classes are all
-low-valued characters, unless the PCRE_UCP option is set.
-</P>
-<P>
-8. However, the horizontal and vertical whitespace matching escapes (\h, \H,
-\v, and \V) do match all the appropriate Unicode characters, whether or not
-PCRE_UCP is set.
-</P>
-<P>
-9. Case-insensitive matching applies only to characters whose values are less
-than 128, unless PCRE is built with Unicode property support. Even when Unicode
-property support is available, PCRE still uses its own character tables when
-checking the case of low-valued characters, so as not to degrade performance.
-The Unicode property information is used only for characters with higher
-values. Furthermore, PCRE supports case-insensitive matching only when there is
-a one-to-one mapping between a letter's cases. There are a small number of
-many-to-one mappings in Unicode; these are not supported by PCRE.
-</P>
-<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
-<P>
 Philip Hazel
 <br>
 University Computing Service
@@ -308,9 +129,9 @@
 taken it away. If you want to email me, use my two initials, followed by the
 two digits 10, at the domain cam.ac.uk.
 </P>
-<br><a name="SEC6" href="#TOC1">REVISION</a><br>
+<br><a name="SEC4" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 07 May 2011
+Last updated: 24 August 2011
 <br>
 Copyright &copy; 1997-2011 University of Cambridge.
 <br>

Added: code/trunk/doc/html/pcre_assign_jit_stack.html
===================================================================

Added: code/trunk/doc/html/pcre_free_study.html
===================================================================

Added: code/trunk/doc/html/pcre_jit_stack_alloc.html
===================================================================

Added: code/trunk/doc/html/pcre_jit_stack_free.html
===================================================================

Modified: code/trunk/doc/html/pcreapi.html
===================================================================
--- code/trunk/doc/html/pcreapi.html    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/html/pcreapi.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -706,9 +706,7 @@
 available only when PCRE is built to include UTF-8 support. If not, the use
 of this option provokes an error. Details of how this option changes the
 behaviour of PCRE are given in the
-<a href="pcre.html#utf8support">section on UTF-8 support</a>
-in the main
-<a href="pcre.html"><b>pcre</b></a>
+<a href="pcreunicode.html"><b>pcreunicode</b></a>
 page.
 <pre>
   PCRE_NO_UTF8_CHECK

Modified: code/trunk/doc/html/pcrecompat.html
===================================================================
--- code/trunk/doc/html/pcrecompat.html    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/html/pcrecompat.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -23,9 +23,7 @@
 <P>
 1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what
 it does have are given in the
-<a href="pcre.html#utf8support">section on UTF-8 support</a>
-in the main
-<a href="pcre.html"><b>pcre</b></a>
+<a href="pcreunicode.html"><b>pcreunicode</b></a>
 page.
 </P>
 <P>
@@ -197,7 +195,7 @@
 REVISION
 </b><br>
 <P>
-Last updated: 24 July 2011
+Last updated: 24 August 2011
 <br>
 Copyright &copy; 1997-2011 University of Cambridge.
 <br>

Added: code/trunk/doc/html/pcrejit.html
===================================================================
--- code/trunk/doc/html/pcrejit.html                            (rev 0)
+++ code/trunk/doc/html/pcrejit.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,19 @@
+<html>
+<head>
+<title>pcrejit specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcrejit man page</h1>
+<p>
+Return to the <a href="index.html">PCRE index page</a>.
+</p>
+<p>
+This page is part of the PCRE HTML documentation. It was generated automatically
+from the original man page. If there is any nonsense in it, please consult the
+man page, in case the conversion went wrong.
+<br>
+<ul>
+</ul>
+<p>
+Return to the <a href="index.html">PCRE index page</a>.
+</p>

Added: code/trunk/doc/html/pcrelimits.html
===================================================================
--- code/trunk/doc/html/pcrelimits.html                            (rev 0)
+++ code/trunk/doc/html/pcrelimits.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,74 @@
+<html>
+<head>
+<title>pcrelimits specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcrelimits man page</h1>
+<p>
+Return to the <a href="index.html">PCRE index page</a>.
+</p>
+<p>
+This page is part of the PCRE HTML documentation. It was generated automatically
+from the original man page. If there is any nonsense in it, please consult the
+man page, in case the conversion went wrong.
+<br>
+<br><b>
+SIZE AND OTHER LIMITATIONS
+</b><br>
+<P>
+There are some size limitations in PCRE but it is hoped that they will never in
+practice be relevant.
+</P>
+<P>
+The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
+compiled with the default internal linkage size of 2. If you want to process
+regular expressions that are truly enormous, you can compile PCRE with an
+internal linkage size of 3 or 4 (see the <b>README</b> file in the source
+distribution and the
+<a href="pcrebuild.html"><b>pcrebuild</b></a>
+documentation for details). In these cases the limit is substantially larger.
+However, the speed of execution is slower.
+</P>
+<P>
+All values in repeating quantifiers must be less than 65536.
+</P>
+<P>
+There is no limit to the number of parenthesized subpatterns, but there can be
+no more than 65535 capturing subpatterns.
+</P>
+<P>
+The maximum length of name for a named subpattern is 32 characters, and the
+maximum number of named subpatterns is 10000.
+</P>
+<P>
+The maximum length of a subject string is the largest positive number that an
+integer variable can hold. However, when using the traditional matching
+function, PCRE uses recursion to handle subpatterns and indefinite repetition.
+This means that the available stack space may limit the size of a subject
+string that can be processed by certain patterns. For a discussion of stack
+issues, see the
+<a href="pcrestack.html"><b>pcrestack</b></a>
+documentation.
+</P>
+<br><b>
+AUTHOR
+</b><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><b>
+REVISION
+</b><br>
+<P>
+Last updated: 24 August 2011
+<br>
+Copyright &copy; 1997-2011 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE index page</a>.
+</p>

Modified: code/trunk/doc/html/pcrepattern.html
===================================================================
--- code/trunk/doc/html/pcrepattern.html    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/html/pcrepattern.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -72,9 +72,7 @@
 option. This feature is not Perl-compatible. How setting UTF-8 mode affects
 pattern matching is mentioned in several places below. There is also a summary
 of UTF-8 features in the
-<a href="pcre.html#utf8support">section on UTF-8 support</a>
-in the main
-<a href="pcre.html"><b>pcre</b></a>
+<a href="pcreunicode.html"><b>pcreunicode</b></a>
 page.
 </P>
 <P>
@@ -2740,7 +2738,7 @@
 </P>
 <br><a name="SEC28" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 24 July 2011
+Last updated: 24 August 2011
 <br>
 Copyright &copy; 1997-2011 University of Cambridge.
 <br>

Added: code/trunk/doc/html/pcreunicode.html
===================================================================
--- code/trunk/doc/html/pcreunicode.html                            (rev 0)
+++ code/trunk/doc/html/pcreunicode.html    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,177 @@
+<html>
+<head>
+<title>pcreunicode specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcreunicode man page</h1>
+<p>
+Return to the <a href="index.html">PCRE index page</a>.
+</p>
+<p>
+This page is part of the PCRE HTML documentation. It was generated automatically
+from the original man page. If there is any nonsense in it, please consult the
+man page, in case the conversion went wrong.
+<br>
+<br><b>
+UTF-8 AND UNICODE PROPERTY SUPPORT
+</b><br>
+<P>
+In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
+the code, and, in addition, you must call
+<a href="pcre_compile.html"><b>pcre_compile()</b></a>
+with the PCRE_UTF8 option flag, or the pattern must start with the sequence
+(*UTF8). When either of these is the case, both the pattern and any subject
+strings that are matched against it are treated as UTF-8 strings instead of
+strings of 1-byte characters. PCRE does not support any other formats (in 
+particular, it does not support UTF-16).
+</P>
+<P>
+If you compile PCRE with UTF-8 support, but do not use it at run time, the
+library will be a bit bigger, but the additional run time overhead is limited
+to testing the PCRE_UTF8 flag occasionally, so should not be very big.
+</P>
+<P>
+If PCRE is built with Unicode character property support (which implies UTF-8
+support), the escape sequences \p{..}, \P{..}, and \X are supported.
+The available properties that can be tested are limited to the general
+category properties such as Lu for an upper case letter or Nd for a decimal
+number, the Unicode script names such as Arabic or Han, and the derived
+properties Any and L&. A full list is given in the
+<a href="pcrepattern.html"><b>pcrepattern</b></a>
+documentation. Only the short names for properties are supported. For example,
+\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
+Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
+compatibility with Perl 5.6. PCRE does not support this.
+<a name="utf8strings"></a></P>
+<br><b>
+Validity of UTF-8 strings
+</b><br>
+<P>
+When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
+are (by default) checked for validity on entry to the relevant functions. From
+release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
+themselves derived from the Unicode specification. Earlier releases of PCRE
+followed the rules of RFC 2279, which allows the full range of 31-bit values (0
+to 0x7FFFFFFF). The current check allows only values in the range U+0 to
+U+10FFFF, excluding U+D800 to U+DFFF.
+</P>
+<P>
+The excluded code points are the "Low Surrogate Area" of Unicode, of which the
+Unicode Standard says this: "The Low Surrogate Area does not contain any
+character assignments, consequently no character code charts or namelists are
+provided for this area. Surrogates are reserved for use with UTF-16 and then
+must be used in pairs." The code points that are encoded by UTF-16 pairs are
+available as independent code points in the UTF-8 encoding. (In other words,
+the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
+UTF-8.)
+</P>
+<P>
+If an invalid UTF-8 string is passed to PCRE, an error return is given. At
+compile time, the only additional information is the offset to the first byte
+of the failing character. The runtime functions <b>pcre_exec()</b> and
+<b>pcre_dfa_exec()</b> also pass back this information, as well as a more
+detailed reason code if the caller has provided memory in which to do this.
+</P>
+<P>
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance. If you set
+the PCRE_NO_UTF8_CHECK flag at compile time or at run time, PCRE assumes that
+the pattern or subject it is given (respectively) contains only valid UTF-8
+codes. In this case, it does not diagnose an invalid UTF-8 string.
+</P>
+<P>
+If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
+happens depends on why the string is invalid. If the string conforms to the
+"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
+in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
+test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
+rules of RFC 2279. However, if the string does not even conform to RFC 2279,
+the result is undefined. Your program may crash.
+</P>
+<P>
+If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
+encoded in a UTF-8-like manner as per the old RFC, you can set
+PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
+situation, you will have to apply your own validity check.
+</P>
+<br><b>
+General comments about UTF-8 mode
+</b><br>
+<P>
+1. An unbraced hexadecimal escape sequence (such as \xb3) matches a two-byte
+UTF-8 character if the value is greater than 127.
+</P>
+<P>
+2. Octal numbers up to \777 are recognized, and match two-byte UTF-8
+characters for values greater than \177.
+</P>
+<P>
+3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
+bytes, for example: \x{100}{3}.
+</P>
+<P>
+4. The dot metacharacter matches one UTF-8 character instead of a single byte.
+</P>
+<P>
+5. The escape sequence \C can be used to match a single byte in UTF-8 mode,
+but its use can lead to some strange effects. This facility is not available in
+the alternative matching function, <b>pcre_dfa_exec()</b>.
+</P>
+<P>
+6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
+test characters of any code value, but, by default, the characters that PCRE
+recognizes as digits, spaces, or word characters remain the same set as before,
+all with values less than 256. This remains true even when PCRE is built to
+include Unicode property support, because to do otherwise would slow down PCRE
+in many common cases. Note in particular that this applies to \b and \B,
+because they are defined in terms of \w and \W. If you really want to test
+for a wider sense of, say, "digit", you can use explicit Unicode property tests
+such as \p{Nd}. Alternatively, if you set the PCRE_UCP option, the way that
+the character escapes work is changed so that Unicode properties are used to
+determine which characters match. There are more details in the section on
+<a href="pcrepattern.html#genericchartypes">generic character types</a>
+in the
+<a href="pcrepattern.html"><b>pcrepattern</b></a>
+documentation.
+</P>
+<P>
+7. Similarly, characters that match the POSIX named character classes are all
+low-valued characters, unless the PCRE_UCP option is set.
+</P>
+<P>
+8. However, the horizontal and vertical whitespace matching escapes (\h, \H,
+\v, and \V) do match all the appropriate Unicode characters, whether or not
+PCRE_UCP is set.
+</P>
+<P>
+9. Case-insensitive matching applies only to characters whose values are less
+than 128, unless PCRE is built with Unicode property support. Even when Unicode
+property support is available, PCRE still uses its own character tables when
+checking the case of low-valued characters, so as not to degrade performance.
+The Unicode property information is used only for characters with higher
+values. Furthermore, PCRE supports case-insensitive matching only when there is
+a one-to-one mapping between a letter's cases. There are a small number of
+many-to-one mappings in Unicode; these are not supported by PCRE.
+</P>
+<br><b>
+AUTHOR
+</b><br>
+<P>
+Philip Hazel
+<br>
+University Computing Service
+<br>
+Cambridge CB2 3QH, England.
+<br>
+</P>
+<br><b>
+REVISION
+</b><br>
+<P>
+Last updated: 24 August 2011
+<br>
+Copyright &copy; 1997-2011 University of Cambridge.
+<br>
+<p>
+Return to the <a href="index.html">PCRE index page</a>.
+</p>

Modified: code/trunk/doc/index.html.src
===================================================================
--- code/trunk/doc/index.html.src    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/index.html.src    2011-08-28 15:23:03 UTC (rev 678)
@@ -42,6 +42,12 @@
 <tr><td><a href="pcregrep.html">pcregrep</a></td>
     <td>&nbsp;&nbsp;The <b>pcregrep</b> command</td></tr>

+<tr><td><a href="pcrejit.html">pcrejit</a></td>
+    <td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
+
+<tr><td><a href="pcrelimits.html">pcrelimits</a></td>
+    <td>&nbsp;&nbsp;Details of size and other limits</td></tr>
+
 <tr><td><a href="pcrematching.html">pcrematching</a></td>
     <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>

@@ -71,6 +77,9 @@

 <tr><td><a href="pcretest.html">pcretest</a></td>
     <td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr>
+
+<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
+    <td>&nbsp;&nbsp;Discussion of Unicode and UTF-8 support</td></tr>
 </table>

<p>
@@ -80,6 +89,9 @@

 <table>

+<tr><td><a href="pcre_assign_jit_stack.html">pcre_assign_jit_stack</a></td>
+    <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
+
 <tr><td><a href="pcre_compile.html">pcre_compile</a></td>
     <td>&nbsp;&nbsp;Compile a regular expression</td></tr>

@@ -99,6 +111,9 @@
     <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
     (DFA algorithm; <i>not</i> Perl compatible)</td></tr>

+<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
+    <td>&nbsp;&nbsp;Free study data</td></tr>
+
 <tr><td><a href="pcre_exec.html">pcre_exec</a></td>
     <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
     (Perl compatible)</td></tr>
@@ -127,6 +142,12 @@
 <tr><td><a href="pcre_info.html">pcre_info</a></td>
     <td>&nbsp;&nbsp;Obsolete information extraction function</td></tr>

+<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
+    <td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
+
+<tr><td><a href="pcre_jit_stack_free.html">pcre_jit_stack_free</a></td>
+    <td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
+
 <tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
     <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>

Modified: code/trunk/doc/pcre.3
===================================================================
--- code/trunk/doc/pcre.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcre.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -95,6 +95,8 @@
   pcrecpp           details of the C++ wrapper
   pcredemo          a demonstration C program that uses PCRE
   pcregrep          description of the \fBpcregrep\fP command
+  pcrejit           discussion of the just-in-time optimization support 
+  pcrelimits        details of size and other limits 
   pcrematching      discussion of the two matching algorithms
   pcrepartial       details of the partial matching facility
 .\" JOIN
@@ -107,189 +109,12 @@
   pcrestack         discussion of stack usage
   pcresyntax        quick syntax reference
   pcretest          description of the \fBpcretest\fP testing command
+  pcreunicode       discussion of Unicode and UTF-8 support 
 .sp
 In addition, in the "man" and HTML formats, there is a short page for each
 C library function, listing its arguments and results.
 .
 .
-.SH LIMITATIONS
-.rs
-.sp
-There are some size limitations in PCRE but it is hoped that they will never in
-practice be relevant.
-.P
-The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
-compiled with the default internal linkage size of 2. If you want to process
-regular expressions that are truly enormous, you can compile PCRE with an
-internal linkage size of 3 or 4 (see the \fBREADME\fP file in the source
-distribution and the
-.\" HREF
-\fBpcrebuild\fP
-.\"
-documentation for details). In these cases the limit is substantially larger.
-However, the speed of execution is slower.
-.P
-All values in repeating quantifiers must be less than 65536.
-.P
-There is no limit to the number of parenthesized subpatterns, but there can be
-no more than 65535 capturing subpatterns.
-.P
-The maximum length of name for a named subpattern is 32 characters, and the
-maximum number of named subpatterns is 10000.
-.P
-The maximum length of a subject string is the largest positive number that an
-integer variable can hold. However, when using the traditional matching
-function, PCRE uses recursion to handle subpatterns and indefinite repetition.
-This means that the available stack space may limit the size of a subject
-string that can be processed by certain patterns. For a discussion of stack
-issues, see the
-.\" HREF
-\fBpcrestack\fP
-.\"
-documentation.
-.
-.
-.\" HTML <a name="utf8support"></a>
-.SH "UTF-8 AND UNICODE PROPERTY SUPPORT"
-.rs
-.sp
-From release 3.3, PCRE has had some support for character strings encoded in
-the UTF-8 format. For release 4.0 this was greatly extended to cover most
-common requirements, and in release 5.0 additional support for Unicode general
-category properties was added.
-.P
-In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
-the code, and, in addition, you must call
-.\" HREF
-\fBpcre_compile()\fP
-.\"
-with the PCRE_UTF8 option flag, or the pattern must start with the sequence
-(*UTF8). When either of these is the case, both the pattern and any subject
-strings that are matched against it are treated as UTF-8 strings instead of
-strings of 1-byte characters.
-.P
-If you compile PCRE with UTF-8 support, but do not use it at run time, the
-library will be a bit bigger, but the additional run time overhead is limited
-to testing the PCRE_UTF8 flag occasionally, so should not be very big.
-.P
-If PCRE is built with Unicode character property support (which implies UTF-8
-support), the escape sequences \ep{..}, \eP{..}, and \eX are supported.
-The available properties that can be tested are limited to the general
-category properties such as Lu for an upper case letter or Nd for a decimal
-number, the Unicode script names such as Arabic or Han, and the derived
-properties Any and L&. A full list is given in the
-.\" HREF
-\fBpcrepattern\fP
-.\"
-documentation. Only the short names for properties are supported. For example,
-\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE does not support this.
-.
-.
-.\" HTML <a name="utf8strings"></a>
-.SS "Validity of UTF-8 strings"
-.rs
-.sp
-When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
-are (by default) checked for validity on entry to the relevant functions. From
-release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
-themselves derived from the Unicode specification. Earlier releases of PCRE
-followed the rules of RFC 2279, which allows the full range of 31-bit values (0
-to 0x7FFFFFFF). The current check allows only values in the range U+0 to
-U+10FFFF, excluding U+D800 to U+DFFF.
-.P
-The excluded code points are the "Low Surrogate Area" of Unicode, of which the
-Unicode Standard says this: "The Low Surrogate Area does not contain any
-character assignments, consequently no character code charts or namelists are
-provided for this area. Surrogates are reserved for use with UTF-16 and then
-must be used in pairs." The code points that are encoded by UTF-16 pairs are
-available as independent code points in the UTF-8 encoding. (In other words,
-the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
-UTF-8.)
-.P
-If an invalid UTF-8 string is passed to PCRE, an error return is given. At
-compile time, the only additional information is the offset to the first byte
-of the failing character. The runtime functions (\fBpcre_exec()\fP and
-\fBpcre_dfa_exec()\fP) pass back this information as well as a more detailed
-reason code if the caller has provided memory in which to do this.
-.P
-In some situations, you may already know that your strings are valid, and
-therefore want to skip these checks in order to improve performance. If you set
-the PCRE_NO_UTF8_CHECK flag at compile time or at run time, PCRE assumes that
-the pattern or subject it is given (respectively) contains only valid UTF-8
-codes. In this case, it does not diagnose an invalid UTF-8 string.
-.P
-If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
-happens depends on why the string is invalid. If the string conforms to the
-"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
-in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
-test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
-rules of RFC 2279. However, if the string does not even conform to RFC 2279,
-the result is undefined. Your program may crash.
-.P
-If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
-encoded in a UTF-8-like manner as per the old RFC, you can set
-PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
-situation, you will have to apply your own validity check.
-.
-.
-.SS "General comments about UTF-8 mode"
-.rs
-.sp
-1. An unbraced hexadecimal escape sequence (such as \exb3) matches a two-byte
-UTF-8 character if the value is greater than 127.
-.P
-2. Octal numbers up to \e777 are recognized, and match two-byte UTF-8
-characters for values greater than \e177.
-.P
-3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
-bytes, for example: \ex{100}{3}.
-.P
-4. The dot metacharacter matches one UTF-8 character instead of a single byte.
-.P
-5. The escape sequence \eC can be used to match a single byte in UTF-8 mode,
-but its use can lead to some strange effects. This facility is not available in
-the alternative matching function, \fBpcre_dfa_exec()\fP.
-.P
-6. The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly
-test characters of any code value, but, by default, the characters that PCRE
-recognizes as digits, spaces, or word characters remain the same set as before,
-all with values less than 256. This remains true even when PCRE is built to
-include Unicode property support, because to do otherwise would slow down PCRE
-in many common cases. Note in particular that this applies to \eb and \eB,
-because they are defined in terms of \ew and \eW. If you really want to test
-for a wider sense of, say, "digit", you can use explicit Unicode property tests
-such as \ep{Nd}. Alternatively, if you set the PCRE_UCP option, the way that
-the character escapes work is changed so that Unicode properties are used to
-determine which characters match. There are more details in the section on
-.\" HTML <a href="pcrepattern.html#genericchartypes">
-.\" </a>
-generic character types
-.\"
-in the
-.\" HREF
-\fBpcrepattern\fP
-.\"
-documentation.
-.P
-7. Similarly, characters that match the POSIX named character classes are all
-low-valued characters, unless the PCRE_UCP option is set.
-.P
-8. However, the horizontal and vertical whitespace matching escapes (\eh, \eH,
-\ev, and \eV) do match all the appropriate Unicode characters, whether or not
-PCRE_UCP is set.
-.P
-9. Case-insensitive matching applies only to characters whose values are less
-than 128, unless PCRE is built with Unicode property support. Even when Unicode
-property support is available, PCRE still uses its own character tables when
-checking the case of low-valued characters, so as not to degrade performance.
-The Unicode property information is used only for characters with higher
-values. Furthermore, PCRE supports case-insensitive matching only when there is
-a one-to-one mapping between a letter's cases. There are a small number of
-many-to-one mappings in Unicode; these are not supported by PCRE.
-.
-.
 .SH AUTHOR
 .rs
 .sp
@@ -308,6 +133,6 @@
 .rs
 .sp
 .nf
-Last updated: 07 May 2011
+Last updated: 24 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcre.txt
===================================================================
--- code/trunk/doc/pcre.txt    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcre.txt    2011-08-28 15:23:03 UTC (rev 678)
@@ -85,6 +85,8 @@
          pcrecpp           details of the C++ wrapper
          pcredemo          a demonstration C program that uses PCRE
          pcregrep          description of the pcregrep command
+         pcrejit           discussion of the just-in-time optimization support
+         pcrelimits        details of size and other limits
          pcrematching      discussion of the two matching algorithms
          pcrepartial       details of the partial matching facility
          pcrepattern       syntax and semantics of supported
@@ -96,169 +98,12 @@
          pcrestack         discussion of stack usage
          pcresyntax        quick syntax reference
          pcretest          description of the pcretest testing command
+         pcreunicode       discussion of Unicode and UTF-8 support

        In  addition,  in the "man" and HTML formats, there is a short page for
        each C library function, listing its arguments and results.

-LIMITATIONS
-
-       There are some size limitations in PCRE but it is hoped that they  will
-       never in practice be relevant.
-
-       The  maximum  length of a compiled pattern is 65539 (sic) bytes if PCRE
-       is compiled with the default internal linkage size of 2. If you want to
-       process  regular  expressions  that are truly enormous, you can compile
-       PCRE with an internal linkage size of 3 or 4 (see the  README  file  in
-       the  source  distribution and the pcrebuild documentation for details).
-       In these cases the limit is substantially larger.  However,  the  speed
-       of execution is slower.
-
-       All values in repeating quantifiers must be less than 65536.
-
-       There is no limit to the number of parenthesized subpatterns, but there
-       can be no more than 65535 capturing subpatterns.
-
-       The maximum length of name for a named subpattern is 32 characters, and
-       the maximum number of named subpatterns is 10000.
-
-       The  maximum  length of a subject string is the largest positive number
-       that an integer variable can hold. However, when using the  traditional
-       matching function, PCRE uses recursion to handle subpatterns and indef-
-       inite repetition.  This means that the available stack space may  limit
-       the size of a subject string that can be processed by certain patterns.
-       For a discussion of stack issues, see the pcrestack documentation.
-
-
-UTF-8 AND UNICODE PROPERTY SUPPORT
-
-       From release 3.3, PCRE has  had  some  support  for  character  strings
-       encoded  in the UTF-8 format. For release 4.0 this was greatly extended
-       to cover most common requirements, and in release 5.0  additional  sup-
-       port for Unicode general category properties was added.
-
-       In order to process UTF-8 strings, you must build PCRE to include UTF-8
-       support in the code, and, in addition,  you  must  call  pcre_compile()
-       with  the  PCRE_UTF8  option  flag,  or the pattern must start with the
-       sequence (*UTF8). When either of these is the case,  both  the  pattern
-       and  any  subject  strings  that  are matched against it are treated as
-       UTF-8 strings instead of strings of 1-byte characters.
-
-       If you compile PCRE with UTF-8 support, but do not use it at run  time,
-       the  library will be a bit bigger, but the additional run time overhead
-       is limited to testing the PCRE_UTF8 flag occasionally, so should not be
-       very big.
-
-       If PCRE is built with Unicode character property support (which implies
-       UTF-8 support), the escape sequences \p{..}, \P{..}, and  \X  are  sup-
-       ported.  The available properties that can be tested are limited to the
-       general category properties such as Lu for an upper case letter  or  Nd
-       for  a  decimal number, the Unicode script names such as Arabic or Han,
-       and the derived properties Any and L&. A full  list  is  given  in  the
-       pcrepattern documentation. Only the short names for properties are sup-
-       ported. For example, \p{L} matches a letter. Its Perl synonym,  \p{Let-
-       ter},  is  not  supported.   Furthermore,  in Perl, many properties may
-       optionally be prefixed by "Is", for compatibility with Perl  5.6.  PCRE
-       does not support this.
-
-   Validity of UTF-8 strings
-
-       When  you  set  the  PCRE_UTF8 flag, the strings passed as patterns and
-       subjects are (by default) checked for validity on entry to the relevant
-       functions. From release 7.3 of PCRE, the check is according to the rules
-       of RFC 3629, which are themselves derived from the  Unicode  specifica-
-       tion.  Earlier  releases  of PCRE followed the rules of RFC 2279, which
-       allows the full range of 31-bit values (0 to 0x7FFFFFFF).  The  current
-       check allows only values in the range U+0 to U+10FFFF, excluding U+D800
-       to U+DFFF.
-
-       The excluded code points are the "Low Surrogate Area"  of  Unicode,  of
-       which  the Unicode Standard says this: "The Low Surrogate Area does not
-       contain any  character  assignments,  consequently  no  character  code
-       charts or namelists are provided for this area. Surrogates are reserved
-       for use with UTF-16 and then must be used in pairs."  The  code  points
-       that  are  encoded  by  UTF-16  pairs are available as independent code
-       points in the UTF-8 encoding. (In  other  words,  the  whole  surrogate
-       thing is a fudge for UTF-16 which unfortunately messes up UTF-8.)
-
-       If an invalid UTF-8 string is passed to PCRE, an error return is given.
-       At compile time, the only additional information is the offset  to  the
-       first byte of the failing character. The runtime functions (pcre_exec()
-       and pcre_dfa_exec()), pass back this information  as  well  as  a  more
-       detailed  reason  code if the caller has provided memory in which to do
-       this.
-
-       In some situations, you may already know that your strings  are  valid,
-       and  therefore  want  to  skip these checks in order to improve perfor-
-       mance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or at run
-       time,  PCRE  assumes  that  the pattern or subject it is given (respec-
-       tively) contains only valid UTF-8 codes. In  this  case,  it  does  not
-       diagnose an invalid UTF-8 string.
-
-       If  you  pass  an  invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set,
-       what happens depends on why the string is invalid. If the  string  con-
-       forms to the "old" definition of UTF-8 (RFC 2279), it is processed as a
-       string of characters in the range 0  to  0x7FFFFFFF.  In  other  words,
-       apart from the initial validity test, PCRE (when in UTF-8 mode) handles
-       strings according to the more liberal rules of RFC  2279.  However,  if
-       the  string does not even conform to RFC 2279, the result is undefined.
-       Your program may crash.
-
-       If you want to process strings  of  values  in  the  full  range  0  to
-       0x7FFFFFFF,  encoded in a UTF-8-like manner as per the old RFC, you can
-       set PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in
-       this situation, you will have to apply your own validity check.
-
-   General comments about UTF-8 mode
-
-       1.  An  unbraced  hexadecimal  escape sequence (such as \xb3) matches a
-       two-byte UTF-8 character if the value is greater than 127.
-
-       2. Octal numbers up to \777 are recognized, and  match  two-byte  UTF-8
-       characters for values greater than \177.
-
-       3.  Repeat quantifiers apply to complete UTF-8 characters, not to indi-
-       vidual bytes, for example: \x{100}{3}.
-
-       4. The dot metacharacter matches one UTF-8 character instead of a  sin-
-       gle byte.
-
-       5.  The  escape sequence \C can be used to match a single byte in UTF-8
-       mode, but its use can lead to some strange effects.  This  facility  is
-       not available in the alternative matching function, pcre_dfa_exec().
-
-       6.  The  character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
-       test characters of any code value, but, by default, the characters that
-       PCRE  recognizes  as digits, spaces, or word characters remain the same
-       set as before, all with values less than 256. This  remains  true  even
-       when  PCRE  is built to include Unicode property support, because to do
-       otherwise would slow down PCRE in many common cases. Note in particular
-       that this applies to \b and \B, because they are defined in terms of \w
-       and \W. If you really want to test for a wider sense of, say,  "digit",
-       you  can  use  explicit Unicode property tests such as \p{Nd}. Alterna-
-       tively, if you set the PCRE_UCP option,  the  way  that  the  character
-       escapes  work  is changed so that Unicode properties are used to deter-
-       mine which characters match. There are more details in the  section  on
-       generic character types in the pcrepattern documentation.
-
-       7.  Similarly,  characters that match the POSIX named character classes
-       are all low-valued characters, unless the PCRE_UCP option is set.
-
-       8. However, the horizontal and  vertical  whitespace  matching  escapes
-       (\h,  \H,  \v, and \V) do match all the appropriate Unicode characters,
-       whether or not PCRE_UCP is set.
-
-       9. Case-insensitive matching applies only to  characters  whose  values
-       are  less than 128, unless PCRE is built with Unicode property support.
-       Even when Unicode property support is available, PCRE  still  uses  its
-       own  character  tables when checking the case of low-valued characters,
-       so as not to degrade performance.  The Unicode property information  is
-       used only for characters with higher values. Furthermore, PCRE supports
-       case-insensitive matching only  when  there  is  a  one-to-one  mapping
-       between  a letter's cases. There are a small number of many-to-one map-
-       pings in Unicode; these are not supported by PCRE.
-
-
 AUTHOR

        Philip Hazel
@@ -272,11 +117,11 @@

REVISION

-       Last updated: 07 May 2011
+       Last updated: 24 August 2011
        Copyright (c) 1997-2011 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCREBUILD(3)                                                      PCREBUILD(3)

@@ -622,8 +467,8 @@
        Last updated: 02 August 2011
        Copyright (c) 1997-2011 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCREMATCHING(3)                                                PCREMATCHING(3)

@@ -826,8 +671,8 @@
        Last updated: 17 November 2010
        Copyright (c) 1997-2010 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCREAPI(3)                                                          PCREAPI(3)

@@ -1453,8 +1298,8 @@
        strings  of  UTF-8 characters instead of single-byte character strings.
        However, it is available only when PCRE is built to include UTF-8  sup-
        port.  If not, the use of this option provokes an error. Details of how
-       this option changes the behaviour of PCRE are given in the  section  on
-       UTF-8 support in the main pcre page.
+       this option changes the behaviour of PCRE are given in the  pcreunicode
+       page.

          PCRE_NO_UTF8_CHECK

@@ -2998,8 +2843,8 @@
        Last updated: 13 August 2011
        Copyright (c) 1997-2011 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCRECALLOUT(3)                                                  PCRECALLOUT(3)

@@ -3187,8 +3032,8 @@
        Last updated: 31 July 2011
        Copyright (c) 1997-2011 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCRECOMPAT(3)                                                    PCRECOMPAT(3)

@@ -3203,54 +3048,53 @@
        respect to Perl versions 5.10 and above.

        1.  PCRE has only a subset of Perl's UTF-8 and Unicode support. Details
-       of what it does have are given in the section on UTF-8 support  in  the
-       main pcre page.
+       of what it does have are given in the pcreunicode page.

        2. PCRE allows repeat quantifiers only on parenthesized assertions, but
-       they do not mean what you might think. For example, (?!a){3}  does  not
+       they  do  not mean what you might think. For example, (?!a){3} does not
        assert that the next three characters are not "a". It just asserts that
        the next character is not "a" three times (in principle: PCRE optimizes
        this to run the assertion just once). Perl allows repeat quantifiers on
        other assertions such as \b, but these do not seem to have any use.

-       3. Capturing subpatterns that occur inside  negative  lookahead  asser-
-       tions  are  counted,  but their entries in the offsets vector are never
-       set. Perl sets its numerical variables from any such patterns that  are
+       3.  Capturing  subpatterns  that occur inside negative lookahead asser-
+       tions are counted, but their entries in the offsets  vector  are  never
+       set.  Perl sets its numerical variables from any such patterns that are
        matched before the assertion fails to match something (thereby succeed-
-       ing), but only if the negative lookahead assertion  contains  just  one
+       ing),  but  only  if the negative lookahead assertion contains just one
        branch.

-       4.  Though  binary zero characters are supported in the subject string,
+       4. Though binary zero characters are supported in the  subject  string,
        they are not allowed in a pattern string because it is passed as a nor-
        mal C string, terminated by zero. The escape sequence \0 can be used in
        the pattern to represent a binary zero.

-       5. The following Perl escape sequences are not supported: \l,  \u,  \L,
-       \U,  and  \N when followed by a character name or Unicode value. (\N on
+       5.  The  following Perl escape sequences are not supported: \l, \u, \L,
+       \U, and \N when followed by a character name or Unicode value.  (\N  on
        its own, matching a non-newline character, is supported.) In fact these
-       are  implemented  by Perl's general string-handling and are not part of
-       its pattern matching engine. If any of these are encountered  by  PCRE,
+       are implemented by Perl's general string-handling and are not  part  of
+       its  pattern  matching engine. If any of these are encountered by PCRE,
        an error is generated.

-       6.  The Perl escape sequences \p, \P, and \X are supported only if PCRE
-       is built with Unicode character property support. The  properties  that
-       can  be tested with \p and \P are limited to the general category prop-
-       erties such as Lu and Nd, script names such as Greek or  Han,  and  the
-       derived  properties  Any  and  L&. PCRE does support the Cs (surrogate)
-       property, which Perl does not; the  Perl  documentation  says  "Because
+       6. The Perl escape sequences \p, \P, and \X are supported only if  PCRE
+       is  built  with Unicode character property support. The properties that
+       can be tested with \p and \P are limited to the general category  prop-
+       erties  such  as  Lu and Nd, script names such as Greek or Han, and the
+       derived properties Any and L&. PCRE does  support  the  Cs  (surrogate)
+       property,  which  Perl  does  not; the Perl documentation says "Because
        Perl hides the need for the user to understand the internal representa-
-       tion of Unicode characters, there is no need to implement the  somewhat
+       tion  of Unicode characters, there is no need to implement the somewhat
        messy concept of surrogates."

-       7.  PCRE implements a simpler version of \X than Perl, which changed to
-       make \X match what Unicode calls an "extended grapheme  cluster".  This
-       is  more  complicated  than an extended Unicode sequence, which is what
+       7. PCRE implements a simpler version of \X than Perl, which changed  to
+       make  \X  match what Unicode calls an "extended grapheme cluster". This
+       is more complicated than an extended Unicode sequence,  which  is  what
        PCRE matches.

        8. PCRE does support the \Q...\E escape for quoting substrings. Charac-
-       ters  in  between  are  treated as literals. This is slightly different
-       from Perl in that $ and @ are  also  handled  as  literals  inside  the
-       quotes.  In Perl, they cause variable interpolation (but of course PCRE
+       ters in between are treated as literals.  This  is  slightly  different
+       from  Perl  in  that  $  and  @ are also handled as literals inside the
+       quotes. In Perl, they cause variable interpolation (but of course  PCRE
        does not have variables). Note the following examples:

            Pattern            PCRE matches      Perl matches
@@ -3260,60 +3104,60 @@
            \Qabc\$xyz\E       abc\$xyz          abc\$xyz
            \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz

-       The \Q...\E sequence is recognized both inside  and  outside  character
+       The  \Q...\E  sequence  is recognized both inside and outside character
        classes.

        9. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
-       constructions. However, there is support for recursive  patterns.  This
-       is  not  available  in Perl 5.8, but it is in Perl 5.10. Also, the PCRE
-       "callout" feature allows an external function to be called during  pat-
+       constructions.  However,  there is support for recursive patterns. This
+       is not available in Perl 5.8, but it is in Perl 5.10.  Also,  the  PCRE
+       "callout"  feature allows an external function to be called during pat-
        tern matching. See the pcrecallout documentation for details.

-       10.  Subpatterns  that  are  called recursively or as "subroutines" are
-       always treated as atomic groups in  PCRE.  This  is  like  Python,  but
-       unlike  Perl. There is a discussion of an example that explains this in
-       more detail in the section on recursion differences from  Perl  in  the
+       10. Subpatterns that are called recursively  or  as  "subroutines"  are
+       always  treated  as  atomic  groups  in  PCRE. This is like Python, but
+       unlike Perl. There is a discussion of an example that explains this  in
+       more  detail  in  the section on recursion differences from Perl in the
        pcrepattern page.

-       11.  There are some differences that are concerned with the settings of
-       captured strings when part of  a  pattern  is  repeated.  For  example,
-       matching  "aba"  against  the  pattern  /^(a(b)?)+$/  in Perl leaves $2
+       11. There are some differences that are concerned with the settings  of
+       captured  strings  when  part  of  a  pattern is repeated. For example,
+       matching "aba" against the  pattern  /^(a(b)?)+$/  in  Perl  leaves  $2
        unset, but in PCRE it is set to "b".

-       12. PCRE's handling of duplicate subpattern numbers and duplicate  sub-
+       12.  PCRE's handling of duplicate subpattern numbers and duplicate sub-
        pattern names is not as general as Perl's. This is a consequence of the
        fact that PCRE works internally just with numbers, using an external ta-
-       ble  to  translate  between numbers and names. In particular, a pattern
-       such as (?|(?<a>A)|(?<b)B), where the two  capturing  parentheses  have
-       the  same  number  but different names, is not supported, and causes an
-       error at compile time. If it were allowed, it would not be possible  to
-       distinguish  which  parentheses matched, because both names map to cap-
+       ble to translate between numbers and names. In  particular,  a  pattern
+       such  as  (?|(?<a>A)|(?<b>B)), where the two capturing parentheses have
+       the same number but different names, is not supported,  and  causes  an
+       error  at compile time. If it were allowed, it would not be possible to
+       distinguish which parentheses matched, because both names map  to  cap-
        turing subpattern number 1. To avoid this confusing situation, an error
        is given at compile time.

-       13.  Perl  recognizes  comments  in some places that PCRE does not, for
-       example, between the ( and ? at the start of a subpattern.  If  the  /x
-       modifier  is set, Perl allows whitespace between ( and ? but PCRE never
+       13. Perl recognizes comments in some places that  PCRE  does  not,  for
+       example,  between  the  ( and ? at the start of a subpattern. If the /x
+       modifier is set, Perl allows whitespace between ( and ? but PCRE  never
        does, even if the PCRE_EXTENDED option is set.

        14. PCRE provides some extensions to the Perl regular expression facil-
-       ities.   Perl  5.10  includes new features that are not in earlier ver-
-       sions of Perl, some of which (such as named parentheses) have  been  in
+       ities.  Perl 5.10 includes new features that are not  in  earlier  ver-
+       sions  of  Perl, some of which (such as named parentheses) have been in
        PCRE for some time. This list is with respect to Perl 5.10:

-       (a)  Although  lookbehind  assertions  in  PCRE must match fixed length
-       strings, each alternative branch of a lookbehind assertion can match  a
-       different  length  of  string.  Perl requires them all to have the same
+       (a) Although lookbehind assertions in  PCRE  must  match  fixed  length
+       strings,  each alternative branch of a lookbehind assertion can match a
+       different length of string. Perl requires them all  to  have  the  same
        length.

-       (b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the  $
+       (b)  If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
        meta-character matches only at the very end of the string.

        (c) If PCRE_EXTRA is set, a backslash followed by a letter with no spe-
        cial meaning is faulted. Otherwise, like Perl, the backslash is quietly
        ignored.  (Perl can be made to issue a warning.)

-       (d)  If  PCRE_UNGREEDY is set, the greediness of the repetition quanti-
+       (d) If PCRE_UNGREEDY is set, the greediness of the  repetition  quanti-
        fiers is inverted, that is, by default they are not greedy, but if fol-
        lowed by a question mark they are.

@@ -3321,10 +3165,10 @@
        tried only at the first matching position in the subject string.

        (f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
-       and  PCRE_NO_AUTO_CAPTURE  options for pcre_exec() have no Perl equiva-
+       and PCRE_NO_AUTO_CAPTURE options for pcre_exec() have no  Perl  equiva-
        lents.

-       (g) The \R escape sequence can be restricted to match only CR,  LF,  or
+       (g)  The  \R escape sequence can be restricted to match only CR, LF, or
        CRLF by the PCRE_BSR_ANYCRLF option.

        (h) The callout facility is PCRE-specific.
@@ -3334,10 +3178,10 @@
        (j) Patterns compiled by PCRE can be saved and re-used at a later time,
        even on different hosts that have the other endianness.

-       (k) The alternative matching function (pcre_dfa_exec())  matches  in  a
+       (k)  The  alternative  matching function (pcre_dfa_exec()) matches in a
        different way and is not Perl-compatible.

-       (l)  PCRE  recognizes some special sequences such as (*CR) at the start
+       (l) PCRE recognizes some special sequences such as (*CR) at  the  start
        of a pattern that set overall options that cannot be changed within the
        pattern.

@@ -3351,11 +3195,11 @@

REVISION

-       Last updated: 24 July 2011
+       Last updated: 24 August 2011
        Copyright (c) 1997-2011 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCREPATTERN(3)                                                  PCREPATTERN(3)

@@ -3391,8 +3235,8 @@
        Starting a pattern with this sequence  is  equivalent  to  setting  the
        PCRE_UTF8  option.  This  feature  is  not Perl-compatible. How setting
        UTF-8 mode affects pattern matching  is  mentioned  in  several  places
-       below.  There  is  also  a  summary of UTF-8 features in the section on
-       UTF-8 support in the main pcre page.
+       below.  There  is  also  a summary of UTF-8 features in the pcreunicode
+       page.

        Another special sequence that may appear at the start of a  pattern  or
        in combination with (*UTF8) is:
@@ -5860,11 +5704,11 @@

REVISION

-       Last updated: 24 July 2011
+       Last updated: 24 August 2011
        Copyright (c) 1997-2011 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCRESYNTAX(3)                                                    PCRESYNTAX(3)

@@ -6233,8 +6077,157 @@
        Last updated: 21 November 2010
        Copyright (c) 1997-2010 University of Cambridge.
 ------------------------------------------------------------------------------
+ 
+ 
+PCREUNICODE(3)                                                  PCREUNICODE(3)

+NAME
+       PCRE - Perl-compatible regular expressions
+
+
+UTF-8 AND UNICODE PROPERTY SUPPORT
+
+       In order to process UTF-8 strings, you must build PCRE to include UTF-8
+       support in the code, and, in addition,  you  must  call  pcre_compile()
+       with  the  PCRE_UTF8  option  flag,  or the pattern must start with the
+       sequence (*UTF8). When either of these is the case,  both  the  pattern
+       and  any  subject  strings  that  are matched against it are treated as
+       UTF-8 strings instead of strings of 1-byte characters.  PCRE  does  not
+       support any other formats (in particular, it does not support UTF-16).
+
+       If  you compile PCRE with UTF-8 support, but do not use it at run time,
+       the library will be a bit bigger, but the additional run time  overhead
+       is limited to testing the PCRE_UTF8 flag occasionally, so should not be
+       very big.
+
+       If PCRE is built with Unicode character property support (which implies
+       UTF-8  support),  the  escape sequences \p{..}, \P{..}, and \X are sup-
+       ported.  The available properties that can be tested are limited to the
+       general  category  properties such as Lu for an upper case letter or Nd
+       for a decimal number, the Unicode script names such as Arabic  or  Han,
+       and  the  derived  properties  Any  and L&. A full list is given in the
+       pcrepattern documentation. Only the short names for properties are sup-
+       ported.  For example, \p{L} matches a letter. Its Perl synonym, \p{Let-
+       ter}, is not supported.  Furthermore,  in  Perl,  many  properties  may
+       optionally  be  prefixed by "Is", for compatibility with Perl 5.6. PCRE
+       does not support this.
+
+   Validity of UTF-8 strings
+
+       When you set the PCRE_UTF8 flag, the strings  passed  as  patterns  and
+       subjects are (by default) checked for validity on entry to the relevant
+       functions. From release 7.3 of PCRE, the check is according to the rules
+       of  RFC  3629, which are themselves derived from the Unicode specifica-
+       tion. Earlier releases of PCRE followed the rules of  RFC  2279,  which
+       allows  the  full range of 31-bit values (0 to 0x7FFFFFFF). The current
+       check allows only values in the range U+0 to U+10FFFF, excluding U+D800
+       to U+DFFF.
+
+       The  excluded  code  points are the "Low Surrogate Area" of Unicode, of
+       which the Unicode Standard says this: "The Low Surrogate Area does  not
+       contain  any  character  assignments,  consequently  no  character code
+       charts or namelists are provided for this area. Surrogates are reserved
+       for  use  with  UTF-16 and then must be used in pairs." The code points
+       that are encoded by UTF-16 pairs  are  available  as  independent  code
+       points  in  the  UTF-8  encoding.  (In other words, the whole surrogate
+       thing is a fudge for UTF-16 which unfortunately messes up UTF-8.)
+
+       If an invalid UTF-8 string is passed to PCRE, an error return is given.
+       At  compile  time, the only additional information is the offset to the
+       first byte of the failing character. The runtime functions  pcre_exec()
+       and  pcre_dfa_exec() also pass back this information, as well as a more
+       detailed reason code if the caller has provided memory in which  to  do
+       this.
+
+       In  some  situations, you may already know that your strings are valid,
+       and therefore want to skip these checks in  order  to  improve  perfor-
+       mance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or at run
+       time, PCRE assumes that the pattern or subject  it  is  given  (respec-
+       tively)  contains  only  valid  UTF-8  codes. In this case, it does not
+       diagnose an invalid UTF-8 string.
+
+       If you pass an invalid UTF-8 string  when  PCRE_NO_UTF8_CHECK  is  set,
+       what  happens  depends on why the string is invalid. If the string con-
+       forms to the "old" definition of UTF-8 (RFC 2279), it is processed as a
+       string  of  characters  in  the  range 0 to 0x7FFFFFFF. In other words,
+       apart from the initial validity test, PCRE (when in UTF-8 mode) handles
+       strings  according  to  the more liberal rules of RFC 2279. However, if
+       the string does not even conform to RFC 2279, the result is  undefined.
+       Your program may crash.
+
+       If  you  want  to  process  strings  of  values  in the full range 0 to
+       0x7FFFFFFF, encoded in a UTF-8-like manner as per the old RFC, you  can
+       set PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in
+       this situation, you will have to apply your own validity check.
+
+   General comments about UTF-8 mode
+
+       1. An unbraced hexadecimal escape sequence (such  as  \xb3)  matches  a
+       two-byte UTF-8 character if the value is greater than 127.
+
+       2.  Octal  numbers  up to \777 are recognized, and match two-byte UTF-8
+       characters for values greater than \177.
+
+       3. Repeat quantifiers apply to complete UTF-8 characters, not to  indi-
+       vidual bytes, for example: \x{100}{3}.
+
+       4.  The dot metacharacter matches one UTF-8 character instead of a sin-
+       gle byte.
+
+       5. The escape sequence \C can be used to match a single byte  in  UTF-8
+       mode,  but  its  use can lead to some strange effects. This facility is
+       not available in the alternative matching function, pcre_dfa_exec().
+
+       6. The character escapes \b, \B, \d, \D, \s, \S, \w, and  \W  correctly
+       test characters of any code value, but, by default, the characters that
+       PCRE recognizes as digits, spaces, or word characters remain  the  same
+       set  as  before,  all with values less than 256. This remains true even
+       when PCRE is built to include Unicode property support, because  to  do
+       otherwise would slow down PCRE in many common cases. Note in particular
+       that this applies to \b and \B, because they are defined in terms of \w
+       and  \W. If you really want to test for a wider sense of, say, "digit",
+       you can use explicit Unicode property tests such  as  \p{Nd}.  Alterna-
+       tively,  if  you  set  the  PCRE_UCP option, the way that the character
+       escapes work is changed so that Unicode properties are used  to  deter-
+       mine  which  characters match. There are more details in the section on
+       generic character types in the pcrepattern documentation.
+
+       7. Similarly, characters that match the POSIX named  character  classes
+       are all low-valued characters, unless the PCRE_UCP option is set.
+
+       8.  However,  the  horizontal  and vertical whitespace matching escapes
+       (\h, \H, \v, and \V) do match all the appropriate  Unicode  characters,
+       whether or not PCRE_UCP is set.
+
+       9.  Case-insensitive  matching  applies only to characters whose values
+       are less than 128, unless PCRE is built with Unicode property  support.
+       Even  when  Unicode  property support is available, PCRE still uses its
+       own character tables when checking the case of  low-valued  characters,
+       so  as not to degrade performance.  The Unicode property information is
+       used only for characters with higher values. Furthermore, PCRE supports
+       case-insensitive  matching  only  when  there  is  a one-to-one mapping
+       between a letter's cases. There are a small number of many-to-one  map-
+       pings in Unicode; these are not supported by PCRE.
+
+
+AUTHOR
+
+       Philip Hazel
+       University Computing Service
+       Cambridge CB2 3QH, England.
+
+
+REVISION
+
+       Last updated: 24 August 2011
+       Copyright (c) 1997-2011 University of Cambridge.
+------------------------------------------------------------------------------
+ 
+ 
+------------------------------------------------------------------------------
+ 
+ 
 PCREPARTIAL(3)                                                  PCREPARTIAL(3)

@@ -6653,8 +6646,8 @@
        Last updated: 07 November 2010
        Copyright (c) 1997-2010 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCREPRECOMPILE(3)                                            PCREPRECOMPILE(3)

@@ -6778,8 +6771,8 @@
        Last updated: 17 November 2010
        Copyright (c) 1997-2010 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCREPERFORM(3)                                                  PCREPERFORM(3)

@@ -6946,8 +6939,8 @@
        Last updated: 16 May 2010
        Copyright (c) 1997-2010 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCREPOSIX(3)                                                      PCREPOSIX(3)

@@ -7209,8 +7202,8 @@
        Last updated: 16 May 2010
        Copyright (c) 1997-2010 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCRECPP(3)                                                          PCRECPP(3)

@@ -7551,8 +7544,8 @@
        Last updated: 17 March 2009
        Minor typo fixed: 25 July 2011
 ------------------------------------------------------------------------------
-
-
+ 
+ 
 PCRESAMPLE(3)                                                    PCRESAMPLE(3)

@@ -7638,6 +7631,56 @@
        Last updated: 17 November 2010
        Copyright (c) 1997-2010 University of Cambridge.
 ------------------------------------------------------------------------------
+PCRELIMITS(3)                                                    PCRELIMITS(3)
+
+
+NAME
+       PCRE - Perl-compatible regular expressions
+
+
+SIZE AND OTHER LIMITATIONS
+
+       There  are some size limitations in PCRE but it is hoped that they will
+       never in practice be relevant.
+
+       The maximum length of a compiled pattern is 65539 (sic) bytes  if  PCRE
+       is compiled with the default internal linkage size of 2. If you want to
+       process regular expressions that are truly enormous,  you  can  compile
+       PCRE  with  an  internal linkage size of 3 or 4 (see the README file in
+       the source distribution and the pcrebuild documentation  for  details).
+       In  these  cases the limit is substantially larger.  However, the speed
+       of execution is slower.
+
+       All values in repeating quantifiers must be less than 65536.
+
+       There is no limit to the number of parenthesized subpatterns, but there
+       can be no more than 65535 capturing subpatterns.
+
+       The maximum length of name for a named subpattern is 32 characters, and
+       the maximum number of named subpatterns is 10000.
+
+       The maximum length of a subject string is the largest  positive  number
+       that  an integer variable can hold. However, when using the traditional
+       matching function, PCRE uses recursion to handle subpatterns and indef-
+       inite  repetition.  This means that the available stack space may limit
+       the size of a subject string that can be processed by certain patterns.
+       For a discussion of stack issues, see the pcrestack documentation.
+
+
+AUTHOR
+
+       Philip Hazel
+       University Computing Service
+       Cambridge CB2 3QH, England.
+
+
+REVISION
+
+       Last updated: 24 August 2011
+       Copyright (c) 1997-2011 University of Cambridge.
+------------------------------------------------------------------------------
+ 
+ 
 PCRESTACK(3)                                                      PCRESTACK(3)

@@ -7789,5 +7832,5 @@
        Last updated: 22 July 2011
        Copyright (c) 1997-2011 University of Cambridge.
 ------------------------------------------------------------------------------
-
-
+ 
+

Added: code/trunk/doc/pcre_assign_jit_stack.3
===================================================================
--- code/trunk/doc/pcre_assign_jit_stack.3                            (rev 0)
+++ code/trunk/doc/pcre_assign_jit_stack.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,49 @@
+.TH PCRE_ASSIGN_JIT_STACK 3
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre.h>
+.PP
+.SM
+.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP, 
+.ti +5n
+.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function provides control over the memory used as a stack at runtime by a
+call to \fBpcre_exec()\fP with a pattern that has been successfully compiled
+with JIT optimization. The arguments are:
+.sp
+  extra     the data pointer returned by \fBpcre_study()\fP
+  callback  a callback function
+  data      a JIT stack or a value to be passed to the callback 
+              function
+.P
+If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block on
+the machine stack is used.
+.P
+If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must
+be a valid JIT stack, the result of calling \fBpcre_jit_stack_alloc()\fP.
+.P
+If \fIcallback\fP is not NULL, it is called with \fIdata\fP as an argument at
+the start of matching, in order to set up a JIT stack. If the result is NULL, 
+the internal 32K stack is used; otherwise the return value must be a valid JIT 
+stack, the result of calling \fBpcre_jit_stack_alloc()\fP.
+.P
+You may safely assign the same JIT stack to multiple patterns, as long as they 
+are all matched in the same thread. In a multithread application, each thread 
+must use its own JIT stack.
+.P
+There is a complete description of the PCRE native API in the
+.\" HREF
+\fBpcreapi\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcreposix\fP
+.\"
+page.

Modified: code/trunk/doc/pcre_config.3
===================================================================
--- code/trunk/doc/pcre_config.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcre_config.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -13,14 +13,18 @@
 .rs
 .sp
 This function makes it possible for a client program to find out which optional
-features are available in the version of the PCRE library it is using. Its
+features are available in the version of the PCRE library it is using. The
 arguments are as follows:
 .sp
   \fIwhat\fP     A code specifying what information is required
   \fIwhere\fP    Points to where to put the data
 .sp
-The available codes are:
+The \fIwhere\fP argument must point to an integer variable, except for
+PCRE_CONFIG_MATCH_LIMIT and PCRE_CONFIG_MATCH_LIMIT_RECURSION, when it must
+point to an unsigned long integer. The available codes are:
 .sp
+  PCRE_CONFIG_JIT           Availability of just-in-time compiler
+                              support (1=yes 0=no)
   PCRE_CONFIG_LINK_SIZE     Internal link size: 2, 3, or 4
   PCRE_CONFIG_MATCH_LIMIT   Internal resource limit
   PCRE_CONFIG_MATCH_LIMIT_RECURSION
@@ -35,9 +39,8 @@
                                  0             all Unicode line endings
                                  1             CR, LF, or CRLF only
   PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
-                            Threshold of return slots, above
-                              which \fBmalloc()\fP is used by
-                              the POSIX API
+                            Threshold of return slots, above which
+                              \fBmalloc()\fP is used by the POSIX API
   PCRE_CONFIG_STACKRECURSE  Recursion implementation (1=stack 0=heap)
   PCRE_CONFIG_UTF8          Availability of UTF-8 support (1=yes 0=no)
   PCRE_CONFIG_UNICODE_PROPERTIES

Modified: code/trunk/doc/pcre_dfa_exec.3
===================================================================
--- code/trunk/doc/pcre_dfa_exec.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcre_dfa_exec.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -75,17 +75,21 @@
 .P
 A \fBpcre_extra\fP structure contains the following fields:
 .sp
-  \fIflags\fP        Bits indicating which fields are set
-  \fIstudy_data\fP   Opaque data from \fBpcre_study()\fP
-  \fImatch_limit\fP  Limit on internal resource use
+  \fIflags\fP            Bits indicating which fields are set
+  \fIstudy_data\fP       Opaque data from \fBpcre_study()\fP
+  \fImatch_limit\fP      Limit on internal resource use
   \fImatch_limit_recursion\fP  Limit on internal recursion depth
-  \fIcallout_data\fP Opaque data passed back to callouts
-  \fItables\fP       Points to character tables or is NULL
+  \fIcallout_data\fP     Opaque data passed back to callouts
+  \fItables\fP           Points to character tables or is NULL
+  \fImark\fP             For passing back a *MARK pointer
+  \fIexecutable_jit\fP   Opaque data from JIT compilation  
 .sp
 The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
-PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
-PCRE_EXTRA_TABLES. For this matching function, the \fImatch_limit\fP and
-\fImatch_limit_recursion\fP fields are not used, and must not be set.
+PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
+PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. For this
+matching function, the \fImatch_limit\fP and \fImatch_limit_recursion\fP fields
+are not used, and must not be set. The PCRE_EXTRA_EXECUTABLE_JIT flag and 
+the corresponding variable are ignored.
 .P
 There is a complete description of the PCRE native API in the
 .\" HREF

Modified: code/trunk/doc/pcre_exec.3
===================================================================
--- code/trunk/doc/pcre_exec.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcre_exec.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -61,16 +61,18 @@
 .\"
 page. A \fBpcre_extra\fP structure contains the following fields:
 .sp
-  \fIflags\fP        Bits indicating which fields are set
-  \fIstudy_data\fP   Opaque data from \fBpcre_study()\fP
-  \fImatch_limit\fP  Limit on internal resource use
+  \fIflags\fP            Bits indicating which fields are set
+  \fIstudy_data\fP       Opaque data from \fBpcre_study()\fP
+  \fImatch_limit\fP      Limit on internal resource use
   \fImatch_limit_recursion\fP  Limit on internal recursion depth
-  \fIcallout_data\fP Opaque data passed back to callouts
-  \fItables\fP       Points to character tables or is NULL
+  \fIcallout_data\fP     Opaque data passed back to callouts
+  \fItables\fP           Points to character tables or is NULL
+  \fImark\fP             For passing back a *MARK pointer
+  \fIexecutable_jit\fP   Opaque data from JIT compilation  
 .sp
 The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
-PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
-PCRE_EXTRA_TABLES.
+PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
+PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
 .P
 There is a complete description of the PCRE native API in the
 .\" HREF

Added: code/trunk/doc/pcre_free_study.3
===================================================================
--- code/trunk/doc/pcre_free_study.3                            (rev 0)
+++ code/trunk/doc/pcre_free_study.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,27 @@
+.TH PCRE_FREE_STUDY 3
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre.h>
+.PP
+.SM
+.B void pcre_free_study(pcre_extra *\fIextra\fP);
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is used to free the memory used for the data generated by a call 
+to \fBpcre_study()\fP when it is no longer needed. The argument must be the 
+result of such a call.
+.P
+There is a complete description of the PCRE native API in the
+.\" HREF
+\fBpcreapi\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcreposix\fP
+.\"
+page.

Modified: code/trunk/doc/pcre_fullinfo.3
===================================================================
--- code/trunk/doc/pcre_fullinfo.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcre_fullinfo.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -31,7 +31,9 @@
                                  or after newline, or
                               -2 otherwise
   PCRE_INFO_FIRSTTABLE      Table of first bytes (after studying)
+  PCRE_INFO_HASCRORLF       Return 1 if explicit CR or LF matches exist
   PCRE_INFO_JCHANGED        Return 1 if (?J) or (?-J) was used
+  PCRE_INFO_JIT             Return 1 after successful JIT compilation
   PCRE_INFO_LASTLITERAL     Literal last byte required
   PCRE_INFO_MINLENGTH       Lower bound length of matching strings
   PCRE_INFO_NAMECOUNT       Number of named subpatterns
@@ -43,6 +45,15 @@
   PCRE_INFO_SIZE            Size of compiled pattern
   PCRE_INFO_STUDYSIZE       Size of study data
 .sp
+The \fIwhere\fP argument must point to an integer variable, except for the 
+following \fIwhat\fP values:
+.sp
+  PCRE_INFO_DEFAULT_TABLES  const unsigned char *
+  PCRE_INFO_FIRSTTABLE      const unsigned char *
+  PCRE_INFO_NAMETABLE       const unsigned char *
+  PCRE_INFO_OPTIONS         unsigned long int
+  PCRE_INFO_SIZE            size_t 
+.sp
 The yield of the function is zero on success or:
 .sp
   PCRE_ERROR_NULL           the argument \fIcode\fP was NULL

Added: code/trunk/doc/pcre_jit_stack_alloc.3
===================================================================
--- code/trunk/doc/pcre_jit_stack_alloc.3                            (rev 0)
+++ code/trunk/doc/pcre_jit_stack_alloc.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,31 @@
+.TH PCRE_JIT_STACK_ALLOC 3
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre.h>
+.PP
+.SM
+.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, 
+.ti +5n
+.B int \fImaxsize\fP);
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is used to create a stack for use by the code compiled by the JIT
+optimization of \fBpcre_study()\fP. The arguments are a starting size for the
+stack, and a maximum size to which it is allowed to grow. The result can be
+passed to the JIT runtime code by \fBpcre_assign_jit_stack()\fP, or that
+function can set up a callback for obtaining a stack.
+.P
+There is a complete description of the PCRE native API in the
+.\" HREF
+\fBpcreapi\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcreposix\fP
+.\"
+page.

Added: code/trunk/doc/pcre_jit_stack_free.3
===================================================================
--- code/trunk/doc/pcre_jit_stack_free.3                            (rev 0)
+++ code/trunk/doc/pcre_jit_stack_free.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,26 @@
+.TH PCRE_JIT_STACK_FREE 3
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre.h>
+.PP
+.SM
+.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function is used to free a JIT stack that was created by 
+\fBpcre_jit_stack_alloc()\fP when it is no longer needed.
+.P
+There is a complete description of the PCRE native API in the
+.\" HREF
+\fBpcreapi\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcreposix\fP
+.\"
+page.

Modified: code/trunk/doc/pcre_study.3
===================================================================
--- code/trunk/doc/pcre_study.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcre_study.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -22,14 +22,19 @@
   \fIerrptr\fP     Where to put an error message
 .sp
 If the function succeeds, it returns a value that can be passed to
-\fBpcre_exec()\fP via its \fIextra\fP argument.
+\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP via their \fIextra\fP arguments.
 .P
 If the function returns NULL, either it could not find any additional
 information, or there was an error. You can tell the difference by looking at
 the error value. It is NULL in first case.
 .P
-There are currently no options defined; the value of the second argument should
-always be zero.
+The only option is PCRE_STUDY_JIT_COMPILE. It requests just-in-time compilation 
+if possible. If PCRE has been compiled without JIT support, this option is 
+ignored. See the
+.\" HREF
+\fBpcrejit\fP
+.\"
+page for further details.
 .P
 There is a complete description of the PCRE native API in the
 .\" HREF

Modified: code/trunk/doc/pcreapi.3
===================================================================
--- code/trunk/doc/pcreapi.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcreapi.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -1,7 +1,7 @@
 .TH PCREAPI 3
 .SH NAME
 PCRE - Perl-compatible regular expressions
-.SH "PCRE NATIVE API"
+.SH "PCRE NATIVE API BASIC FUNCTIONS"
 .rs
 .sp
 .B #include <pcre.h>
@@ -25,12 +25,26 @@
 .ti +5n
 .B const char **\fIerrptr\fP);
 .PP
+.B void pcre_free_study(pcre_extra *\fIextra\fP);
+.PP
 .B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
 .ti +5n
 .B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
 .ti +5n
 .B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
+.
+.
+.SH "PCRE NATIVE API AUXILIARY FUNCTIONS"
+.rs
+.sp
+.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
 .PP
+.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
+.PP
+.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP, 
+.ti +5n
+.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
+.PP
 .B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
 .ti +5n
 .B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
@@ -114,12 +128,13 @@
 .sp
 PCRE has its own native API, which is described in this document. There are
 also some wrapper functions that correspond to the POSIX regular expression
-API. These are described in the
+API, but they do not give access to all the functionality. They are described
+in the
 .\" HREF
 \fBpcreposix\fP
 .\"
 documentation. Both of these APIs define a set of C function calls. A C++
-wrapper is distributed with PCRE. It is documented in the
+wrapper is also distributed with PCRE. It is documented in the
 .\" HREF
 \fBpcrecpp\fP
 .\"
@@ -152,6 +167,18 @@
 .\"
 documentation describes how to compile and run it.
 .P
+Just-in-time compiler support is an optional feature of PCRE that can be built
+in appropriate hardware environments. It greatly speeds up the matching 
+performance of many patterns. Simple programs can request its use if available. 
+More complicated programs might need to make use of the 
+\fBpcre_jit_stack_alloc()\fP, \fBpcre_jit_stack_free()\fP, and 
+\fBpcre_assign_jit_stack()\fP functions in order to control its memory usage.
+These functions are discussed in the
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation.
+.P
 A second matching function, \fBpcre_dfa_exec()\fP, which is not
 Perl-compatible, is also provided. This uses a different algorithm for the
 matching. The alternative algorithm finds all possible matches (at a given
@@ -282,6 +309,13 @@
 .P
 The compiled form of a regular expression is not altered during matching, so
 the same compiled pattern can safely be used by several threads at once.
+.P
+If the just-in-time optimization feature is being used, it needs separate 
+memory stack areas for each thread. See the
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation for more details.
 .
 .
 .SH "SAVING PRECOMPILED PATTERNS FOR LATER USE"
@@ -324,6 +358,11 @@
 The output is an integer that is set to one if support for Unicode character
 properties is available; otherwise it is set to zero.
 .sp
+  PCRE_CONFIG_JIT
+.sp
+The output is an integer that is set to one if support for just-in-time 
+compiling is available; otherwise it is set to zero.   
+.sp
   PCRE_CONFIG_NEWLINE
 .sp
 The output is an integer whose value specifies the default character sequence
@@ -701,13 +740,8 @@
 available only when PCRE is built to include UTF-8 support. If not, the use
 of this option provokes an error. Details of how this option changes the
 behaviour of PCRE are given in the
-.\" HTML <a href="pcre.html#utf8support">
-.\" </a>
-section on UTF-8 support
-.\"
-in the main
 .\" HREF
-\fBpcre\fP
+\fBpcreunicode\fP
 .\"
 page.
 .sp
@@ -849,9 +883,24 @@
 wants to pass any of the other fields to \fBpcre_exec()\fP or
 \fBpcre_dfa_exec()\fP, it must set up its own \fBpcre_extra\fP block.
 .P
-The second argument of \fBpcre_study()\fP contains option bits. At present, no
-options are defined, and this argument should always be zero.
+The second argument of \fBpcre_study()\fP contains option bits. There is only 
+one option: PCRE_STUDY_JIT_COMPILE. If this is set, and the just-in-time 
+compiler is available, the pattern is further compiled into machine code that 
+executes much faster than the \fBpcre_exec()\fP matching function. If 
+the just-in-time compiler is not available, this option is ignored. All other 
+bits in the \fIoptions\fP argument must be zero.
 .P
+JIT compilation is a heavyweight optimization. It can take some time for 
+patterns to be analyzed, and for one-off matches and simple patterns the
+benefit of faster execution might be offset by a much slower study time.
+Not all patterns can be optimized by the JIT compiler. For those that cannot be 
+handled, matching automatically falls back to the \fBpcre_exec()\fP
+interpreter. For more details, see the
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation.
+.P
 The third argument for \fBpcre_study()\fP is a pointer for an error message. If
 studying succeeds (even if no data is returned), the variable it points to is
 set to NULL. Otherwise it is set to point to a textual error message. This is a
@@ -859,13 +908,29 @@
 should test the error pointer for NULL after calling \fBpcre_study()\fP, to be
 sure that it has run successfully.
 .P
-This is a typical call to \fBpcre_study\fP():
+When you are finished with a pattern, you can free the memory used for the
+study data by calling \fBpcre_free_study()\fP. This function was added to the
+API for release 8.20. For earlier versions, the memory could be freed with
+\fBpcre_free()\fP, just like the pattern itself. This will still work in cases
+where PCRE_STUDY_JIT_COMPILE is not used, but it is advisable to change to the
+new function when convenient.
+.P
+This is a typical way in which \fBpcre_study\fP() is used (except that in a 
+real application there should be tests for errors):
 .sp
-  pcre_extra *pe;
-  pe = pcre_study(
+  int rc;
+  pcre *re;
+  pcre_extra *sd;
+  re = pcre_compile("pattern", 0, &error, &erroroffset, NULL);
+  sd = pcre_study(
     re,             /* result of pcre_compile() */
-    0,              /* no options exist */
+    0,              /* no options */
     &error);        /* set to NULL or points to a message */
+  rc = pcre_exec(   /* see below for details of pcre_exec() options */
+    re, sd, "subject", 7, 0, 0, ovector, 30);   
+  ...
+  pcre_free_study(sd);
+  pcre_free(re);  
 .sp
 Studying a pattern does two things: first, a lower bound for the length of
 subject string that is needed to match the pattern is computed. This does not
@@ -880,11 +945,15 @@
 created. This speeds up finding a position in the subject at which to start
 matching.
 .P
-The two optimizations just described can be disabled by setting the
-PCRE_NO_START_OPTIMIZE option when calling \fBpcre_exec()\fP or
+These two optimizations apply to both \fBpcre_exec()\fP and 
+\fBpcre_dfa_exec()\fP. However, they are not used by \fBpcre_exec()\fP if 
+\fBpcre_study()\fP is called with the PCRE_STUDY_JIT_COMPILE option, and
+just-in-time compiling is successful. The optimizations can be disabled by
+setting the PCRE_NO_START_OPTIMIZE option when calling \fBpcre_exec()\fP or
 \fBpcre_dfa_exec()\fP. You might want to do this if your pattern contains
-callouts or (*MARK), and you want to make use of these facilities in cases
-where matching fails. See the discussion of PCRE_NO_START_OPTIMIZE
+callouts or (*MARK) (which cannot be handled by the JIT compiler), and you want
+to make use of these facilities in cases where matching fails. See the
+discussion of PCRE_NO_START_OPTIMIZE
 .\" HTML <a href="#execoptions">
 .\" </a>
 below.
@@ -981,7 +1050,7 @@
   size_t length;
   rc = pcre_fullinfo(
     re,               /* result of pcre_compile() */
-    pe,               /* result of pcre_study(), or NULL */
+    sd,               /* result of pcre_study(), or NULL */
     PCRE_INFO_SIZE,   /* what is required */
     &length);         /* where to put the data */
 .sp
@@ -1046,6 +1115,19 @@
 0. The fourth argument should point to an \fBint\fP variable. (?J) and
 (?-J) set and unset the local PCRE_DUPNAMES option, respectively.
 .sp
+  PCRE_INFO_JIT
+.sp
+Return 1 if the pattern was studied with the PCRE_STUDY_JIT_COMPILE option, and 
+just-in-time compiling was successful. The fourth argument should point to an
+\fBint\fP variable. A return value of 0 means that JIT support is not available
+in this version of PCRE, or that the pattern was not studied with the 
+PCRE_STUDY_JIT_COMPILE option, or that the JIT compiler could not handle this 
+particular pattern. See the
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation for details of what can and cannot be handled.
+.sp
   PCRE_INFO_LASTLITERAL
 .sp
 Return the value of the rightmost literal byte that must exist in any matched
@@ -1290,6 +1372,7 @@
 .sp
   unsigned long int \fIflags\fP;
   void *\fIstudy_data\fP;
+  void *\fIexecutable_jit\fP; 
   unsigned long int \fImatch_limit\fP;
   unsigned long int \fImatch_limit_recursion\fP;
   void *\fIcallout_data\fP;
@@ -1300,29 +1383,38 @@
 are set. The flag bits are:
 .sp
   PCRE_EXTRA_STUDY_DATA
+  PCRE_EXTRA_EXECUTABLE_JIT 
   PCRE_EXTRA_MATCH_LIMIT
   PCRE_EXTRA_MATCH_LIMIT_RECURSION
   PCRE_EXTRA_CALLOUT_DATA
   PCRE_EXTRA_TABLES
   PCRE_EXTRA_MARK
 .sp
-Other flag bits should be set to zero. The \fIstudy_data\fP field is set in the
-\fBpcre_extra\fP block that is returned by \fBpcre_study()\fP, together with
-the appropriate flag bit. You should not set this yourself, but you may add to
-the block by setting the other fields and their corresponding flag bits.
+Other flag bits should be set to zero. The \fIstudy_data\fP field and sometimes
+the \fIexecutable_jit\fP field are set in the \fBpcre_extra\fP block that is
+returned by \fBpcre_study()\fP, together with the appropriate flag bits. You
+should not set these yourself, but you may add to the block by setting the
+other fields and their corresponding flag bits.
 .P
 The \fImatch_limit\fP field provides a means of preventing PCRE from using up a
 vast amount of resources when running patterns that are not going to match,
 but which have a very large number of possibilities in their search trees. The
 classic example is a pattern that uses nested unlimited repeats.
 .P
-Internally, PCRE uses a function called \fBmatch()\fP which it calls repeatedly
-(sometimes recursively). The limit set by \fImatch_limit\fP is imposed on the
-number of times this function is called during a match, which has the effect of
-limiting the amount of backtracking that can take place. For patterns that are
-not anchored, the count restarts from zero for each position in the subject
-string.
+Internally, \fBpcre_exec()\fP uses a function called \fBmatch()\fP, which it
+calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is
+imposed on the number of times this function is called during a match, which
+has the effect of limiting the amount of backtracking that can take place. For
+patterns that are not anchored, the count restarts from zero for each position
+in the subject string. 
 .P
+When \fBpcre_exec()\fP is called with a pattern that was successfully studied 
+with the PCRE_STUDY_JIT_COMPILE option, the way that the matching is executed 
+is entirely different. However, there is still the possibility of runaway 
+matching that goes on for a very long time, and so the \fImatch_limit\fP value
+is also used in this case (but in a different way) to limit how long the
+matching can continue.
+.P
 The default value for the limit can be set when PCRE is built; the default
 default is 10 million, which handles all but the most extreme cases. You can
 override the default by suppling \fBpcre_exec()\fP with a \fBpcre_extra\fP
@@ -1334,11 +1426,13 @@
 instead of limiting the total number of times that \fBmatch()\fP is called, it
 limits the depth of recursion. The recursion depth is a smaller number than the
 total number of calls, because not all calls to \fBmatch()\fP are recursive.
-This limit is of use only if it is set smaller than \fImatch_limit\fP.
+This limit is of use only if it is set smaller than \fImatch_limit\fP. 
 .P
-Limiting the recursion depth limits the amount of stack that can be used, or,
-when PCRE has been compiled to use memory on the heap instead of the stack, the
-amount of heap memory that can be used.
+Limiting the recursion depth limits the amount of machine stack that can be
+used, or, when PCRE has been compiled to use memory on the heap instead of the
+stack, the amount of heap memory that can be used. This limit is not
+relevant, and is ignored, when the pattern was successfully studied with
+PCRE_STUDY_JIT_COMPILE.
 .P
 The default value for \fImatch_limit_recursion\fP can be set when PCRE is
 built; the default default is the same value as the default for
@@ -1885,6 +1979,16 @@
 faulted at compile time, but more complicated cases, in particular mutual
 recursions between two different subpatterns, cannot be detected until run
 time.
+.sp
+  PCRE_ERROR_JIT_STACKLIMIT (-27)
+.sp
+This error is returned when a pattern that was successfully studied using the 
+PCRE_STUDY_JIT_COMPILE option is matched, but the memory available for the 
+just-in-time processing stack is not large enough. See the
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation for more details.   
 .P
 Error numbers -16 to -20 and -22 are not used by \fBpcre_exec()\fP.
 .
@@ -2354,8 +2458,9 @@
   PCRE_ERROR_DFA_UMLIMIT    (-18)
 .sp
 This return is given if \fBpcre_dfa_exec()\fP is called with an \fIextra\fP
-block that contains a setting of the \fImatch_limit\fP field. This is not
-supported (it is meaningless).
+block that contains a setting of the \fImatch_limit\fP or
+\fImatch_limit_recursion\fP fields. This is not supported (these fields are
+meaningless for DFA matching).
 .sp
   PCRE_ERROR_DFA_WSSIZE     (-19)
 .sp
@@ -2392,6 +2497,6 @@
 .rs
 .sp
 .nf
-Last updated: 13 August 2011
+Last updated: 27 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcrebuild.3
===================================================================
--- code/trunk/doc/pcrebuild.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcrebuild.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -98,6 +98,22 @@
 documentation.
 .
 .
+.SH "JUST-IN-TIME COMPILER SUPPORT"
+.rs
+.sp
+Just-in-time compiler support is included in the build by specifying
+.sp
+  --enable-jit
+.sp
+This support is available only for certain hardware architectures. If this 
+option is set for an unsupported architecture, a compile time error occurs.
+See the 
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation for a discussion of JIT usage.
+.
+.
 .SH "CODE VALUE OF NEWLINE"
 .rs
 .sp
@@ -367,6 +383,6 @@
 .rs
 .sp
 .nf
-Last updated: 02 August 2011
+Last updated: 27 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcrecallout.3
===================================================================
--- code/trunk/doc/pcrecallout.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcrecallout.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -39,6 +39,10 @@
 command has an option that sets automatic callouts; when it is used, the output
 indicates how the pattern is matched. This is useful information when you are
 trying to optimize the performance of a particular pattern.
+.P
+The use of callouts in a pattern makes it ineligible for optimization by the 
+just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE 
+option always fails.
 .
 .
 .SH "MISSING CALLOUTS"
@@ -191,6 +195,6 @@
 .rs
 .sp
 .nf
-Last updated: 31 July 2011
+Last updated: 26 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcrecompat.3
===================================================================
--- code/trunk/doc/pcrecompat.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcrecompat.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -10,13 +10,8 @@
 .P
 1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what
 it does have are given in the
-.\" HTML <a href="pcre.html#utf8support">
-.\" </a>
-section on UTF-8 support
-.\"
-in the main
 .\" HREF
-\fBpcre\fP
+\fBpcreunicode\fP
 .\"
 page.
 .P
@@ -173,6 +168,6 @@
 .rs
 .sp
 .nf
-Last updated: 24 July 2011
+Last updated: 24 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Added: code/trunk/doc/pcrejit.3
===================================================================
--- code/trunk/doc/pcrejit.3                            (rev 0)
+++ code/trunk/doc/pcrejit.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,234 @@
+.TH PCREJIT 3
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH "PCRE JUST-IN-TIME COMPILER SUPPORT"
+.rs
+.sp
+Just-in-time compiling is a heavyweight optimization that can greatly speed up
+pattern matching. However, it comes at the cost of extra processing before the 
+match is performed. Therefore, it is of most benefit when the same pattern is 
+going to be matched many times. This does not necessarily mean many calls of 
+\fBpcre_exec()\fP; if the pattern is not anchored, matching attempts may take
+place many times at various positions in the subject, even for a single call to
+\fBpcre_exec()\fP. If the subject string is very long, it may still pay to use 
+JIT for one-off matches.
+.P
+JIT support applies only to the traditional matching function, 
+\fBpcre_exec()\fP. It does not apply when \fBpcre_dfa_exec()\fP is being used.
+The code for this support was written by Zoltan Herczeg.
+.
+.
+.SH "AVAILABILITY OF JIT SUPPORT"
+.rs
+.sp
+JIT support is an optional feature of PCRE. The "configure" option --enable-jit
+(or equivalent CMake option) must be set when PCRE is built if you want to use
+JIT. The support is limited to the following hardware platforms:
+.sp
+  ARM v5, v7, and Thumb2
+  MIPS 32-bit
+  Power PC 32-bit and 64-bit
+  Intel x86 32-bit and 64-bit
+.sp      
+If --enable-jit is set on an unsupported platform, compilation fails.
+.P
+A program can tell if JIT support is available by calling \fBpcre_config()\fP 
+with the PCRE_CONFIG_JIT option. The result is 1 when JIT is available, and 0 
+otherwise. However, a simple program does not need to check this in order to
+use JIT. The API is implemented in a way that falls back to the ordinary PCRE
+code if JIT is not available.
+.
+.
+.SH "SIMPLE USE OF JIT"
+.rs
+.sp
+You have to do two things to make use of the JIT support in the simplest way:
+.sp
+  (1) Call \fBpcre_study()\fP with the PCRE_STUDY_JIT_COMPILE option for
+      each compiled pattern, and pass the resulting \fBpcre_extra\fP block to
+      \fBpcre_exec()\fP.
+      
+  (2) Use \fBpcre_free_study()\fP to free the \fBpcre_extra\fP block when it is
+      no longer needed instead of just freeing it yourself. This ensures that
+      any JIT data is also freed.
+.sp
+In some circumstances you may need to call additional functions. These are 
+described in the section entitled
+.\" HTML <a href="#stackcontrol">
+.\" </a>
+"Controlling the JIT stack"
+.\"
+below.
+.P
+If JIT support is not available, PCRE_STUDY_JIT_COMPILE is ignored, and no JIT 
+data is set up. Otherwise, the compiled pattern is passed to the JIT compiler, 
+which turns it into machine code that executes much faster than the normal 
+interpretive code. When \fBpcre_exec()\fP is passed a \fBpcre_extra\fP block 
+containing a pointer to JIT code, it obeys that instead of the normal code. The 
+result is identical, but the code runs much faster. 
+.P
+There are some \fBpcre_exec()\fP options that are not supported for JIT
+execution. There are also some pattern items that JIT cannot handle. Details 
+are given below. In both cases, execution automatically falls back to the 
+interpretive code.
+.P
+If the JIT compiler finds an unsupported item, no JIT data is generated. You
+can find out if JIT execution is available after studying a pattern by calling
+\fBpcre_fullinfo()\fP with the PCRE_INFO_JIT option. A result of 1 means that
+JIT compilation was successful. A result of 0 means that JIT support is not
+available, or the pattern was not studied with PCRE_STUDY_JIT_COMPILE, or the
+JIT compiler was not able to handle the pattern.
+.
+.
+.SH "UNSUPPORTED OPTIONS AND PATTERN ITEMS"
+.rs
+.sp
+The only \fBpcre_exec()\fP options that are supported for JIT execution are
+PCRE_NO_UTF8_CHECK, PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and 
+PCRE_NOTEMPTY_ATSTART. Note in particular that partial matching is not 
+supported.
+.P
+The unsupported pattern items are:
+.sp
+  \eC            match a single byte, even in UTF-8 mode
+  (?Cn)          callouts
+  (?(<name>)...  conditional test on setting of a named subpattern
+  (?(R)...       conditional test on whole pattern recursion 
+  (?(Rn)...      conditional test on recursion, by number
+  (?(R&name)...  conditional test on recursion, by name
+  (*COMMIT)      )
+  (*MARK)        )
+  (*PRUNE)       ) the backtracking control verbs
+  (*SKIP)        )
+  (*THEN)        )
+.sp       
+Support for some of these may be added in future.
+.
+.
+.SH "RETURN VALUES FROM JIT EXECUTION"
+.rs
+.sp
+When a pattern is matched using JIT execution, the return values are the same 
+as those given by the interpretive \fBpcre_exec()\fP code, with the addition of 
+one new error code: PCRE_ERROR_JIT_STACKLIMIT. This means that the memory used 
+for the JIT stack was insufficient. See
+.\" HTML <a href="#stackcontrol">
+.\" </a>
+"Controlling the JIT stack"
+.\"
+below for a discussion of JIT stack usage. 
+.P
+The error code PCRE_ERROR_MATCHLIMIT is returned by the JIT code if searching a
+very large pattern tree goes on for too long, as it is in the same circumstance
+when JIT is not used, but the details of exactly what is counted are not the
+same. The PCRE_ERROR_RECURSIONLIMIT error code is never returned by JIT
+execution.
+.
+.
+.SH "SAVING AND RESTORING COMPILED PATTERNS"
+.rs
+.sp
+The code that is generated by the JIT compiler is architecture-specific, and is 
+also position dependent. For those reasons it cannot be saved and restored like 
+the bytecode and other data of a compiled pattern. You should be able to run
+\fBpcre_study()\fP on a saved and restored pattern, and thereby recreate the
+JIT data, but because JIT compilation uses significant resources, it is
+probably not worth doing.
+.
+.
+.\" HTML <a name="stackcontrol"></a>
+.SH "CONTROLLING THE JIT STACK"
+.rs
+.sp
+When the compiled JIT code runs, it needs a block of memory to use as a stack. 
+By default, it uses 32K on the machine stack. However, some large or 
+complicated patterns need more than this. The error PCRE_ERROR_JIT_STACKLIMIT 
+is given when there is not enough stack. Three functions are provided for 
+setting up alternative blocks of memory for use as JIT stacks.
+.P
+The \fBpcre_jit_stack_alloc()\fP function creates a JIT stack. Its arguments 
+are a starting size and a maximum size, and it returns an opaque value 
+of type \fBpcre_jit_stack\fP that represents a JIT stack, or NULL if there is 
+an error. The \fBpcre_jit_stack_free()\fP function can be used to free a stack
+that is no longer needed.
+.P
+The \fBpcre_assign_jit_stack()\fP function specifies which stack JIT code 
+should use. Its arguments are as follows:
+.sp
+  pcre_extra         *extra
+  pcre_jit_callback  callback
+  void               *data
+.sp    
+The \fIextra\fP argument must be the result of studying a pattern with 
+PCRE_STUDY_JIT_COMPILE. There are three cases for the values of the other two 
+arguments:
+.sp
+  (1) If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block
+      on the machine stack is used.
+.sp
+  (2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be
+      a valid JIT stack, the result of calling \fBpcre_jit_stack_alloc()\fP.
+.sp
+  (3) If \fIcallback\fP is not NULL, it must point to a function that is called
+      with \fIdata\fP as an argument at the start of matching, in order to 
+      set up a JIT stack. If the result is NULL, the internal 32K stack 
+      is used; otherwise the return value must be a valid JIT stack, 
+      the result of calling \fBpcre_jit_stack_alloc()\fP.
+.sp
+You may safely assign the same JIT stack to more than one pattern, as long as
+they are all matched sequentially in the same thread. In a multithread
+application, each thread must use its own JIT stack.
+.P
+All the functions described in this section do nothing if JIT is not available,
+and \fBpcre_assign_jit_stack()\fP does nothing unless the \fIextra\fP argument
+is non-NULL and points to a \fBpcre_extra\fP block that is the result of a 
+successful study with PCRE_STUDY_JIT_COMPILE.
+.
+.
+.SH "EXAMPLE CODE"
+.rs
+.sp
+This is a single-threaded example that specifies a JIT stack without using a 
+callback.
+.sp
+  int rc;
+  pcre *re;
+  pcre_extra *extra; 
+  pcre_jit_stack *jit_stack; 
+.sp   
+  re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
+  /* Check for errors */
+  extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
+  jit_stack = pcre_jit_stack_alloc(1, 512 * 1024);
+  /* Check for error (NULL) */
+  pcre_assign_jit_stack(extra, NULL, jit_stack);
+  rc = pcre_exec(re, extra, subject, length, 0, 0, ovector, ovecsize);
+  /* Check results */
+  pcre_free(re);
+  pcre_free_study(extra);    
+.sp
+.
+.
+.SH "SEE ALSO"
+.rs
+.sp
+\fBpcreapi\fP(3)
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 28 August 2011
+Copyright (c) 1997-2011 University of Cambridge.
+.fi

Added: code/trunk/doc/pcrelimits.3
===================================================================
--- code/trunk/doc/pcrelimits.3                            (rev 0)
+++ code/trunk/doc/pcrelimits.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,57 @@
+.TH PCRELIMITS 3
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH "SIZE AND OTHER LIMITATIONS"
+.rs
+.sp
+There are some size limitations in PCRE but it is hoped that they will never in
+practice be relevant.
+.P
+The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
+compiled with the default internal linkage size of 2. If you want to process
+regular expressions that are truly enormous, you can compile PCRE with an
+internal linkage size of 3 or 4 (see the \fBREADME\fP file in the source
+distribution and the
+.\" HREF
+\fBpcrebuild\fP
+.\"
+documentation for details). In these cases the limit is substantially larger.
+However, the speed of execution is slower.
+.P
+All values in repeating quantifiers must be less than 65536.
+.P
+There is no limit to the number of parenthesized subpatterns, but there can be
+no more than 65535 capturing subpatterns.
+.P
+The maximum length of name for a named subpattern is 32 characters, and the
+maximum number of named subpatterns is 10000.
+.P
+The maximum length of a subject string is the largest positive number that an
+integer variable can hold. However, when using the traditional matching
+function, PCRE uses recursion to handle subpatterns and indefinite repetition.
+This means that the available stack space may limit the size of a subject
+string that can be processed by certain patterns. For a discussion of stack
+issues, see the
+.\" HREF
+\fBpcrestack\fP
+.\"
+documentation.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 24 August 2011
+Copyright (c) 1997-2011 University of Cambridge.
+.fi

Modified: code/trunk/doc/pcrepartial.3
===================================================================
--- code/trunk/doc/pcrepartial.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcrepartial.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -32,13 +32,15 @@
 though the details differ between the two matching functions. If both options
 are set, PCRE_PARTIAL_HARD takes precedence.
 .P
-Setting a partial matching option disables two of PCRE's optimizations. PCRE
-remembers the last literal byte in a pattern, and abandons matching immediately
-if such a byte is not present in the subject string. This optimization cannot
-be used for a subject string that might match only partially. If the pattern
-was studied, PCRE knows the minimum length of a matching string, and does not
-bother to run the matching function on shorter strings. This optimization is
-also disabled for partial matching.
+Setting a partial matching option for \fBpcre_exec()\fP disables the use of any
+just-in-time code that was set up by calling \fBpcre_study()\fP with the
+PCRE_STUDY_JIT_COMPILE option. It also disables two of PCRE's standard
+optimizations. PCRE remembers the last literal byte in a pattern, and abandons
+matching immediately if such a byte is not present in the subject string. This
+optimization cannot be used for a subject string that might match only
+partially. If the pattern was studied, PCRE knows the minimum length of a
+matching string, and does not bother to run the matching function on shorter
+strings. This optimization is also disabled for partial matching.
 .
 .
 .SH "PARTIAL MATCHING USING pcre_exec()"
@@ -411,6 +413,6 @@
 .rs
 .sp
 .nf
-Last updated: 07 November 2010
-Copyright (c) 1997-2010 University of Cambridge.
+Last updated: 26 August 2011
+Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcrepattern.3
===================================================================
--- code/trunk/doc/pcrepattern.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcrepattern.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -32,13 +32,8 @@
 option. This feature is not Perl-compatible. How setting UTF-8 mode affects
 pattern matching is mentioned in several places below. There is also a summary
 of UTF-8 features in the
-.\" HTML <a href="pcre.html#utf8support">
-.\" </a>
-section on UTF-8 support
-.\"
-in the main
 .\" HREF
-\fBpcre\fP
+\fBpcreunicode\fP
 .\"
 page.
 .P
@@ -2780,6 +2775,6 @@
 .rs
 .sp
 .nf
-Last updated: 24 July 2011
+Last updated: 24 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcreprecompile.3
===================================================================
--- code/trunk/doc/pcreprecompile.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcreprecompile.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -12,14 +12,17 @@
 \fBpcre_maketables()\fP
 .\"
 documentation), this is relatively straightforward. If you are using private
-tables, it is a little bit more complicated.
+tables, it is a little bit more complicated. However, if you are using the 
+just-in-time optimization feature of \fBpcre_study()\fP, it is not possible to 
+save and reload the JIT data.
 .P
 If you save compiled patterns to a file, you can copy them to a different host
 and run them there. This works even if the new host has the opposite endianness
 to the one on which the patterns were compiled. There may be a small
 performance penalty, but it should be insignificant. However, compiling regular
 expressions with one version of PCRE for use with a different version is not
-guaranteed to work and may cause crashes.
+guaranteed to work and may cause crashes, and saving and restoring a compiled 
+pattern loses any JIT optimization data.
 .
 .
 .SH "SAVING A COMPILED PATTERN"
@@ -58,9 +61,11 @@
 some daemon process that passes them via sockets to the processes that want
 them.
 .P
-If the pattern has been studied, it is also possible to save the study data in
-a similar way to the compiled pattern itself. When studying generates
-additional information, \fBpcre_study()\fP returns a pointer to a
+If the pattern has been studied, it is also possible to save the normal study
+data in a similar way to the compiled pattern itself. However, if the 
+PCRE_STUDY_JIT_COMPILE option was used, the just-in-time data that is created cannot
+be saved because it is too dependent on the current environment. When studying
+generates additional information, \fBpcre_study()\fP returns a pointer to a
 \fBpcre_extra\fP data block. Its format is defined in the
 .\" HTML <a href="pcreapi.html#extradata">
 .\" </a>
@@ -111,7 +116,8 @@
 reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
 \fIflags\fP field to indicate that study data is present. Then pass the
 \fBpcre_extra\fP block to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP in the
-usual way.
+usual way. If the pattern was studied for just-in-time optimization, that data 
+cannot be saved, and so is lost by a save/restore cycle.
 .
 .
 .SH "COMPATIBILITY WITH DIFFERENT PCRE RELEASES"
@@ -136,6 +142,6 @@
 .rs
 .sp
 .nf
-Last updated: 17 November 2010
-Copyright (c) 1997-2010 University of Cambridge.
+Last updated: 26 August 2011
+Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcrestack.3
===================================================================
--- code/trunk/doc/pcrestack.3    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcrestack.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -19,6 +19,17 @@
 the recursive call would immediately be passed back as the result of the
 current call (a "tail recursion"), the function is just restarted instead.
 .P
+The above comments apply when \fBpcre_exec()\fP is run in its normal 
+interpretive manner. If the pattern was studied with the 
+PCRE_STUDY_JIT_COMPILE option, and just-in-time compiling was successful, and 
+the options passed to \fBpcre_exec()\fP were not incompatible, the matching 
+process uses the JIT-compiled code instead of the \fBmatch()\fP function. In 
+this case, the memory requirements are handled entirely differently. See the
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation for details.
+.P
 The \fBpcre_dfa_exec()\fP function operates in an entirely different way, and
 uses recursion only when there is a regular expression recursion or subroutine
 call in the pattern. This includes the processing of assertion and "once-only"
@@ -30,7 +41,7 @@
 against this.
 .P
 The comments that follow do NOT apply to \fBpcre_dfa_exec()\fP; they are
-relevant only for \fBpcre_exec()\fP.
+relevant only for \fBpcre_exec()\fP without the JIT optimization.
 .
 .
 .SS "Reducing \fBpcre_exec()\fP's stack usage"
@@ -173,6 +184,6 @@
 .rs
 .sp
 .nf
-Last updated: 22 July 2011
+Last updated: 26 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Modified: code/trunk/doc/pcretest.1
===================================================================
--- code/trunk/doc/pcretest.1    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/pcretest.1    2011-08-28 15:23:03 UTC (rev 678)
@@ -81,22 +81,25 @@
 On Unix-like systems, set the size of the run-time stack to \fIsize\fP
 megabytes.
 .TP 10
-\fB-s\fP
+\fB-s\fP or \fB-s+\fP
 Behave as if each pattern has the \fB/S\fP modifier; in other words, force each
-pattern to be studied. If the \fB/I\fP or \fB/D\fP option is present on a
-pattern (requesting output about the compiled pattern), information about the
-result of studying is not included when studying is caused only by \fB-s\fP and
-neither \fB-i\fP nor \fB-d\fP is present on the command line. This behaviour
-means that the output from tests that are run with and without \fB-s\fP should
-be identical, except when options that output information about the actual
-running of a match are set. The \fB-M\fP, \fB-t\fP, and \fB-tm\fP options,
-which give information about resources used, are likely to produce different
-output with and without \fB-s\fP. Output may also differ if the \fB/C\fP option
-is present on an individual pattern. This uses callouts to trace the the
-matching process, and this may be different between studied and non-studied
-patterns. If the pattern contains (*MARK) items there may also be differences,
-for the same reason. The \fB-s\fP command line option can be overridden for
-specific patterns that should never be studied (see the /S option below).
+pattern to be studied. If \fB-s+\fP is used, the PCRE_STUDY_JIT_COMPILE flag is
+passed to \fBpcre_study()\fP, causing just-in-time optimization to be set up if
+it is available. If the \fB/I\fP or \fB/D\fP option is present on a pattern
+(requesting output about the compiled pattern), information about the result of
+studying is not included when studying is caused only by \fB-s\fP and neither
+\fB-i\fP nor \fB-d\fP is present on the command line. This behaviour means that
+the output from tests that are run with and without \fB-s\fP should be
+identical, except when options that output information about the actual running
+of a match are set. The \fB-M\fP, \fB-t\fP, and \fB-tm\fP options, which give
+information about resources used, are likely to produce different output with
+and without \fB-s\fP. Output may also differ if the \fB/C\fP option is present
+on an individual pattern. This uses callouts to trace the matching process,
+and this may be different between studied and non-studied patterns. If the
+pattern contains (*MARK) items there may also be differences, for the same
+reason. The \fB-s\fP command line option can be overridden for specific
+patterns that should never be studied (see the \fB/S\fP pattern modifier
+below).
 .TP 10
 \fB-t\fP
 Run each compile, study, and match many times with a timer, and output
@@ -259,7 +262,8 @@
 contains multiple copies of the same substring. If the \fB+\fP modifier appears
 twice, the same action is taken for captured substrings. In each case the
 remainder is output on the following line with a plus character following the
-capture number.
+capture number. Note that this modifier must not immediately follow the /S 
+modifier because /S+ has another meaning.
 .P
 The \fB/=\fP modifier requests that the values of all potential captured
 parentheses be output after a match by \fBpcre_exec()\fP. By default, only
@@ -325,6 +329,20 @@
 never studied, independently of \fB-s\fP. This feature is used in the test
 files in a few cases where the output is different when the pattern is studied.
 .P
+If the \fB/S\fP modifier is immediately followed by a + character, the call to 
+\fBpcre_study()\fP is made with the PCRE_STUDY_JIT_COMPILE option, requesting 
+just-in-time optimization support if it is available. Note that there is also a 
+\fB/+\fP modifier; it must not be given immediately after \fB/S\fP because this 
+will be misinterpreted. If JIT studying is successful, it will automatically be 
+used when \fBpcre_exec()\fP is run, except when incompatible run-time options 
+are specified. These include the partial matching options; a complete list is 
+given in the
+.\" HREF
+\fBpcrejit\fP
+.\"
+documentation. See also the \fB\eJ\fP escape sequence below for a way of 
+setting the size of the JIT stack.
+.P
 The \fB/T\fP modifier must be followed by a single digit. It causes a specific
 set of built-in character tables to be passed to \fBpcre_compile()\fP. It is
 used in the standard PCRE tests to check behaviour with different character
@@ -420,6 +438,9 @@
                "name" after a successful match (name termin-
                ated by next non-alphanumeric character)
 .\" JOIN
+  \eJdd       set up a JIT stack of dd kilobytes maximum (any
+               number of digits)              
+.\" JOIN
   \eL         call pcre_get_substringlist() after a
                successful match
 .\" JOIN
@@ -485,18 +506,27 @@
 passing an empty line as data, since a real empty line terminates the data
 input.
 .P
+The \fB\eJ\fP escape provides a way of setting the maximum stack size that is
+used by the just-in-time optimization code. It is ignored if JIT optimization 
+is not being used. Providing a stack that is larger than the default 32K is 
+necessary only for very complicated patterns.
+.P
 If \eM is present, \fBpcretest\fP calls \fBpcre_exec()\fP several times, with
 different values in the \fImatch_limit\fP and \fImatch_limit_recursion\fP
 fields of the \fBpcre_extra\fP data structure, until it finds the minimum
-numbers for each parameter that allow \fBpcre_exec()\fP to complete. The
-\fImatch_limit\fP number is a measure of the amount of backtracking that takes
-place, and checking it out can be instructive. For most simple matches, the
-number is quite small, but for patterns with very large numbers of matching
-possibilities, it can become large very quickly with increasing length of
-subject string. The \fImatch_limit_recursion\fP number is a measure of how much
-stack (or, if PCRE is compiled with NO_RECURSE, how much heap) memory is needed
-to complete the match attempt.
+numbers for each parameter that allow \fBpcre_exec()\fP to complete without 
+error. Because this is testing a specific feature of the normal interpretive 
+\fBpcre_exec()\fP execution, the use of any JIT optimization that might have 
+been set up by the \fB/S+\fP qualifier or the \fB-s+\fP option is disabled.
 .P
+The \fImatch_limit\fP number is a measure of the amount of backtracking
+that takes place, and checking it out can be instructive. For most simple
+matches, the number is quite small, but for patterns with very large numbers of
+matching possibilities, it can become large very quickly with increasing length
+of subject string. The \fImatch_limit_recursion\fP number is a measure of how
+much stack (or, if PCRE is compiled with NO_RECURSE, how much heap) memory is
+needed to complete the match attempt.
+.P
 When \eO is used, the value specified may be higher or lower than the size set
 by the \fB-O\fP command line option (or defaulted to 45); \eO applies only to
 the call of \fBpcre_exec()\fP for the line in which it appears.
@@ -765,7 +795,7 @@
 .sp
 The facilities described in this section are not available when the POSIX
 interface to PCRE is being used, that is, when the \fB/P\fP pattern modifier is
-specified.
+specified. 
 .P
 When the POSIX interface is not in use, you can cause \fBpcretest\fP to write a
 compiled pattern to a file, by following the modifiers with > and a file name.
@@ -778,6 +808,8 @@
 \fBpcreprecompile\fP
 .\"
 documentation for a discussion about saving and re-using compiled patterns.
+Note that if the pattern was successfully studied with JIT optimization, the
+JIT data cannot be saved.
 .P
 The data that is written is binary. The first eight bytes are the length of the
 compiled pattern data followed by the length of the optional study data, each
@@ -785,8 +817,8 @@
 there is no study data (either the pattern was not studied, or studying did not
 return any data), the second length is zero. The lengths are followed by an
 exact copy of the compiled pattern. If there is additional study data, this
-follows immediately after the compiled pattern. After writing the file,
-\fBpcretest\fP expects to read a new pattern.
+(excluding any JIT data) follows immediately after the compiled pattern. After
+writing the file, \fBpcretest\fP expects to read a new pattern.
 .P
 A saved pattern can be reloaded into \fBpcretest\fP by specifying < and a file
 name instead of a pattern. The name of the file must not contain a < character,
@@ -798,8 +830,9 @@
   Compiled pattern loaded from /some/file
   No study data
 .sp
-When the pattern has been loaded, \fBpcretest\fP proceeds to read data lines in
-the usual way.
+If the pattern was previously studied with the JIT optimization, the JIT
+information cannot be saved and restored, and so is lost. When the pattern has
+been loaded, \fBpcretest\fP proceeds to read data lines in the usual way.
 .P
 You can copy a file written by \fBpcretest\fP to a different host and reload it
 there, even if the new host has opposite endianness to the one on which the
@@ -823,8 +856,9 @@
 .SH "SEE ALSO"
 .rs
 .sp
-\fBpcre\fP(3), \fBpcreapi\fP(3), \fBpcrecallout\fP(3), \fBpcrematching\fP(3),
-\fBpcrepartial\fP(d), \fBpcrepattern\fP(3), \fBpcreprecompile\fP(3).
+\fBpcre\fP(3), \fBpcreapi\fP(3), \fBpcrecallout\fP(3), \fBpcrejit\fP(3),
+\fBpcrematching\fP(3), \fBpcrepartial\fP(3), \fBpcrepattern\fP(3),
+\fBpcreprecompile\fP(3).
 .
 .
 .SH AUTHOR
@@ -841,6 +875,6 @@
 .rs
 .sp
 .nf
-Last updated: 01 August 2011
+Last updated: 26 August 2011
 Copyright (c) 1997-2011 University of Cambridge.
 .fi

Added: code/trunk/doc/pcreunicode.3
===================================================================
--- code/trunk/doc/pcreunicode.3                            (rev 0)
+++ code/trunk/doc/pcreunicode.3    2011-08-28 15:23:03 UTC (rev 678)
@@ -0,0 +1,156 @@
+.TH PCREUNICODE 3
+.SH NAME
+PCRE - Perl-compatible regular expressions
+.SH "UTF-8 AND UNICODE PROPERTY SUPPORT"
+.rs
+.sp
+In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
+the code, and, in addition, you must call
+.\" HREF
+\fBpcre_compile()\fP
+.\"
+with the PCRE_UTF8 option flag, or the pattern must start with the sequence
+(*UTF8). When either of these is the case, both the pattern and any subject
+strings that are matched against it are treated as UTF-8 strings instead of
+strings of 1-byte characters. PCRE does not support any other formats (in 
+particular, it does not support UTF-16).
+.P
+If you compile PCRE with UTF-8 support, but do not use it at run time, the
+library will be a bit bigger, but the additional run time overhead is limited
+to testing the PCRE_UTF8 flag occasionally, so should not be very big.
+.P
+If PCRE is built with Unicode character property support (which implies UTF-8
+support), the escape sequences \ep{..}, \eP{..}, and \eX are supported.
+The available properties that can be tested are limited to the general
+category properties such as Lu for an upper case letter or Nd for a decimal
+number, the Unicode script names such as Arabic or Han, and the derived
+properties Any and L&. A full list is given in the
+.\" HREF
+\fBpcrepattern\fP
+.\"
+documentation. Only the short names for properties are supported. For example,
+\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
+Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
+compatibility with Perl 5.6. PCRE does not support this.
+.
+.
+.\" HTML <a name="utf8strings"></a>
+.SS "Validity of UTF-8 strings"
+.rs
+.sp
+When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
+are (by default) checked for validity on entry to the relevant functions. From
+release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
+themselves derived from the Unicode specification. Earlier releases of PCRE
+followed the rules of RFC 2279, which allows the full range of 31-bit values (0
+to 0x7FFFFFFF). The current check allows only values in the range U+0 to
+U+10FFFF, excluding U+D800 to U+DFFF.
+.P
+The excluded code points are the "Low Surrogate Area" of Unicode, of which the
+Unicode Standard says this: "The Low Surrogate Area does not contain any
+character assignments, consequently no character code charts or namelists are
+provided for this area. Surrogates are reserved for use with UTF-16 and then
+must be used in pairs." The code points that are encoded by UTF-16 pairs are
+available as independent code points in the UTF-8 encoding. (In other words,
+the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
+UTF-8.)
+.P
+If an invalid UTF-8 string is passed to PCRE, an error return is given. At
+compile time, the only additional information is the offset to the first byte
+of the failing character. The runtime functions \fBpcre_exec()\fP and
+\fBpcre_dfa_exec()\fP also pass back this information, as well as a more
+detailed reason code if the caller has provided memory in which to do this.
+.P
+In some situations, you may already know that your strings are valid, and
+therefore want to skip these checks in order to improve performance. If you set
+the PCRE_NO_UTF8_CHECK flag at compile time or at run time, PCRE assumes that
+the pattern or subject it is given (respectively) contains only valid UTF-8
+codes. In this case, it does not diagnose an invalid UTF-8 string.
+.P
+If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
+happens depends on why the string is invalid. If the string conforms to the
+"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
+in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
+test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
+rules of RFC 2279. However, if the string does not even conform to RFC 2279,
+the result is undefined. Your program may crash.
+.P
+If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
+encoded in a UTF-8-like manner as per the old RFC, you can set
+PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
+situation, you will have to apply your own validity check.
+.
+.
+.SS "General comments about UTF-8 mode"
+.rs
+.sp
+1. An unbraced hexadecimal escape sequence (such as \exb3) matches a two-byte
+UTF-8 character if the value is greater than 127.
+.P
+2. Octal numbers up to \e777 are recognized, and match two-byte UTF-8
+characters for values greater than \e177.
+.P
+3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
+bytes, for example: \ex{100}{3}.
+.P
+4. The dot metacharacter matches one UTF-8 character instead of a single byte.
+.P
+5. The escape sequence \eC can be used to match a single byte in UTF-8 mode,
+but its use can lead to some strange effects. This facility is not available in
+the alternative matching function, \fBpcre_dfa_exec()\fP.
+.P
+6. The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly
+test characters of any code value, but, by default, the characters that PCRE
+recognizes as digits, spaces, or word characters remain the same set as before,
+all with values less than 256. This remains true even when PCRE is built to
+include Unicode property support, because to do otherwise would slow down PCRE
+in many common cases. Note in particular that this applies to \eb and \eB,
+because they are defined in terms of \ew and \eW. If you really want to test
+for a wider sense of, say, "digit", you can use explicit Unicode property tests
+such as \ep{Nd}. Alternatively, if you set the PCRE_UCP option, the way that
+the character escapes work is changed so that Unicode properties are used to
+determine which characters match. There are more details in the section on
+.\" HTML <a href="pcrepattern.html#genericchartypes">
+.\" </a>
+generic character types
+.\"
+in the
+.\" HREF
+\fBpcrepattern\fP
+.\"
+documentation.
+.P
+7. Similarly, characters that match the POSIX named character classes are all
+low-valued characters, unless the PCRE_UCP option is set.
+.P
+8. However, the horizontal and vertical whitespace matching escapes (\eh, \eH,
+\ev, and \eV) do match all the appropriate Unicode characters, whether or not
+PCRE_UCP is set.
+.P
+9. Case-insensitive matching applies only to characters whose values are less
+than 128, unless PCRE is built with Unicode property support. Even when Unicode
+property support is available, PCRE still uses its own character tables when
+checking the case of low-valued characters, so as not to degrade performance.
+The Unicode property information is used only for characters with higher
+values. Furthermore, PCRE supports case-insensitive matching only when there is
+a one-to-one mapping between a letter's cases. There are a small number of
+many-to-one mappings in Unicode; these are not supported by PCRE.
+.
+.
+.SH AUTHOR
+.rs
+.sp
+.nf
+Philip Hazel
+University Computing Service
+Cambridge CB2 3QH, England.
+.fi
+.
+.
+.SH REVISION
+.rs
+.sp
+.nf
+Last updated: 24 August 2011
+Copyright (c) 1997-2011 University of Cambridge.
+.fi

Modified: code/trunk/doc/perltest.txt
===================================================================
--- code/trunk/doc/perltest.txt    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/doc/perltest.txt    2011-08-28 15:23:03 UTC (rev 678)
@@ -3,17 +3,28 @@

The perltest.pl script tests Perl's regular expressions; it has the same
specification as pcretest, and so can be given identical input, except that
-input patterns can be followed only by Perl's lower case modifiers and /+ (as
-used by pcretest), which is recognized and handled by the program.
+input patterns can be followed only by Perl's lower case modifiers and certain
+other pcretest modifiers that are either handled or ignored:

+ /+ recognized and handled by perltest
+ /++ the second + is ignored
+ /8 recognized and handled by perltest
+ /J ignored
+ /K ignored
+ /W ignored
+ /S ignored
+ /SS ignored
+
The data lines are processed as Perl double-quoted strings, so if they contain
" $ or @ characters, these have to be escaped. For this reason, all such
characters in testinput1, testinput4, testinput6, and testinput11 are escaped
-so that they can be used for perltest as well as for pcretest. The special
-upper case pattern modifiers such as /A that pcretest recognizes, and its
-special data line escapes, are not used in these files. The output should be
-identical, apart from the initial identifying banner.
+so that they can be used for perltest as well as for pcretest. The pcretest \Y
+escape in data lines is removed.

+The special upper case pattern modifiers such as /A that pcretest recognizes,
+and its special data line escapes, are not used in these files. The output
+should be identical, apart from the initial identifying banner.
+
The perltest.pl script can also test UTF-8 features. It recognizes the special
modifier /8 that pcretest uses to invoke UTF-8 functionality. The testinput4
and testinput6 files can be fed to perltest to run compatible UTF-8 tests.
@@ -29,4 +40,4 @@
regular expressions, in order to check that PCRE diagnoses them correctly.

Philip Hazel
-October 2009
+August 2011

Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2011-08-28 10:50:07 UTC (rev 677)
+++ code/trunk/pcre_jit_compile.c    2011-08-28 15:23:03 UTC (rev 678)
@@ -6388,7 +6388,9 @@
 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
 {
 executable_function *function;
-if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL)
+if (extra != NULL &&
+    (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && 
+    extra->executable_jit != NULL)
   {
   function = (executable_function*)extra->executable_jit;
   function->callback = callback;

このメッセージは次のスレッドの一部です:
	日付によるスレッドの仕分け