[Pcre-svn] [892] code/trunk: Save extra compile options with…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [892] code/trunk: Save extra compile options with the compiled pattern, and add an info call to retrieve them.
Revision: 892
          http://www.exim.org/viewvc/pcre2?view=rev&revision=892
Author:   ph10
Date:     2017-12-16 17:49:26 +0000 (Sat, 16 Dec 2017)
Log Message:
-----------
Save extra compile options with the compiled pattern, and add an info call to 
retrieve them.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcre2_pattern_info.3
    code/trunk/doc/pcre2api.3
    code/trunk/src/pcre2.h
    code/trunk/src/pcre2.h.in
    code/trunk/src/pcre2_compile.c
    code/trunk/src/pcre2_intmodedep.h
    code/trunk/src/pcre2_pattern_info.c
    code/trunk/src/pcre2test.c
    code/trunk/testdata/testinput10
    code/trunk/testdata/testinput12
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput10
    code/trunk/testdata/testoutput12-16
    code/trunk/testdata/testoutput12-32
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/ChangeLog    2017-12-16 17:49:26 UTC (rev 892)
@@ -85,7 +85,11 @@
 20. Allocate a single callout block on the stack at the start of pcre2_match()
 and set its never-changing fields once only.


+21. Save the extra compile options (set in the compile context) with the
+compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS
+to retrieve them, and update pcre2test to show them.

+
Version 10.30 14-August-2017
----------------------------


Modified: code/trunk/doc/pcre2_pattern_info.3
===================================================================
--- code/trunk/doc/pcre2_pattern_info.3    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/doc/pcre2_pattern_info.3    2017-12-16 17:49:26 UTC (rev 892)
@@ -1,4 +1,4 @@
-.TH PCRE2_PATTERN_INFO 3 "26 May 2017" "PCRE2 10.30"
+.TH PCRE2_PATTERN_INFO 3 "16 December 2017" "PCRE2 10.31"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -15,7 +15,7 @@
 .sp
 This function returns information about a compiled pattern. Its arguments are:
 .sp
-  \fIcode\fP     Pointer to a compiled regular expression
+  \fIcode\fP     Pointer to a compiled regular expression pattern
   \fIwhat\fP     What information is required
   \fIwhere\fP    Where to put the information
 .sp
@@ -32,6 +32,8 @@
 .\" JOIN
   PCRE2_INFO_DEPTHLIMIT      Backtracking depth limit if set,
                                otherwise PCRE2_ERROR_UNSET
+  PCRE2_INFO_EXTRAOPTIONS    Extra options that were passed in the
+                               compile context
   PCRE2_INFO_FIRSTBITMAP     Bitmap of first code units, or NULL
   PCRE2_INFO_FIRSTCODETYPE   Type of start-of-match information
                                0 nothing set


Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/doc/pcre2api.3    2017-12-16 17:49:26 UTC (rev 892)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "14 November 2017" "PCRE2 10.31"
+.TH PCRE2API 3 "16 December 2017" "PCRE2 10.31"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -1904,12 +1904,15 @@
 .sp
   PCRE2_INFO_ALLOPTIONS
   PCRE2_INFO_ARGOPTIONS
+  PCRE2_INFO_EXTRAOPTIONS 
 .sp
-Return a copy of the pattern's options. The third argument should point to a
+Return copies of the pattern's options. The third argument should point to a
 \fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
 were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
 the compile options as modified by any top-level (*XXX) option settings such as
-(*UTF) at the start of the pattern itself.
+(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the 
+extra options that were set in the compile context by calling the
+pcre2_set_compile_extra_options() function.
 .P
 For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED
 option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF.
@@ -3597,6 +3600,6 @@
 .rs
 .sp
 .nf
-Last updated: 22 October 2017
+Last updated: 16 December 2017
 Copyright (c) 1997-2017 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2.h
===================================================================
--- code/trunk/src/pcre2.h    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/src/pcre2.h    2017-12-16 17:49:26 UTC (rev 892)
@@ -418,6 +418,7 @@
 #define PCRE2_INFO_HASBACKSLASHC        23
 #define PCRE2_INFO_FRAMESIZE            24
 #define PCRE2_INFO_HEAPLIMIT            25
+#define PCRE2_INFO_EXTRAOPTIONS         26


/* Request types for pcre2_config(). */


Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/src/pcre2.h.in    2017-12-16 17:49:26 UTC (rev 892)
@@ -418,6 +418,7 @@
 #define PCRE2_INFO_HASBACKSLASHC        23
 #define PCRE2_INFO_FRAMESIZE            24
 #define PCRE2_INFO_HEAPLIMIT            25
+#define PCRE2_INFO_EXTRAOPTIONS         26


/* Request types for pcre2_config(). */


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/src/pcre2_compile.c    2017-12-16 17:49:26 UTC (rev 892)
@@ -9485,6 +9485,7 @@
 re->magic_number = MAGIC_NUMBER;
 re->compile_options = options;
 re->overall_options = cb.external_options;
+re->extra_options = ccontext->extra_options;
 re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
 re->limit_heap = limit_heap;
 re->limit_match = limit_match;


Modified: code/trunk/src/pcre2_intmodedep.h
===================================================================
--- code/trunk/src/pcre2_intmodedep.h    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/src/pcre2_intmodedep.h    2017-12-16 17:49:26 UTC (rev 892)
@@ -623,6 +623,7 @@
   uint32_t magic_number;          /* Paranoid and endianness check */
   uint32_t compile_options;       /* Options passed to pcre2_compile() */
   uint32_t overall_options;       /* Options after processing the pattern */
+  uint32_t extra_options;         /* Taken from compile_context */ 
   uint32_t flags;                 /* Various state flags */
   uint32_t limit_heap;            /* Limit set in the pattern */
   uint32_t limit_match;           /* Limit set in the pattern */


Modified: code/trunk/src/pcre2_pattern_info.c
===================================================================
--- code/trunk/src/pcre2_pattern_info.c    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/src/pcre2_pattern_info.c    2017-12-16 17:49:26 UTC (rev 892)
@@ -76,6 +76,7 @@
     case PCRE2_INFO_BSR:
     case PCRE2_INFO_CAPTURECOUNT:
     case PCRE2_INFO_DEPTHLIMIT:
+    case PCRE2_INFO_EXTRAOPTIONS: 
     case PCRE2_INFO_FIRSTCODETYPE:
     case PCRE2_INFO_FIRSTCODEUNIT:
     case PCRE2_INFO_HASBACKSLASHC:
@@ -144,6 +145,10 @@
   if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
   break;


+  case PCRE2_INFO_EXTRAOPTIONS:
+  *((uint32_t *)where) = re->extra_options;
+  break;
+
   case PCRE2_INFO_FIRSTCODETYPE:
   *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
                          ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;


Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/src/pcre2test.c    2017-12-16 17:49:26 UTC (rev 892)
@@ -4073,8 +4073,7 @@
 *           Show compile extra options           *
 *************************************************/


-/* Called only for unsupported POSIX options at present, and therefore needed
-only when the 8-bit library is being compiled.
+/* Called from show_pattern_info() and for unsupported POSIX options.

 Arguments:
   options     an options word
@@ -4084,7 +4083,6 @@
 Returns:      nothing
 */


-#ifdef SUPPORT_PCRE2_8
static void
show_compile_extra_options(uint32_t options, const char *before,
const char *after)
@@ -4098,7 +4096,6 @@
((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
after);
}
-#endif



@@ -4272,7 +4269,7 @@
static int
show_pattern_info(void)
{
-uint32_t compile_options, overall_options;
+uint32_t compile_options, overall_options, extra_options;

if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
{
@@ -4412,6 +4409,7 @@

pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
+ pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);

   /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
   cluttering up the verification output of non-UTF test files. */
@@ -4438,6 +4436,9 @@
       show_compile_options(overall_options, "Overall options:", "\n");
       }
     }
+    
+  if (extra_options != 0) 
+    show_compile_extra_options(extra_options, "Extra options:", "\n");  


if (jchanged) fprintf(outfile, "Duplicate name status changes\n");


Modified: code/trunk/testdata/testinput10
===================================================================
--- code/trunk/testdata/testinput10    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/testdata/testinput10    2017-12-16 17:49:26 UTC (rev 892)
@@ -461,7 +461,7 @@
 # A special extra option allows excaped surrogate code points in 8-bit mode,
 # but subjects containing them must not be UTF-checked.


-/\x{d800}/utf,allow_surrogate_escapes
+/\x{d800}/I,utf,allow_surrogate_escapes
     \x{d800}\=no_utf_check


/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes

Modified: code/trunk/testdata/testinput12
===================================================================
--- code/trunk/testdata/testinput12    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/testdata/testinput12    2017-12-16 17:49:26 UTC (rev 892)
@@ -367,7 +367,7 @@
 # but subjects containing them must not be UTF-checked. These patterns give
 # errors in 16-bit mode.


-/\x{d800}/utf,allow_surrogate_escapes
+/\x{d800}/I,utf,allow_surrogate_escapes
     \x{d800}\=no_utf_check


/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/testdata/testinput2    2017-12-16 17:49:26 UTC (rev 892)
@@ -5287,7 +5287,7 @@


/\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal

-/\N{\c/B,bad_escape_is_literal
+/\N{\c/IB,bad_escape_is_literal

/[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal

@@ -5330,7 +5330,7 @@
 /(*CR)abc/literal
     (*CR)abc


-/cat|dog/match_word
+/cat|dog/I,match_word
     the cat sat
 \= Expect no match
     caterpillar
@@ -5337,7 +5337,7 @@
     snowcat
     syndicate


-/(cat)|dog/match_line,literal
+/(cat)|dog/I,match_line,literal
     (cat)|dog
 \= Expect no match
     the cat sat


Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/testdata/testoutput10    2017-12-16 17:49:26 UTC (rev 892)
@@ -1578,7 +1578,13 @@
 # A special extra option allows excaped surrogate code points in 8-bit mode,
 # but subjects containing them must not be UTF-checked.


-/\x{d800}/utf,allow_surrogate_escapes
+/\x{d800}/I,utf,allow_surrogate_escapes
+Capturing subpattern count = 0
+Options: utf
+Extra options: allow_surrogate_escapes
+First code unit = \xed
+Last code unit = \x80
+Subject length lower bound = 1
     \x{d800}\=no_utf_check
  0: \x{d800}



Modified: code/trunk/testdata/testoutput12-16
===================================================================
--- code/trunk/testdata/testoutput12-16    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/testdata/testoutput12-16    2017-12-16 17:49:26 UTC (rev 892)
@@ -1425,7 +1425,7 @@
 # but subjects containing them must not be UTF-checked. These patterns give
 # errors in 16-bit mode.


-/\x{d800}/utf,allow_surrogate_escapes
+/\x{d800}/I,utf,allow_surrogate_escapes
 Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
     \x{d800}\=no_utf_check



Modified: code/trunk/testdata/testoutput12-32
===================================================================
--- code/trunk/testdata/testoutput12-32    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/testdata/testoutput12-32    2017-12-16 17:49:26 UTC (rev 892)
@@ -1417,7 +1417,12 @@
 # but subjects containing them must not be UTF-checked. These patterns give
 # errors in 16-bit mode.


-/\x{d800}/utf,allow_surrogate_escapes
+/\x{d800}/I,utf,allow_surrogate_escapes
+Capturing subpattern count = 0
+Options: utf
+Extra options: allow_surrogate_escapes
+First code unit = \x{d800}
+Subject length lower bound = 1
     \x{d800}\=no_utf_check
  0: \x{d800}



Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2017-12-16 16:43:47 UTC (rev 891)
+++ code/trunk/testdata/testoutput2    2017-12-16 17:49:26 UTC (rev 892)
@@ -16180,7 +16180,7 @@
 /\j\x{z}\o{82}\L\uabcd\u\U\g{\g/B,\bad_escape_is_literal
 ** Unrecognized modifier '\' in '\bad_escape_is_literal'


-/\N{\c/B,bad_escape_is_literal
+/\N{\c/IB,bad_escape_is_literal
 ------------------------------------------------------------------
         Bra
         N{c
@@ -16187,6 +16187,11 @@
         Ket
         End
 ------------------------------------------------------------------
+Capturing subpattern count = 0
+Extra options: bad_escape_is_literal
+First code unit = 'N'
+Last code unit = 'c'
+Subject length lower bound = 3


 /[\j\x{z}\o\gA-\Nb-\g]/B,bad_escape_is_literal
 ------------------------------------------------------------------
@@ -16269,7 +16274,12 @@
     (*CR)abc
  0: (*CR)abc


-/cat|dog/match_word
+/cat|dog/I,match_word
+Capturing subpattern count = 0
+Max lookbehind = 1
+Extra options: match_word
+Starting code units: c d 
+Subject length lower bound = 3
     the cat sat
  0: cat
 \= Expect no match
@@ -16280,7 +16290,13 @@
     syndicate
 No match


-/(cat)|dog/match_line,literal
+/(cat)|dog/I,match_line,literal
+Capturing subpattern count = 0
+Compile options: literal
+Overall options: anchored literal
+Extra options: match_line
+First code unit = '('
+Subject length lower bound = 9
     (cat)|dog
  0: (cat)|dog
 \= Expect no match