[Pcre-svn] [309] code/trunk: Move pcre2_find_bracket() into its own module to avoid circular dependency.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [309] code/trunk: Move pcre2_find_bracket() into its own module to avoid circular dependency.
Revision: 309
          http://www.exim.org/viewvc/pcre2?view=rev&revision=309
Author:   ph10
Date:     2015-07-16 17:11:52 +0100 (Thu, 16 Jul 2015)
Log Message:
-----------
Move pcre2_find_bracket() into its own module to avoid circular dependency.


Modified Paths:
--------------
    code/trunk/CMakeLists.txt
    code/trunk/ChangeLog
    code/trunk/Makefile.am
    code/trunk/NON-AUTOTOOLS-BUILD
    code/trunk/PrepareRelease
    code/trunk/README
    code/trunk/src/pcre2_compile.c


Added Paths:
-----------
    code/trunk/src/pcre2_find_bracket.c


Modified: code/trunk/CMakeLists.txt
===================================================================
--- code/trunk/CMakeLists.txt    2015-07-15 09:34:04 UTC (rev 308)
+++ code/trunk/CMakeLists.txt    2015-07-16 16:11:52 UTC (rev 309)
@@ -67,7 +67,8 @@
 # 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
 # 2013-11-05 PH added support for PARENS_NEST_LIMIT
 # 2014-08-29 PH converted the file for PCRE2 (which has no C++).
-# 2015-04024 PH added support for PCRE2_DEBUG
+# 2015-04-24 PH added support for PCRE2_DEBUG
+# 2015-07-16 PH updated for new pcre2_find_bracket source module


PROJECT(PCRE2 C)

@@ -390,6 +391,7 @@
src/pcre2_context.c
src/pcre2_dfa_match.c
src/pcre2_error.c
+ src/pcre2_find_bracket.c
src/pcre2_jit_compile.c
src/pcre2_maketables.c
src/pcre2_match.c

Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-07-15 09:34:04 UTC (rev 308)
+++ code/trunk/ChangeLog    2015-07-16 16:11:52 UTC (rev 309)
@@ -25,7 +25,11 @@
 treated as a comment and the invalid (?' at the end of the pattern was not 
 diagnosed. This caused a buffer overflow during the real compile.


+7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its
+own source module to avoid a circular dependency between src/pcre2_compile.c
+and src/pcre2_study.c.

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/Makefile.am
===================================================================
--- code/trunk/Makefile.am    2015-07-15 09:34:04 UTC (rev 308)
+++ code/trunk/Makefile.am    2015-07-16 16:11:52 UTC (rev 309)
@@ -319,6 +319,7 @@
   src/pcre2_context.c \
   src/pcre2_dfa_match.c \
   src/pcre2_error.c \
+  src/pcre2_find_bracket.c \
   src/pcre2_internal.h \
   src/pcre2_intmodedep.h \
   src/pcre2_jit_compile.c \


Modified: code/trunk/NON-AUTOTOOLS-BUILD
===================================================================
--- code/trunk/NON-AUTOTOOLS-BUILD    2015-07-15 09:34:04 UTC (rev 308)
+++ code/trunk/NON-AUTOTOOLS-BUILD    2015-07-16 16:11:52 UTC (rev 309)
@@ -97,6 +97,7 @@
        pcre2_context.c
        pcre2_dfa_match.c
        pcre2_error.c
+       pcre2_find_bracket.c 
        pcre2_jit_compile.c
        pcre2_maketables.c
        pcre2_match.c
@@ -388,4 +389,4 @@
 recommended download site.


=============================
-Last Updated: 15 June 2015
+Last Updated: 16 July 2015

Modified: code/trunk/PrepareRelease
===================================================================
--- code/trunk/PrepareRelease    2015-07-15 09:34:04 UTC (rev 308)
+++ code/trunk/PrepareRelease    2015-07-16 16:11:52 UTC (rev 309)
@@ -204,6 +204,7 @@
   src/pcre2_context.c \
   src/pcre2_dfa_match.c \
   src/pcre2_error.c \
+  src/pcre2_find_bracket.c \
   src/pcre2_internal.h \
   src/pcre2_intmodedep.h \
   src/pcre2_jit_compile.c \


Modified: code/trunk/README
===================================================================
--- code/trunk/README    2015-07-15 09:34:04 UTC (rev 308)
+++ code/trunk/README    2015-07-16 16:11:52 UTC (rev 309)
@@ -724,6 +724,7 @@
   src/pcre2_context.c      )
   src/pcre2_dfa_match.c    )
   src/pcre2_error.c        )
+  src/pcre2_find_bracket.c ) 
   src/pcre2_jit_compile.c  )
   src/pcre2_jit_match.c    ) sources for the functions in the library,
   src/pcre2_jit_misc.c     )   and some internal functions that they use
@@ -832,4 +833,4 @@
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 24 April 2015
+Last updated: 16 July 2015


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-07-15 09:34:04 UTC (rev 308)
+++ code/trunk/src/pcre2_compile.c    2015-07-16 16:11:52 UTC (rev 309)
@@ -2334,175 +2334,6 @@



 /*************************************************
-*    Scan compiled regex for specific bracket    *
-*************************************************/
-
-/* This function scans through a compiled pattern until it finds a
-capturing bracket with the given number, or, if the number is negative, an
-instance of OP_REVERSE for a lookbehind. The function is global in the C sense
-so that it can be called from pcre2_study() when finding the minimum matching
-length.
-
-Arguments:
-  code        points to start of expression
-  utf         TRUE in UTF mode
-  number      the required bracket number or negative to find a lookbehind
-
-Returns:      pointer to the opcode for the bracket, or NULL if not found
-*/
-
-PCRE2_SPTR
-PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
-{
-for (;;)
-  {
-  register PCRE2_UCHAR c = *code;
-
-  if (c == OP_END) return NULL;
-
-  /* XCLASS is used for classes that cannot be represented just by a bit map.
-  This includes negated single high-valued characters. CALLOUT_STR is used for
-  callouts with string arguments. In both cases the length in the table is
-  zero; the actual length is stored in the compiled code. */
-
-  if (c == OP_XCLASS) code += GET(code, 1);
-    else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
-
-  /* Handle recursion */
-
-  else if (c == OP_REVERSE)
-    {
-    if (number < 0) return (PCRE2_UCHAR *)code;
-    code += PRIV(OP_lengths)[c];
-    }
-
-  /* Handle capturing bracket */
-
-  else if (c == OP_CBRA || c == OP_SCBRA ||
-           c == OP_CBRAPOS || c == OP_SCBRAPOS)
-    {
-    int n = (int)GET2(code, 1+LINK_SIZE);
-    if (n == number) return (PCRE2_UCHAR *)code;
-    code += PRIV(OP_lengths)[c];
-    }
-
-  /* Otherwise, we can get the item's length from the table, except that for
-  repeated character types, we have to test for \p and \P, which have an extra
-  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
-  must add in its length. */
-
-  else
-    {
-    switch(c)
-      {
-      case OP_TYPESTAR:
-      case OP_TYPEMINSTAR:
-      case OP_TYPEPLUS:
-      case OP_TYPEMINPLUS:
-      case OP_TYPEQUERY:
-      case OP_TYPEMINQUERY:
-      case OP_TYPEPOSSTAR:
-      case OP_TYPEPOSPLUS:
-      case OP_TYPEPOSQUERY:
-      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
-      break;
-
-      case OP_TYPEUPTO:
-      case OP_TYPEMINUPTO:
-      case OP_TYPEEXACT:
-      case OP_TYPEPOSUPTO:
-      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
-        code += 2;
-      break;
-
-      case OP_MARK:
-      case OP_PRUNE_ARG:
-      case OP_SKIP_ARG:
-      case OP_THEN_ARG:
-      code += code[1];
-      break;
-      }
-
-    /* Add in the fixed length from the table */
-
-    code += PRIV(OP_lengths)[c];
-
-  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
-  followed by a multi-byte character. The length in the table is a minimum, so
-  we have to arrange to skip the extra bytes. */
-
-#ifdef MAYBE_UTF_MULTI
-    if (utf) switch(c)
-      {
-      case OP_CHAR:
-      case OP_CHARI:
-      case OP_NOT:
-      case OP_NOTI:
-      case OP_EXACT:
-      case OP_EXACTI:
-      case OP_NOTEXACT:
-      case OP_NOTEXACTI:
-      case OP_UPTO:
-      case OP_UPTOI:
-      case OP_NOTUPTO:
-      case OP_NOTUPTOI:
-      case OP_MINUPTO:
-      case OP_MINUPTOI:
-      case OP_NOTMINUPTO:
-      case OP_NOTMINUPTOI:
-      case OP_POSUPTO:
-      case OP_POSUPTOI:
-      case OP_NOTPOSUPTO:
-      case OP_NOTPOSUPTOI:
-      case OP_STAR:
-      case OP_STARI:
-      case OP_NOTSTAR:
-      case OP_NOTSTARI:
-      case OP_MINSTAR:
-      case OP_MINSTARI:
-      case OP_NOTMINSTAR:
-      case OP_NOTMINSTARI:
-      case OP_POSSTAR:
-      case OP_POSSTARI:
-      case OP_NOTPOSSTAR:
-      case OP_NOTPOSSTARI:
-      case OP_PLUS:
-      case OP_PLUSI:
-      case OP_NOTPLUS:
-      case OP_NOTPLUSI:
-      case OP_MINPLUS:
-      case OP_MINPLUSI:
-      case OP_NOTMINPLUS:
-      case OP_NOTMINPLUSI:
-      case OP_POSPLUS:
-      case OP_POSPLUSI:
-      case OP_NOTPOSPLUS:
-      case OP_NOTPOSPLUSI:
-      case OP_QUERY:
-      case OP_QUERYI:
-      case OP_NOTQUERY:
-      case OP_NOTQUERYI:
-      case OP_MINQUERY:
-      case OP_MINQUERYI:
-      case OP_NOTMINQUERY:
-      case OP_NOTMINQUERYI:
-      case OP_POSQUERY:
-      case OP_POSQUERYI:
-      case OP_NOTPOSQUERY:
-      case OP_NOTPOSQUERYI:
-      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
-      break;
-      }
-#else
-    (void)(utf);  /* Keep compiler happy by referencing function argument */
-#endif  /* MAYBE_UTF_MULTI */
-    }
-  }
-}
-
-
-
-/*************************************************
 *   Scan compiled regex for recursion reference  *
 *************************************************/



Added: code/trunk/src/pcre2_find_bracket.c
===================================================================
--- code/trunk/src/pcre2_find_bracket.c                            (rev 0)
+++ code/trunk/src/pcre2_find_bracket.c    2015-07-16 16:11:52 UTC (rev 309)
@@ -0,0 +1,218 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+         New API code Copyright (c) 2015 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains a single function that scans through a compiled pattern
+until it finds a capturing bracket with the given number, or, if the number is
+negative, an instance of OP_REVERSE for a lookbehind. The function is called
+from pcre2_compile.c and also from pcre2_study.c when finding the minimum
+matching length. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+*    Scan compiled regex for specific bracket    *
+*************************************************/
+
+/* Walk the compiled code item by item until a match or OP_END is reached.
+Arguments:
+  code        points to start of expression
+  utf         TRUE in UTF mode
+  number      the required bracket number or negative to find a lookbehind
+
+Returns:      pointer to the bracket or OP_REVERSE opcode, or NULL if not found
+*/
+
+PCRE2_SPTR
+PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
+{
+for (;;)
+  {
+  register PCRE2_UCHAR c = *code;
+
+  if (c == OP_END) return NULL;  /* Nothing found */
+
+  /* XCLASS is used for classes that cannot be represented just by a bit map.
+  This includes negated single high-valued characters. CALLOUT_STR is used for
+  callouts with string arguments. In both cases the length in the table is
+  zero; the actual length is stored in the compiled code. */
+
+  if (c == OP_XCLASS) code += GET(code, 1);
+    else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
+
+  /* Handle lookbehind: OP_REVERSE is the target when number is negative */
+
+  else if (c == OP_REVERSE)
+    {
+    if (number < 0) return (PCRE2_UCHAR *)code;
+    code += PRIV(OP_lengths)[c];
+    }
+
+  /* Handle capturing bracket; its number is read from offset 1+LINK_SIZE */
+
+  else if (c == OP_CBRA || c == OP_SCBRA ||
+           c == OP_CBRAPOS || c == OP_SCBRAPOS)
+    {
+    int n = (int)GET2(code, 1+LINK_SIZE);
+    if (n == number) return (PCRE2_UCHAR *)code;
+    code += PRIV(OP_lengths)[c];
+    }
+
+  /* Otherwise, we can get the item's length from the table, except that for
+  repeated character types, we have to test for \p and \P, which have an extra
+  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
+  must add in its length. */
+
+  else
+    {
+    switch(c)
+      {
+      case OP_TYPESTAR:
+      case OP_TYPEMINSTAR:
+      case OP_TYPEPLUS:
+      case OP_TYPEMINPLUS:
+      case OP_TYPEQUERY:
+      case OP_TYPEMINQUERY:
+      case OP_TYPEPOSSTAR:
+      case OP_TYPEPOSPLUS:
+      case OP_TYPEPOSQUERY:
+      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+      break;
+
+      case OP_TYPEUPTO:
+      case OP_TYPEMINUPTO:
+      case OP_TYPEEXACT:
+      case OP_TYPEPOSUPTO:
+      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+        code += 2;
+      break;
+
+      case OP_MARK:
+      case OP_PRUNE_ARG:
+      case OP_SKIP_ARG:
+      case OP_THEN_ARG:
+      code += code[1];  /* Argument length is held in code[1] */
+      break;
+      }
+
+    /* Add in the fixed length from the table */
+
+    code += PRIV(OP_lengths)[c];
+
+  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
+  followed by a multi-byte character. The length in the table is a minimum, so
+  we have to arrange to skip the extra bytes. */
+
+#ifdef MAYBE_UTF_MULTI
+    if (utf) switch(c)
+      {
+      case OP_CHAR:
+      case OP_CHARI:
+      case OP_NOT:
+      case OP_NOTI:
+      case OP_EXACT:
+      case OP_EXACTI:
+      case OP_NOTEXACT:
+      case OP_NOTEXACTI:
+      case OP_UPTO:
+      case OP_UPTOI:
+      case OP_NOTUPTO:
+      case OP_NOTUPTOI:
+      case OP_MINUPTO:
+      case OP_MINUPTOI:
+      case OP_NOTMINUPTO:
+      case OP_NOTMINUPTOI:
+      case OP_POSUPTO:
+      case OP_POSUPTOI:
+      case OP_NOTPOSUPTO:
+      case OP_NOTPOSUPTOI:
+      case OP_STAR:
+      case OP_STARI:
+      case OP_NOTSTAR:
+      case OP_NOTSTARI:
+      case OP_MINSTAR:
+      case OP_MINSTARI:
+      case OP_NOTMINSTAR:
+      case OP_NOTMINSTARI:
+      case OP_POSSTAR:
+      case OP_POSSTARI:
+      case OP_NOTPOSSTAR:
+      case OP_NOTPOSSTARI:
+      case OP_PLUS:
+      case OP_PLUSI:
+      case OP_NOTPLUS:
+      case OP_NOTPLUSI:
+      case OP_MINPLUS:
+      case OP_MINPLUSI:
+      case OP_NOTMINPLUS:
+      case OP_NOTMINPLUSI:
+      case OP_POSPLUS:
+      case OP_POSPLUSI:
+      case OP_NOTPOSPLUS:
+      case OP_NOTPOSPLUSI:
+      case OP_QUERY:
+      case OP_QUERYI:
+      case OP_NOTQUERY:
+      case OP_NOTQUERYI:
+      case OP_MINQUERY:
+      case OP_MINQUERYI:
+      case OP_NOTMINQUERY:
+      case OP_NOTMINQUERYI:
+      case OP_POSQUERY:
+      case OP_POSQUERYI:
+      case OP_NOTPOSQUERY:
+      case OP_NOTPOSQUERYI:
+      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
+      break;
+      }
+#else
+    (void)(utf);  /* Keep compiler happy by referencing function argument */
+#endif  /* MAYBE_UTF_MULTI */
+    }
+  }
+}
+
+/* End of pcre2_find_bracket.c */