[Pcre-svn] [446] code/trunk: Fix auto-callout with (?# comment bug.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [446] code/trunk: Fix auto-callout with (?# comment bug.
Revision: 446
          http://www.exim.org/viewvc/pcre2?view=rev&revision=446
Author:   ph10
Date:     2015-11-25 18:46:35 +0000 (Wed, 25 Nov 2015)
Log Message:
-----------
Fix auto-callout with (?# comment bug.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testinput5
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput5


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/ChangeLog    2015-11-25 18:46:35 UTC (rev 446)
@@ -330,7 +330,11 @@
 98. In pcre2test, make the "startoffset" modifier a synonym of "offset", 
 because it sets the "startoffset" parameter for pcre2_match().


+99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
+an item and its quantifier (for example, A(?#comment)?B), pcre2_compile()
+misbehaved.

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/src/pcre2_compile.c    2015-11-25 18:46:35 UTC (rev 446)
@@ -4001,6 +4001,23 @@
       }
     }


+  /* Skip over (?# comments. We need to do this here because we want to know if
+  the next thing is a quantifier, and these comments may come between an item
+  and its quantifier. */
+
+  if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
+      ptr[2] == CHAR_NUMBER_SIGN)
+    {
+    ptr += 3;
+    while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+    if (*ptr != CHAR_RIGHT_PARENTHESIS)
+      {
+      *errorcodeptr = ERR18;
+      goto FAILED;
+      }
+    continue;
+    }
+
   /* See if the next thing is a quantifier. */


   is_quantifier =
@@ -4007,25 +4024,24 @@
     c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
      (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));


- /* Fill in length of a previous callout, except when the next thing is a
- quantifier or when processing a property substitution string in UCP mode. */
+ /* Fill in length of a previous callout and create an auto callout if
+ required, except when the next thing is a quantifier or when processing a
+ property substitution string for \w etc in UCP mode. */

-  if (!is_quantifier && previous_callout != NULL && cb->nestptr[0] == NULL &&
-       after_manual_callout-- <= 0)
+  if (!is_quantifier && cb->nestptr[0] == NULL)
     {
-    if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */
-      complete_callout(previous_callout, ptr, cb);
-    previous_callout = NULL;
-    }
+    if (previous_callout != NULL && after_manual_callout-- <= 0)
+      {
+      if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */
+        complete_callout(previous_callout, ptr, cb);
+      previous_callout = NULL;
+      }


-  /* Create auto callout, except for quantifiers, or while processing property
-  strings that are substituted for \w etc in UCP mode. */
-
-  if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier &&
-       cb->nestptr[0] == NULL)
-    {
-    previous_callout = code;
-    code = auto_callout(code, ptr, cb);
+    if ((options & PCRE2_AUTO_CALLOUT) != 0)
+      {
+      previous_callout = code;
+      code = auto_callout(code, ptr, cb);
+      }
     }


/* Process the next pattern item. */
@@ -5742,34 +5758,20 @@


     /* ===================================================================*/
-    /* Start of nested parenthesized sub-expression, or comment or lookahead or
-    lookbehind or option setting or condition or all the other extended
-    parenthesis forms.  We must save the current high-water-mark for the
-    forward reference list so that we know where they start for this group.
-    However, because the list may be extended when there are very many forward
-    references (usually the result of a replicated inner group), we must use
-    an offset rather than an absolute address. */
+    /* Start of nested parenthesized sub-expression, or lookahead or lookbehind
+    or option setting or condition or all the other extended parenthesis forms.
+    We must save the current high-water-mark for the forward reference list so
+    that we know where they start for this group. However, because the list may
+    be extended when there are very many forward references (usually the result
+    of a replicated inner group), we must use an offset rather than an absolute
+    address. Note that (?# comments are dealt with at the top of the loop;
+    they do not get this far. */


     case CHAR_LEFT_PARENTHESIS:
     ptr++;


-    /* First deal with comments. Putting this code right at the start ensures
-    that comments have no bad side effects. */
+    /* Deal with various "verbs" that can be introduced by '*'. */


-    if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
-      {
-      ptr += 2;
-      while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
-      if (*ptr != CHAR_RIGHT_PARENTHESIS)
-        {
-        *errorcodeptr = ERR18;
-        goto FAILED;
-        }
-      continue;
-      }
-
-    /* Now deal with various "verbs" that can be introduced by '*'. */
-
     if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
          || (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0))))
       {


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testinput2    2015-11-25 18:46:35 UTC (rev 446)
@@ -4689,4 +4689,12 @@


/a[b[:punct:]]/bincode

+/L(?#(|++<!(2)?/B
+
+/L(?#(|++<!(2)?/B,no_auto_possess
+
+/L(?#(|++<!(2)?/B,auto_callout
+
+/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
+
# End of testinput2

Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testinput5    2015-11-25 18:46:35 UTC (rev 446)
@@ -1714,4 +1714,8 @@


/[[:^ascii:]a]/utf,ucp,bincode

+/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
+
+/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
+
# End of testinput5

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testoutput2    2015-11-25 18:46:35 UTC (rev 446)
@@ -14906,4 +14906,40 @@
         End
 ------------------------------------------------------------------


+/L(?#(|++<!(2)?/B
+------------------------------------------------------------------
+        Bra
+        L?+
+        Ket
+        End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,no_auto_possess
+------------------------------------------------------------------
+        Bra
+        L?
+        Ket
+        End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,auto_callout
+------------------------------------------------------------------
+        Bra
+        Callout 255 0 14
+        L?+
+        Callout 255 14 0
+        Ket
+        End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
+------------------------------------------------------------------
+        Bra
+        Callout 255 0 14
+        L?
+        Callout 255 14 0
+        Ket
+        End
+------------------------------------------------------------------
+
 # End of testinput2 


Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testoutput5    2015-11-25 18:46:35 UTC (rev 446)
@@ -4148,4 +4148,24 @@
         End
 ------------------------------------------------------------------


+/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
+------------------------------------------------------------------
+        Bra
+        Callout 255 0 14
+        L?
+        Callout 255 14 0
+        Ket
+        End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
+------------------------------------------------------------------
+        Bra
+        Callout 255 0 14
+        L?+
+        Callout 255 14 0
+        Ket
+        End
+------------------------------------------------------------------
+
 # End of testinput5