Revision: 446
http://www.exim.org/viewvc/pcre2?view=rev&revision=446
Author: ph10
Date: 2015-11-25 18:46:35 +0000 (Wed, 25 Nov 2015)
Log Message:
-----------
Fix auto-callout with (?# comment bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testinput5
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/ChangeLog 2015-11-25 18:46:35 UTC (rev 446)
@@ -330,7 +330,11 @@
98. In pcre2test, make the "startoffset" modifier a synonym of "offset",
because it sets the "startoffset" parameter for pcre2_match().
+99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
+an item and its quantifier (for example, A(?#comment)?B) pcre2_compile()
+misbehaved.
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/src/pcre2_compile.c 2015-11-25 18:46:35 UTC (rev 446)
@@ -4001,6 +4001,23 @@
}
}
+ /* Skip over (?# comments. We need to do this here because we want to know if
+ the next thing is a quantifier, and these comments may come between an item
+ and its quantifier. */
+
+ if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
+ ptr[2] == CHAR_NUMBER_SIGN)
+ {
+ ptr += 3;
+ while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+ if (*ptr != CHAR_RIGHT_PARENTHESIS)
+ {
+ *errorcodeptr = ERR18;
+ goto FAILED;
+ }
+ continue;
+ }
+
/* See if the next thing is a quantifier. */
is_quantifier =
@@ -4007,25 +4024,24 @@
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
- /* Fill in length of a previous callout, except when the next thing is a
- quantifier or when processing a property substitution string in UCP mode. */
+ /* Fill in length of a previous callout and create an auto callout if
+ required, except when the next thing is a quantifier or when processing a
+ property substitution string for \w etc in UCP mode. */
- if (!is_quantifier && previous_callout != NULL && cb->nestptr[0] == NULL &&
- after_manual_callout-- <= 0)
+ if (!is_quantifier && cb->nestptr[0] == NULL)
{
- if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
- complete_callout(previous_callout, ptr, cb);
- previous_callout = NULL;
- }
+ if (previous_callout != NULL && after_manual_callout-- <= 0)
+ {
+ if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
+ complete_callout(previous_callout, ptr, cb);
+ previous_callout = NULL;
+ }
- /* Create auto callout, except for quantifiers, or while processing property
- strings that are substituted for \w etc in UCP mode. */
-
- if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier &&
- cb->nestptr[0] == NULL)
- {
- previous_callout = code;
- code = auto_callout(code, ptr, cb);
+ if ((options & PCRE2_AUTO_CALLOUT) != 0)
+ {
+ previous_callout = code;
+ code = auto_callout(code, ptr, cb);
+ }
}
/* Process the next pattern item. */
@@ -5742,34 +5758,20 @@
/* ===================================================================*/
- /* Start of nested parenthesized sub-expression, or comment or lookahead or
- lookbehind or option setting or condition or all the other extended
- parenthesis forms. We must save the current high-water-mark for the
- forward reference list so that we know where they start for this group.
- However, because the list may be extended when there are very many forward
- references (usually the result of a replicated inner group), we must use
- an offset rather than an absolute address. */
+ /* Start of nested parenthesized sub-expression, or lookahead or lookbehind
+ or option setting or condition or all the other extended parenthesis forms.
+ We must save the current high-water-mark for the forward reference list so
+ that we know where they start for this group. However, because the list may
+ be extended when there are very many forward references (usually the result
+ of a replicated inner group), we must use an offset rather than an absolute
+ address. Note that (?# comments are dealt with at the top of the loop;
+ they do not get this far. */
case CHAR_LEFT_PARENTHESIS:
ptr++;
- /* First deal with comments. Putting this code right at the start ensures
- that comments have no bad side effects. */
+ /* Deal with various "verbs" that can be introduced by '*'. */
- if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
- {
- ptr += 2;
- while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
- if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR18;
- goto FAILED;
- }
- continue;
- }
-
- /* Now deal with various "verbs" that can be introduced by '*'. */
-
if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
|| (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0))))
{
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testinput2 2015-11-25 18:46:35 UTC (rev 446)
@@ -4689,4 +4689,12 @@
/a[b[:punct:]]/bincode
+/L(?#(|++<!(2)?/B
+
+/L(?#(|++<!(2)?/B,no_auto_possess
+
+/L(?#(|++<!(2)?/B,auto_callout
+
+/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
+
# End of testinput2
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testinput5 2015-11-25 18:46:35 UTC (rev 446)
@@ -1714,4 +1714,8 @@
/[[:^ascii:]a]/utf,ucp,bincode
+/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
+
+/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
+
# End of testinput5
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testoutput2 2015-11-25 18:46:35 UTC (rev 446)
@@ -14906,4 +14906,40 @@
End
------------------------------------------------------------------
+/L(?#(|++<!(2)?/B
+------------------------------------------------------------------
+ Bra
+ L?+
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,no_auto_possess
+------------------------------------------------------------------
+ Bra
+ L?
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,auto_callout
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?+
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2015-11-21 16:27:06 UTC (rev 445)
+++ code/trunk/testdata/testoutput5 2015-11-25 18:46:35 UTC (rev 446)
@@ -4148,4 +4148,24 @@
End
------------------------------------------------------------------
+/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
+/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 14
+ L?+
+ Callout 255 14 0
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput5