Revision: 484
http://www.exim.org/viewvc/pcre2?view=rev&revision=484
Author: ph10
Date: 2016-02-02 17:22:55 +0000 (Tue, 02 Feb 2016)
Log Message:
-----------
Detect unmatched closing parentheses in the pre-scan to avoid giving incorrect
error messages.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/src/pcre2_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-02-02 16:25:47 UTC (rev 483)
+++ code/trunk/ChangeLog 2016-02-02 17:22:55 UTC (rev 484)
@@ -37,7 +37,12 @@
8. Minor tidies to the pcre2demo.c sample program, including more comments
about its 8-bit-ness.
+9. Detect unmatched closing parentheses and give the error in the pre-scan
+instead of later. Previously the pre-scan carried on and could give a
+misleading, incorrect error message. For example, /(?J)(?'a'))(?'a')/ gave a
+message about invalid duplicate group names.
+
Version 10.21 12-January-2016
-----------------------------
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2016-02-02 16:25:47 UTC (rev 483)
+++ code/trunk/src/pcre2_compile.c 2016-02-02 17:22:55 UTC (rev 484)
@@ -3377,28 +3377,25 @@
if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++;
}
- /* (*something) - just skip to closing ket unless PCRE2_ALT_VERBNAMES is
- set, in which case we have to process escapes in the string after the
- name. */
+ /* (*something) - skip over a name, and then just skip to closing ket
+ unless PCRE2_ALT_VERBNAMES is set, in which case we have to process
+ escapes in the string after a verb name terminated by a colon. */
else
{
ptr += 2;
while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
- if (*ptr == CHAR_COLON)
+ if (*ptr == CHAR_COLON && (options & PCRE2_ALT_VERBNAMES) != 0)
{
ptr++;
- if ((options & PCRE2_ALT_VERBNAMES) != 0)
- {
- if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0)
- goto FAILED;
- }
- else
- {
- while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
- ptr++;
- }
+ if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0)
+ goto FAILED;
}
+ else
+ {
+ while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
+ ptr++;
+ }
nest_depth--;
}
}
@@ -3748,7 +3745,12 @@
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--;
}
- if (nest_depth > 0) nest_depth--; /* Can be 0 for unmatched ) */
+ if (nest_depth == 0) /* Unmatched closing parenthesis */
+ {
+ errorcode = ERR22;
+ goto FAILED;
+ }
+ nest_depth--;
break;
}
}
@@ -8704,14 +8706,11 @@
reqcuflags = REQ_NONE;
}
-/* If we have not reached end of pattern after a successful compile, there's an
-excess bracket. Fill in the final opcode and check for disastrous overflow.
-If no overflow, but the estimated length exceeds the really used length, adjust
-the value of re->blocksize, and if valgrind support is configured, mark the
-extra allocated memory as unaddressable, so that any out-of-bound reads can be
-detected. */
+/* Fill in the final opcode and check for disastrous overflow. If no overflow,
+but the estimated length exceeds the really used length, adjust the value of
+re->blocksize, and if valgrind support is configured, mark the extra allocated
+memory as unaddressable, so that any out-of-bound reads can be detected. */
-if (errorcode == 0 && ptr < cb.end_pattern) errorcode = ERR22;
*code++ = OP_END;
usedlength = code - codestart;
if (usedlength > length) errorcode = ERR23; else
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2016-02-02 16:25:47 UTC (rev 483)
+++ code/trunk/testdata/testinput2 2016-02-02 17:22:55 UTC (rev 484)
@@ -4804,4 +4804,6 @@
/effg/hex
+/(?J)(?'a'))(?'a')/
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2016-02-02 16:25:47 UTC (rev 483)
+++ code/trunk/testdata/testoutput2 2016-02-02 17:22:55 UTC (rev 484)
@@ -15165,4 +15165,7 @@
/effg/hex
** Unexpected non-hex-digit 'g' in hex pattern: quote missing?
+/(?J)(?'a'))(?'a')/
+Failed: error 122 at offset 10: unmatched closing parenthesis
+
# End of testinput2