Revision: 1545
http://vcs.pcre.org/viewvc?view=rev&revision=1545
Author: ph10
Date: 2015-04-08 17:34:24 +0100 (Wed, 08 Apr 2015)
Log Message:
-----------
Fix backtracking bug for \C\X* in UTF mode.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_exec.c
code/trunk/testdata/testinput4
code/trunk/testdata/testoutput4
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-04-07 16:19:03 UTC (rev 1544)
+++ code/trunk/ChangeLog 2015-04-08 16:34:24 UTC (rev 1545)
@@ -151,7 +151,13 @@
37. There was a similar problem to 36 in pcretest for global matches.
+38. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
+ and a subsequent item in the pattern caused a non-match, backtracking over
+ the repeated \X did not stop, but carried on past the start of the subject,
+ causing reference to random memory and/or a segfault. This bug was
+ discovered by the LLVM fuzzer.
+
Version 8.36 26-September-2014
------------------------------
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2015-04-07 16:19:03 UTC (rev 1544)
+++ code/trunk/pcre_exec.c 2015-04-08 16:34:24 UTC (rev 1545)
@@ -1376,7 +1376,7 @@
break;
case OP_DEF: /* DEFINE - always false */
- case OP_FAIL: /* From optimized (?!) condition */
+ case OP_FAIL: /* From optimized (?!) condition */
break;
/* The condition is an assertion. Call match() to evaluate it - setting
@@ -5652,12 +5652,17 @@
if (possessive) continue; /* No backtracking */
+ /* We use <= pp rather than == pp to detect the start of the run while
+ backtracking because the use of \C in UTF mode can leave pp pointing into
+ the middle of a character, in which case BACKCHAR moves eptr back past pp
+ without ever equalling it. This is just palliative; the use of \C in UTF
+ mode is fraught with danger. */
+
for(;;)
{
int lgb, rgb;
PCRE_PUCHAR fptr;
- if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -5675,7 +5680,7 @@
for (;;)
{
- if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
fptr = eptr - 1;
if (!utf) c = *fptr; else
{
Modified: code/trunk/testdata/testinput4
===================================================================
--- code/trunk/testdata/testinput4 2015-04-07 16:19:03 UTC (rev 1544)
+++ code/trunk/testdata/testinput4 2015-04-08 16:34:24 UTC (rev 1545)
@@ -724,4 +724,7 @@
"[\S\V\H]"8
+/\C\X*QT/8
+ Ӆ\x0aT
+
/-- End of testinput4 --/
Modified: code/trunk/testdata/testoutput4
===================================================================
--- code/trunk/testdata/testoutput4 2015-04-07 16:19:03 UTC (rev 1544)
+++ code/trunk/testdata/testoutput4 2015-04-08 16:34:24 UTC (rev 1545)
@@ -1273,4 +1273,8 @@
"[\S\V\H]"8
+/\C\X*QT/8
+ Ӆ\x0aT
+No match
+
/-- End of testinput4 --/