[Pcre-svn] [244] code/trunk: Fix backtracking bug for \C\X* …

Top Page
Delete this message
Author: Subversion repository
Date: 2015-04-08
To: pcre-svn
Subject: [Pcre-svn] [244] code/trunk: Fix backtracking bug for \C\X* in UTF mode.
Revision: 244
          http://www.exim.org/viewvc/pcre2?view=rev&revision=244
Author:   ph10
Date:     2015-04-08 17:33:58 +0100 (Wed, 08 Apr 2015)


Log Message:
-----------
Fix backtracking bug for \C\X* in UTF mode.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_match.c
    code/trunk/testdata/testinput4
    code/trunk/testdata/testoutput4


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-04-06 12:16:36 UTC (rev 243)
+++ code/trunk/ChangeLog    2015-04-08 16:33:58 UTC (rev 244)
@@ -73,7 +73,13 @@
 18. There was a similar problem to 17 in pcre2test for global matches, though
 the code there did catch the loop.


+19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
+and a subsequent item in the pattern caused a non-match, backtracking over the
+repeated \X did not stop, but carried on past the start of the subject, causing
+reference to random memory and/or a segfault. This bug was discovered by the
+LLVM fuzzer.

+
Version 10.10 06-March-2015
---------------------------


Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c    2015-04-06 12:16:36 UTC (rev 243)
+++ code/trunk/src/pcre2_match.c    2015-04-08 16:33:58 UTC (rev 244)
@@ -1333,14 +1333,14 @@
         if (*ecode == OP_CALLOUT)
           {
           cb.callout_number = ecode[1 + 2*LINK_SIZE];
-          cb.callout_string_offset = 0; 
+          cb.callout_string_offset = 0;
           cb.callout_string = NULL;
           cb.callout_string_length = 0;
           }
         else
           {
           cb.callout_number = 0;
-          cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE); 
+          cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
           cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
           cb.callout_string_length =
             callout_length - (1 + 4*LINK_SIZE) - 2;
@@ -1408,7 +1408,7 @@
       break;


       case OP_FALSE:
-      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */ 
+      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
       break;


       case OP_TRUE:
@@ -1760,14 +1760,14 @@
         if (*ecode == OP_CALLOUT)
           {
           cb.callout_number = ecode[1 + 2*LINK_SIZE];
-          cb.callout_string_offset = 0; 
+          cb.callout_string_offset = 0;
           cb.callout_string = NULL;
           cb.callout_string_length = 0;
           }
         else
           {
           cb.callout_number = 0;
-          cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE); 
+          cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
           cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
           cb.callout_string_length =
             callout_length - (1 + 4*LINK_SIZE) - 2;
@@ -5723,12 +5723,17 @@


         if (possessive) continue;    /* No backtracking */


+        /* We use <= pp rather than == pp to detect the start of the run while
+        backtracking because the use of \C in UTF mode can cause BACKCHAR to
+        move back past pp. This is just palliative; the use of \C in UTF mode
+        is fraught with danger. */
+
         for(;;)
           {
           int lgb, rgb;
           PCRE2_SPTR fptr;


-          if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
+          if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM45);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);


@@ -5746,7 +5751,7 @@

           for (;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
+            if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
             fptr = eptr - 1;
             if (!utf) c = *fptr; else
               {


Modified: code/trunk/testdata/testinput4
===================================================================
--- code/trunk/testdata/testinput4    2015-04-06 12:16:36 UTC (rev 243)
+++ code/trunk/testdata/testinput4    2015-04-08 16:33:58 UTC (rev 244)
@@ -2221,4 +2221,10 @@


"[\S\V\H]"utf

+/\C\X*TӅ;
+{0,6}\v+
+F
+/utf
+    Ӆ\x0a
+
 # End of testinput4


Modified: code/trunk/testdata/testoutput4
===================================================================
--- code/trunk/testdata/testoutput4    2015-04-06 12:16:36 UTC (rev 243)
+++ code/trunk/testdata/testoutput4    2015-04-08 16:33:58 UTC (rev 244)
@@ -3741,4 +3741,11 @@


"[\S\V\H]"utf

+/\C\X*TӅ;
+{0,6}\v+
+F
+/utf
+    Ӆ\x0a
+No match
+
 # End of testinput4