[Pcre-svn] [681] code/trunk: Fix crash for forward reference…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [681] code/trunk: Fix crash for forward reference in lookbehind with PCRE2_ANCHORED.
Revision: 681
          http://www.exim.org/viewvc/pcre2?view=rev&revision=681
Author:   ph10
Date:     2017-03-16 17:17:47 +0000 (Thu, 16 Mar 2017)
Log Message:
-----------
Fix crash for forward reference in lookbehind with PCRE2_ANCHORED. Fixes 
oss-fuzz issue 865.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2017-03-15 09:37:46 UTC (rev 680)
+++ code/trunk/ChangeLog    2017-03-16 17:17:47 UTC (rev 681)
@@ -46,7 +46,13 @@
 for a character with a code point greater than 0x10ffff (the Unicode maximum)
 caused a crash.


+5. If a lookbehind assertion that contained a back reference to a group
+appearing later in the pattern was compiled with the PCRE2_ANCHORED option,
+undefined actions (often a segmentation fault) could occur, depending on what
+other options were set. An example assertion is (?<!\1(abc)) where the
+reference \1 precedes the group (abc). This fixes oss-fuzz issue 865.

+
Version 10.23 14-February-2017
------------------------------


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2017-03-15 09:37:46 UTC (rev 680)
+++ code/trunk/src/pcre2_compile.c    2017-03-16 17:17:47 UTC (rev 681)
@@ -8090,6 +8090,10 @@
 is also called to skip to the end of a class, during which it will never
 encounter nested groups (but there's no need to have special code for that).


+When called to find the end of a branch or group, pptr must point to the first
+meta code inside the branch, not the branch-starting code. In other cases it
+can point to the item that causes the function to be called.
+
 Arguments:
   pptr       current pointer to skip from
   skiptype   PSKIP_CLASS when skipping to end of class
@@ -8106,10 +8110,10 @@
 {
 uint32_t nestlevel = 0;


-for (pptr += 1;; pptr++)
+for (;; pptr++)
   {
   uint32_t meta = META_CODE(*pptr);
-
+  
   switch(meta)
     {
     default:  /* Just skip over most items */
@@ -8201,11 +8205,12 @@
 /* This is called for nested groups within a branch of a lookbehind whose
 length is being computed. If all the branches in the nested group have the same
 length, that is OK. On entry, the pointer must be at the first element after
-the group initializing code. Caching is used to improve processing speed when
-the same capturing group occurs many times.
+the group initializing code. On exit it points to OP_KET. Caching is used to
+improve processing speed when the same capturing group occurs many times.


 Arguments:
   pptrptr     pointer to pointer in the parsed pattern
+  isinline    FALSE if a reference or recursion; TRUE for inline group
   errcodeptr  pointer to the errorcode
   lcptr       pointer to the loop counter
   group       number of captured group or -1 for a non-capturing group
@@ -8216,7 +8221,7 @@
 */


static int
-get_grouplength(uint32_t **pptrptr, int *errcodeptr, int *lcptr,
+get_grouplength(uint32_t **pptrptr, BOOL isinline, int *errcodeptr, int *lcptr,
int group, parsed_recurse_check *recurses, compile_block *cb)
{
int branchlength;
@@ -8223,20 +8228,22 @@
int grouplength = -1;

/* The cache can be used only if there is no possibility of there being two
-groups with the same number. */
+groups with the same number. We do not need to set the end pointer for a group
+that is being processed as a back reference or recursion, but we must do so for
+an inline group. */

-if (group > 0)
+if (group > 0 && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)
   {
   uint32_t groupinfo = cb->groupinfo[group];
-  if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0)
+  if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1;
+  if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
     {
-    if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1;
-    if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
-      return groupinfo & GI_FIXED_LENGTH_MASK;
+    if (isinline) *pptrptr = parsed_skip(*pptrptr, PSKIP_KET);
+    return groupinfo & GI_FIXED_LENGTH_MASK;
     }
   }


-/* Scan the group */
+/* Scan the group. In this case we find the end pointer of necessity. */

 for(;;)
   {
@@ -8394,11 +8401,12 @@
       }
     break;


-    /* Lookaheads can be ignored. */
+    /* Lookaheads can be ignored, but we must start the skip inside the group 
+    so that it isn't treated as a group within the branch. */


     case META_LOOKAHEAD:
     case META_LOOKAHEADNOT:
-    pptr = parsed_skip(pptr, PSKIP_KET);
+    pptr = parsed_skip(pptr + 1, PSKIP_KET);
     if (pptr == NULL) goto PARSED_SKIP_FAILED;
     break;


@@ -8496,7 +8504,10 @@
         else if (*gptr == (META_CAPTURE | group)) break;
       }


-    gptrend = parsed_skip(gptr, PSKIP_KET);
+    /* We must start the search for the end of the group at the first meta code 
+    inside the group. Otherwise it will be treated as an enclosed group. */
+
+    gptrend = parsed_skip(gptr + 1, PSKIP_KET);
     if (gptrend == NULL) goto PARSED_SKIP_FAILED;
     if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED;  /* Local recursion */
     for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
@@ -8503,8 +8514,14 @@
     if (r != NULL) goto ISNOTFIXED;   /* Mutual recursion */
     this_recurse.prev = recurses;
     this_recurse.groupptr = gptr;
+    
+    /* We do not need to know the position of the end of the group, that is,
+    gptr is not used after the call to get_grouplength(). Setting the second 
+    argument FALSE stops it scanning for the end when the length can be found 
+    in the cache. */ 
+     
     gptr++;
-    grouplength = get_grouplength(&gptr, errcodeptr, lcptr, group,
+    grouplength = get_grouplength(&gptr, FALSE, errcodeptr, lcptr, group,
       &this_recurse, cb);
     if (grouplength < 0)
       {
@@ -8541,7 +8558,8 @@
     case META_NOCAPTURE:
     pptr++;
     CHECK_GROUP:
-    grouplength = get_grouplength(&pptr, errcodeptr, lcptr, group, recurses, cb);
+    grouplength = get_grouplength(&pptr, TRUE, errcodeptr, lcptr, group, 
+      recurses, cb);
     if (grouplength < 0) return -1;
     itemlength = grouplength;
     break;


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2017-03-15 09:37:46 UTC (rev 680)
+++ code/trunk/testdata/testinput2    2017-03-16 17:17:47 UTC (rev 681)
@@ -5015,4 +5015,6 @@
 /\[(a)]{60}/expand
     aaaa


+/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended
+
# End of testinput2

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2017-03-15 09:37:46 UTC (rev 680)
+++ code/trunk/testdata/testoutput2    2017-03-16 17:17:47 UTC (rev 681)
@@ -15568,6 +15568,8 @@
     aaaa
 No match


+/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended
+
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data