[Pcre-svn] [1155] code/trunk: Fix incorrect computation of group length when one branch exceeded 65535.

Kezdőlap
Üzenet törlése
Szerző: Subversion repository
Dátum:  
Címzett: pcre-svn
Tárgy: [Pcre-svn] [1155] code/trunk: Fix incorrect computation of group length when one branch exceeded 65535.
Revision: 1155
          http://www.exim.org/viewvc/pcre2?view=rev&revision=1155
Author:   ph10
Date:     2019-08-03 09:30:40 +0100 (Sat, 03 Aug 2019)
Log Message:
-----------
Fix incorrect computation of group length when one branch exceeded 65535.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/src/pcre2_study.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2019-08-01 16:59:50 UTC (rev 1154)
+++ code/trunk/ChangeLog    2019-08-03 08:30:40 UTC (rev 1155)
@@ -124,7 +124,12 @@


26. Make pcre2test -C show if libreadline or libedit is supported.

+28. If the length of one branch of a group exceeded 65535 (the maximum value
+that is remembered as a minimum length), the whole group's length was
+incorrectly recorded as 65535, leading to incorrect "no match" when start-up
+optimizations were in force.

+
Version 10.33 16-April-2019
---------------------------


Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c    2019-08-01 16:59:50 UTC (rev 1154)
+++ code/trunk/src/pcre2_study.c    2019-08-03 08:30:40 UTC (rev 1155)
@@ -105,6 +105,7 @@
   int *backref_cache)
 {
 int length = -1;
+int branchlength = 0;
 int prev_cap_recno = -1;
 int prev_cap_d = 0;
 int prev_recurse_recno = -1;
@@ -112,9 +113,9 @@
 uint32_t once_fudge = 0;
 BOOL had_recurse = FALSE;
 BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
+PCRE2_SPTR nextbranch = code + GET(code, 1);
+PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
 recurse_check this_recurse;
-int branchlength = 0;
-PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;


/* If this is a "could be empty" group, its minimum length is 0. */

@@ -130,16 +131,20 @@

/* Scan along the opcodes for this branch. If we get to the end of the branch,
check the length against that of the other branches. If the accumulated length
-passes 16-bits, stop. */
+passes 16-bits, reset to that value and skip the rest of the branch. */

for (;;)
{
int d, min, recno;
- PCRE2_UCHAR *cs, *ce;
- PCRE2_UCHAR op = *cc;
+ PCRE2_UCHAR op, *cs, *ce;

-  if (branchlength >= UINT16_MAX) return UINT16_MAX;
+  if (branchlength >= UINT16_MAX)
+    {
+    branchlength = UINT16_MAX;
+    cc = (PCRE2_UCHAR *)nextbranch;
+    }


+  op = *cc;
   switch (op)
     {
     case OP_COND:
@@ -229,6 +234,7 @@
     if (length < 0 || (!had_recurse && branchlength < length))
       length = branchlength;
     if (op != OP_ALT) return length;
+    nextbranch = cc + GET(cc, 1);
     cc += 1 + LINK_SIZE;
     branchlength = 0;
     had_recurse = FALSE;
@@ -241,7 +247,7 @@
     case OP_ASSERTBACK:
     case OP_ASSERTBACK_NOT:
     case OP_ASSERT_NA:
-    case OP_ASSERTBACK_NA:  
+    case OP_ASSERTBACK_NA:
     do cc += GET(cc, 1); while (*cc == OP_ALT);
     /* Fall through */


@@ -1091,7 +1097,7 @@
       case OP_ONCE:
       case OP_SCRIPT_RUN:
       case OP_ASSERT:
-      case OP_ASSERT_NA: 
+      case OP_ASSERT_NA:
       rc = set_start_bits(re, tcode, utf);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
       if (rc == SSB_DONE) try_next = FALSE; else
@@ -1134,7 +1140,7 @@
       case OP_ASSERT_NOT:
       case OP_ASSERTBACK:
       case OP_ASSERTBACK_NOT:
-      case OP_ASSERTBACK_NA: 
+      case OP_ASSERTBACK_NA:
       do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
       tcode += 1 + LINK_SIZE;
       break;
@@ -1584,9 +1590,9 @@
 /* This function is handed a compiled expression that it must study to produce
 information that will speed up the matching.


-Argument:  
+Argument:
   re       points to the compiled expression
-   
+
 Returns:   0 normally; non-zero should never normally occur
            1 unknown opcode in set_start_bits
            2 missing capturing bracket


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2019-08-01 16:59:50 UTC (rev 1154)
+++ code/trunk/testdata/testinput2    2019-08-03 08:30:40 UTC (rev 1155)
@@ -5740,4 +5740,12 @@
 /c*+/
     ab\=ph,offset=2


+/\A\s*(a|(?:[^`]{28500}){4})/I
+    a
+
+/\A\s*((?:[^`]{28500}){4})/I
+
+/\A\s*((?:[^`]{28500}){4}|a)/I
+    a
+
 # End of testinput2


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2019-08-01 16:59:50 UTC (rev 1154)
+++ code/trunk/testdata/testoutput2    2019-08-03 08:30:40 UTC (rev 1155)
@@ -17267,6 +17267,33 @@
     ab\=ph,offset=2
 Partial match: 


+/\A\s*(a|(?:[^`]{28500}){4})/I
+Capture group count = 1
+Max lookbehind = 1
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 1
+    a
+ 0: a
+ 1: a
+
+/\A\s*((?:[^`]{28500}){4})/I
+Capture group count = 1
+Max lookbehind = 1
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 65535
+
+/\A\s*((?:[^`]{28500}){4}|a)/I
+Capture group count = 1
+Max lookbehind = 1
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 1
+    a
+ 0: a
+ 1: a
+
 # End of testinput2
 Error -70: PCRE2_ERROR_BADDATA (unknown error number)
 Error -62: bad serialized data