[Pcre-svn] [424] code/trunk: Small optimizations in pcre2_study.c

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [424] code/trunk: Small optimizations in pcre2_study.c
Revision: 424
          http://www.exim.org/viewvc/pcre2?view=rev&revision=424
Author:   ph10
Date:     2015-11-11 09:42:26 +0000 (Wed, 11 Nov 2015)
Log Message:
-----------
Small optimizations in pcre2_study.c


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcre2api.3
    code/trunk/src/pcre2_study.c
    code/trunk/testdata/testoutput15
    code/trunk/testdata/testoutput17
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-11-10 14:50:51 UTC (rev 423)
+++ code/trunk/ChangeLog    2015-11-11 09:42:26 UTC (rev 424)
@@ -282,7 +282,9 @@
 81. Check for integer overflow in minimum length calculation and cap it at 
 65535.


+82. Small optimizations in code for finding the minimum matching length.

+
Version 10.20 30-June-2015
--------------------------


Modified: code/trunk/doc/pcre2api.3
===================================================================
--- code/trunk/doc/pcre2api.3    2015-11-10 14:50:51 UTC (rev 423)
+++ code/trunk/doc/pcre2api.3    2015-11-11 09:42:26 UTC (rev 424)
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "05 November 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "10 November 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -1684,8 +1684,11 @@
 .sp
   PCRE2_INFO_MATCHEMPTY
 .sp
-Return 1 if the pattern can match an empty string, otherwise 0. The third
-argument should point to an \fBuint32_t\fP variable.
+Return 1 if the pattern might match an empty string, otherwise 0. The third
+argument should point to an \fBuint32_t\fP variable. When a pattern contains 
+recursive subroutine calls it is not always possible to determine whether or 
+not it can match an empty string. PCRE2 takes a cautious approach and returns 1 
+in such cases.
 .sp
   PCRE2_INFO_MATCHLIMIT
 .sp
@@ -3084,6 +3087,6 @@
 .rs
 .sp
 .nf
-Last updated: 05 November 2015
+Last updated: 10 November 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c    2015-11-10 14:50:51 UTC (rev 423)
+++ code/trunk/src/pcre2_study.c    2015-11-11 09:42:26 UTC (rev 424)
@@ -104,19 +104,22 @@
 register int branchlength = 0;
 register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;


-/* A large and/or complex regex can take too long to process. */
+/* If this is a "could be empty" group, its minimum length is 0. */

-if ((*countptr)++ > 1000) return -1;
+if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;

/* Skip over capturing bracket number */

-if (*code == OP_CBRA || *code == OP_SCBRA ||
-    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
+if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;


-/* Scan along the opcodes for this branch. If we get to the end of the
-branch, check the length against that of the other branches. If the accumulated
-length passes 16-bits, stop and return it. */
+/* A large and/or complex regex can take too long to process. */

+if ((*countptr)++ > 1000) return -1;
+
+/* Scan along the opcodes for this branch. If we get to the end of the branch,
+check the length against that of the other branches. If the accumulated length
+passes 16-bits, stop. */
+
for (;;)
{
int d, min, recno;
@@ -1543,25 +1546,29 @@
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
}

-/* Find the minimum length of subject string. */
+/* Find the minimum length of subject string. If it can match an empty string,
+the minimum length is already known. */

-switch(min = find_minlength(re, code, code, utf, NULL, &count))
+if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
   {
-  case -1:  /* \C in UTF mode or (*ACCEPT) or over-complex regex */
-  break;    /* Leave minlength unchanged (will be zero) */
+  switch(min = find_minlength(re, code, code, utf, NULL, &count))
+    {
+    case -1:  /* \C in UTF mode or (*ACCEPT) or over-complex regex */
+    break;    /* Leave minlength unchanged (will be zero) */
+  
+    case -2:
+    return 2; /* missing capturing bracket */
+  
+    case -3:
+    return 3; /* unrecognized opcode */
+  
+    default:
+    if (min > UINT16_MAX) min = UINT16_MAX;
+    re->minlength = min;
+    break;
+    }
+  }   


-  case -2:
-  return 2; /* missing capturing bracket */
-
-  case -3:
-  return 3; /* unrecognized opcode */
-
-  default:
-  if (min > UINT16_MAX) min = UINT16_MAX;
-  re->minlength = min;
-  break;
-  }
-
return 0;
}


Modified: code/trunk/testdata/testoutput15
===================================================================
--- code/trunk/testdata/testoutput15    2015-11-10 14:50:51 UTC (rev 423)
+++ code/trunk/testdata/testoutput15    2015-11-11 09:42:26 UTC (rev 424)
@@ -252,7 +252,7 @@
 /(a|(?R))/I
 Capturing subpattern count = 1
 May match empty string
-Subject length lower bound = 1
+Subject length lower bound = 0
     abcd
  0: a
  1: a
@@ -262,7 +262,7 @@
 /(ab|(bc|(de|(?R))))/I
 Capturing subpattern count = 3
 May match empty string
-Subject length lower bound = 2
+Subject length lower bound = 0
     abcd
  0: ab
  1: ab
@@ -272,7 +272,7 @@
 /(ab|(bc|(de|(?1))))/I
 Capturing subpattern count = 3
 May match empty string
-Subject length lower bound = 2
+Subject length lower bound = 0
     abcd
  0: ab
  1: ab


Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17    2015-11-10 14:50:51 UTC (rev 423)
+++ code/trunk/testdata/testoutput17    2015-11-11 09:42:26 UTC (rev 424)
@@ -416,7 +416,7 @@
 /(a|(?R))/I
 Capturing subpattern count = 1
 May match empty string
-Subject length lower bound = 1
+Subject length lower bound = 0
 JIT compilation was successful
     abcd
  0: a (JIT)
@@ -427,7 +427,7 @@
 /(ab|(bc|(de|(?R))))/I
 Capturing subpattern count = 3
 May match empty string
-Subject length lower bound = 2
+Subject length lower bound = 0
 JIT compilation was successful
     abcd
  0: ab (JIT)
@@ -438,7 +438,7 @@
 /(ab|(bc|(de|(?1))))/I
 Capturing subpattern count = 3
 May match empty string
-Subject length lower bound = 2
+Subject length lower bound = 0
 JIT compilation was successful
     abcd
  0: ab (JIT)


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-11-10 14:50:51 UTC (rev 423)
+++ code/trunk/testdata/testoutput2    2015-11-11 09:42:26 UTC (rev 424)
@@ -3960,7 +3960,7 @@
 Capturing subpattern count = 2
 Compile options: <none>
 Overall options: anchored
-Subject length lower bound = 3
+Subject length lower bound = 2
     a=a
  0: a=a
  1: a