[Pcre-svn] [1112] code/trunk/src/pcre2_study.c: Minor code and comment tidies.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1112] code/trunk/src/pcre2_study.c: Minor code and comment tidies.
Revision: 1112
          http://www.exim.org/viewvc/pcre2?view=rev&revision=1112
Author:   ph10
Date:     2019-06-19 17:39:18 +0100 (Wed, 19 Jun 2019)
Log Message:
-----------
Minor code and comment tidies.


Modified Paths:
--------------
    code/trunk/src/pcre2_study.c


Modified: code/trunk/src/pcre2_study.c
===================================================================
--- code/trunk/src/pcre2_study.c    2019-06-19 16:27:50 UTC (rev 1111)
+++ code/trunk/src/pcre2_study.c    2019-06-19 16:39:18 UTC (rev 1112)
@@ -88,6 +88,9 @@
   countptr        pointer to call count (to catch over complexity)
   backref_cache   vector for caching back references.


+This function is no longer called when the pattern contains (*ACCEPT); however,
+the old code for returning -1 is retained, just in case.
+
 Returns:   the minimum length
            -1 \C in UTF-8 mode
               or (*ACCEPT)
@@ -205,7 +208,9 @@
     cc += 1 + LINK_SIZE;
     break;


-    /* ACCEPT makes things far too complicated; we have to give up. */
+    /* ACCEPT makes things far too complicated; we have to give up. In fact,
+    from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
+    used. However, leave the code in place, just in case. */


     case OP_ACCEPT:
     case OP_ASSERT_ACCEPT:
@@ -453,9 +458,9 @@
     For backreferenes, if duplicate numbers are present in the pattern we check
     for a reference to a duplicate. If it is, we don't know which version will
     be referenced, so we have to set the minimum length to zero. */
-    
-    /* Duplicate named pattern back reference. */ 


+    /* Duplicate named pattern back reference. */
+
     case OP_DNREF:
     case OP_DNREFI:
     if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
@@ -481,7 +486,7 @@
           ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
           if (cs == NULL) return -2;
           do ce += GET(ce, 1); while (*ce == OP_ALT);
-          
+
           dd = 0;
           if (!dupcapused ||
               (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
@@ -508,7 +513,7 @@
                 if (dd < 0) return dd;
                 }
               }
-            }   
+            }


           backref_cache[recno] = dd;
           for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
@@ -524,7 +529,7 @@
     cc += 1 + 2*IMM2_SIZE;
     goto REPEAT_BACK_REFERENCE;


-    /* Single back reference by number. References by name are converted to by 
+    /* Single back reference by number. References by name are converted to by
     number when there is no duplication. */


     case OP_REF:
@@ -1585,7 +1590,6 @@
 int
 PRIV(study)(pcre2_real_code *re)
 {
-int min;
 int count = 0;
 PCRE2_UCHAR *code;
 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
@@ -1607,21 +1611,23 @@
   }


 /* Find the minimum length of subject string. If the pattern can match an empty
-string, the minimum length is already known. If the pattern contains (*ACCEPT)
-all bets are off. If there are more back references than the size of the vector
-we are going to cache them in, do nothing. A pattern that complicated will
-probably take a long time to analyze and may in any case turn out to be too
-complicated. Note that back reference minima are held as 16-bit numbers. */
+string, the minimum length is already known. If the pattern contains (*ACCEPT)
+all bets are off, and we don't even try to find a minimum length. If there are
+more back references than the size of the vector we are going to cache them in,
+do nothing. A pattern that complicated will probably take a long time to
+analyze and may in any case turn out to be too complicated. Note that back
+reference minima are held as 16-bit numbers. */

 if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
      re->top_backref <= MAX_CACHE_BACKREF)
   {
+  int min;
   int backref_cache[MAX_CACHE_BACKREF+1];
   backref_cache[0] = 0;    /* Highest one that is set */
   min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
   switch(min)
     {
-    case -1:  /* \C in UTF mode or (*ACCEPT) or over-complex regex */
+    case -1:  /* \C in UTF mode or over-complex regex */
     break;    /* Leave minlength unchanged (will be zero) */


     case -2:
@@ -1631,8 +1637,7 @@
     return 3; /* unrecognized opcode */


     default:
-    if (min > UINT16_MAX) min = UINT16_MAX;
-    re->minlength = min;
+    re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
     break;
     }
   }