[Pcre-svn] [462] code/trunk: Fix PCRE_PARTIAL_HARD for patterns that end optionally, e.g.

Autore: Subversion repository
Data:
To: pcre-svn
Oggetto: [Pcre-svn] [462] code/trunk: Fix PCRE_PARTIAL_HARD for patterns that end optionally, e.g.

Revision: 462

          http://vcs.pcre.org/viewvc?view=rev&revision=462
Author:   ph10
Date:     2009-10-17 20:55:02 +0100 (Sat, 17 Oct 2009)

Log Message:
-----------
Fix PCRE_PARTIAL_HARD for patterns that end optionally, e.g. abc*

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/configure.ac
    code/trunk/pcre_dfa_exec.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/testdata/testinput2
    code/trunk/testdata/testinput5
    code/trunk/testdata/testinput7
    code/trunk/testdata/testinput8
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput5
    code/trunk/testdata/testoutput7
    code/trunk/testdata/testoutput8

Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/ChangeLog    2009-10-17 19:55:02 UTC (rev 462)
@@ -58,8 +58,7 @@
 10. Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is
     synonymous with PCRE_PARTIAL, for backwards compatibility, and
     PCRE_PARTIAL_HARD, which causes a partial match to supersede a full match,
-    and may be more useful for multi-segment matching, especially with
-    pcre_exec().
+    and may be more useful for multi-segment matching.

 11. Partial matching with pcre_exec() is now more intuitive. A partial match
     used to be given if ever the end of the subject was reached; now it is

Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/configure.ac    2009-10-17 19:55:02 UTC (rev 462)
@@ -8,8 +8,8 @@

m4_define(pcre_major, [8])
m4_define(pcre_minor, [00])
-m4_define(pcre_prerelease, [-RC1])
-m4_define(pcre_date, [2009-10-05])
+m4_define(pcre_prerelease, [-RC2])
+m4_define(pcre_date, [2009-10-17])

# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0])

Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/pcre_dfa_exec.c    2009-10-17 19:55:02 UTC (rev 462)
@@ -109,8 +109,9 @@
 character that is to be tested in some way. This makes it possible to
 centralize the loading of these characters. In the case of Type * etc, the
 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
-small value. ***NOTE*** If the start of this table is modified, the two tables
-that follow must also be modified. */
+small value. Non-zero values in the table are the offsets from the opcode where 
+the character is to be found. ***NOTE*** If the start of this table is
+modified, the three tables that follow must also be modified. */

 static const uschar coptable[] = {
   0,                             /* End                                    */
@@ -160,9 +161,66 @@
   0,                             /* DEF                                    */
   0, 0,                          /* BRAZERO, BRAMINZERO                    */
   0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
-  0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
+  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
 };

+/* This table identifies those opcodes that inspect a character. It is used to 
+remember the fact that a character could have been inspected when the end of
+the subject is reached, in order to support PCRE_PARTIAL_HARD behaviour.
+***NOTE*** If the start of this table is modified, the two tables that follow
+must also be modified. */
+
+static const uschar poptable[] = {
+  0,                             /* End                                    */
+  0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
+  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
+  1, 1, 1,                       /* Any, AllAny, Anybyte                   */
+  1, 1, 1,                       /* NOTPROP, PROP, EXTUNI                  */
+  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
+  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
+  1,                             /* Char                                   */
+  1,                             /* Charnc                                 */
+  1,                             /* not                                    */
+  /* Positive single-char repeats                                          */
+  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
+  1, 1, 1,                       /* upto, minupto, exact                   */
+  1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
+  /* Negative single-char repeats - only for chars < 256                   */
+  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
+  1, 1, 1,                       /* NOT upto, minupto, exact               */
+  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
+  /* Positive type repeats                                                 */
+  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
+  1, 1, 1,                       /* Type upto, minupto, exact              */
+  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
+  /* Character class & ref repeats                                         */
+  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
+  1, 1,                          /* CRRANGE, CRMINRANGE                    */
+  1,                             /* CLASS                                  */
+  1,                             /* NCLASS                                 */
+  1,                             /* XCLASS - variable length               */
+  0,                             /* REF                                    */
+  0,                             /* RECURSE                                */
+  0,                             /* CALLOUT                                */
+  0,                             /* Alt                                    */
+  0,                             /* Ket                                    */
+  0,                             /* KetRmax                                */
+  0,                             /* KetRmin                                */
+  0,                             /* Assert                                 */
+  0,                             /* Assert not                             */
+  0,                             /* Assert behind                          */
+  0,                             /* Assert behind not                      */
+  0,                             /* Reverse                                */
+  0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
+  0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
+  0,                             /* CREF                                   */
+  0,                             /* RREF                                   */
+  0,                             /* DEF                                    */
+  0, 0,                          /* BRAZERO, BRAMINZERO                    */
+  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
+  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+};
+
 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
 and \w */

@@ -489,6 +547,7 @@
unsigned int c, d;
int forced_fail = 0;
int reached_end = 0;
+ BOOL could_continue = FALSE;

/* Make the new state list into the active state list and empty the
new state list. */
@@ -596,7 +655,13 @@

     code = start_code + state_offset;
     codevalue = *code;
+    
+    /* If this opcode inspects a character, but we are at the end of the 
+    subject, remember the fact so that we can support PCRE_PARTIAL_HARD. */

+    if (clen == 0 && poptable[codevalue] != 0)
+      could_continue = TRUE; 
+
     /* If this opcode is followed by an inline character, load it. It is
     tempting to test for the presence of a subject character here, but that
     is wrong, because sometimes zero repetitions of the subject are
@@ -2522,16 +2587,24 @@
   /* We have finished the processing at the current subject character. If no
   new states have been set for the next character, we have found all the
   matches that we are going to find. If we are at the top level and partial
-  matching has been requested, check for appropriate conditions. The "forced_
-  fail" variable counts the number of (*F) encountered for the character. If it
-  is equal to the original active_count (saved in workspace[1]) it means that
-  (*F) was found on every active state. In this case we don't want to give a
-  partial match. */
+  matching has been requested, check for appropriate conditions. 
+  
+  The "forced_fail" variable counts the number of (*F) encountered for the
+  character. If it is equal to the original active_count (saved in
+  workspace[1]) it means that (*F) was found on every active state. In this
+  case we don't want to give a partial match. 
+  
+  The "reached_end" variable counts the number of threads that have reached the 
+  end of the pattern. The "could_continue" variable is true if a thread could 
+  have continued but for the fact that the end of the subject was reached. */

   if (new_count <= 0)
     {
     if (rlevel == 1 &&                               /* Top level, and */
-        reached_end != workspace[1] &&               /* Not all reached end */
+        (                                            /* either... */
+        reached_end != workspace[1] ||               /* Not all reached end */
+          could_continue                             /* or some could go on */
+        ) &&                                         /* and... */
         forced_fail != workspace[1] &&               /* Not all forced fail & */
         (                                            /* either... */
         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */

Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/pcre_exec.c    2009-10-17 19:55:02 UTC (rev 462)
@@ -415,7 +415,7 @@
     }

 #define SCHECK_PARTIAL()\
-  if (md->partial && eptr > mstart)\
+  if (md->partial != 0 && eptr > mstart)\
     {\
     md->hitend = TRUE;\
     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
@@ -2146,7 +2146,11 @@
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (!match_ref(offset, eptr, length, md, ims)) break;
+          if (!match_ref(offset, eptr, length, md, ims)) 
+            {
+            CHECK_PARTIAL(); 
+            break;
+            } 
           eptr += length;
           }
         while (eptr >= pp)
@@ -2315,7 +2319,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c > 255)
               {
@@ -2341,7 +2349,11 @@
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if ((data[c/8] & (1 << (c&7))) == 0) break;
             eptr++;
@@ -2446,7 +2458,11 @@
         for (i = min; i < max; i++)
           {
           int len = 1;
-          if (eptr >= md->end_subject) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL(); 
+            break;
+            } 
           GETCHARLENTEST(c, eptr, len);
           if (!_pcre_xclass(c, data)) break;
           eptr += len;
@@ -2685,7 +2701,11 @@
                      eptr <= md->end_subject - oclength &&
                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
 #endif  /* SUPPORT_UCP */
-            else break;
+            else 
+              {
+              CHECK_PARTIAL(); 
+              break;
+              } 
             }

           if (possessive) continue;
@@ -2763,7 +2783,12 @@
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL();
+            break;
+            } 
+          if (fc != md->lcc[*eptr]) break;
           eptr++;
           }

@@ -2817,7 +2842,12 @@
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject || fc != *eptr) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL(); 
+            break;
+            } 
+          if (fc != *eptr) break;
           eptr++;
           }
         if (possessive) continue;
@@ -3029,7 +3059,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(d, eptr, len);
             if (d < 256) d = md->lcc[d];
             if (fc == d) break;
@@ -3050,7 +3084,12 @@
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL();
+              break;
+              } 
+            if (fc == md->lcc[*eptr]) break;
             eptr++;
             }
           if (possessive) continue;
@@ -3159,7 +3198,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(d, eptr, len);
             if (fc == d) break;
             eptr += len;
@@ -3179,7 +3222,12 @@
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || fc == *eptr) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
+            if (fc == *eptr) break;
             eptr++;
             }
           if (possessive) continue;
@@ -4335,7 +4383,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (prop_fail_result) break;
             eptr+= len;
@@ -4346,7 +4398,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
@@ -4361,7 +4417,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
@@ -4374,7 +4434,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
@@ -4387,7 +4451,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
@@ -4416,7 +4484,11 @@
         {
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL(); 
+            break;
+            } 
           GETCHARINCTEST(c, eptr);
           prop_category = UCD_CATEGORY(c);
           if (prop_category == ucp_M) break;
@@ -4436,6 +4508,7 @@
         /* eptr is now past the end of the maximum run */

         if (possessive) continue;
+
         for(;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
@@ -4471,7 +4544,12 @@
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+              if (eptr >= md->end_subject) 
+                {
+                SCHECK_PARTIAL(); 
+                break;
+                } 
+              if (IS_NEWLINE(eptr)) break;
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -4483,7 +4561,12 @@
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+              if (eptr >= md->end_subject) 
+                {
+                SCHECK_PARTIAL(); 
+                break;
+                } 
+              if (IS_NEWLINE(eptr)) break;
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -4495,7 +4578,11 @@
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject) break;
+              if (eptr >= md->end_subject) 
+                {
+                SCHECK_PARTIAL(); 
+                break;
+                } 
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -4508,15 +4595,22 @@
           case OP_ANYBYTE:
           c = max - min;
           if (c > (unsigned int)(md->end_subject - eptr))
-            c = md->end_subject - eptr;
-          eptr += c;
+            {
+            eptr = md->end_subject;
+            SCHECK_PARTIAL();
+            }  
+          else eptr += c;
           break;

           case OP_ANYNL:
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c == 0x000d)
               {
@@ -4541,7 +4635,11 @@
             {
             BOOL gotspace;
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
@@ -4579,7 +4677,11 @@
             {
             BOOL gotspace;
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
@@ -4603,7 +4705,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
             eptr+= len;
@@ -4614,7 +4720,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
             eptr+= len;
@@ -4625,7 +4735,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
             eptr+= len;
@@ -4636,7 +4750,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
             eptr+= len;
@@ -4647,7 +4765,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
             eptr+= len;
@@ -4658,7 +4780,11 @@
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
             eptr+= len;
@@ -4690,7 +4816,12 @@
           case OP_ANY:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL();
+              break;
+              } 
+            if (IS_NEWLINE(eptr)) break;
             eptr++;
             }
           break;
@@ -4699,14 +4830,21 @@
           case OP_ANYBYTE:
           c = max - min;
           if (c > (unsigned int)(md->end_subject - eptr))
-            c = md->end_subject - eptr;
-          eptr += c;
+            {
+            eptr = md->end_subject;
+            SCHECK_PARTIAL();
+            }    
+          else eptr += c;
           break;

           case OP_ANYNL:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c == 0x000d)
               {
@@ -4727,7 +4865,11 @@
           case OP_NOT_HSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
             eptr++;
@@ -4737,7 +4879,11 @@
           case OP_HSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
             eptr++;
@@ -4747,7 +4893,11 @@
           case OP_NOT_VSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
               break;
@@ -4758,7 +4908,11 @@
           case OP_VSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
               break;
@@ -4769,8 +4923,12 @@
           case OP_NOT_DIGIT:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
             eptr++;
             }
           break;
@@ -4778,8 +4936,12 @@
           case OP_DIGIT:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
             eptr++;
             }
           break;
@@ -4787,8 +4949,12 @@
           case OP_NOT_WHITESPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_space) != 0) break;
             eptr++;
             }
           break;
@@ -4796,8 +4962,12 @@
           case OP_WHITESPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_space) == 0) break;
             eptr++;
             }
           break;
@@ -4805,8 +4975,12 @@
           case OP_NOT_WORDCHAR:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_word) != 0) break;
             eptr++;
             }
           break;
@@ -4814,8 +4988,12 @@
           case OP_WORDCHAR:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_word) == 0) break;
             eptr++;
             }
           break;

Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/pcre_internal.h    2009-10-17 19:55:02 UTC (rev 462)
@@ -1210,8 +1210,8 @@
 OP_EOD must correspond in order to the list of escapes immediately above.

*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
-that follow must also be updated to match. There is also a table called
-"coptable" in pcre_dfa_exec.c that must be updated. */
+that follow must also be updated to match. There are also tables called
+"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */

 enum {
   OP_END,            /* 0 End of pattern */
@@ -1376,7 +1376,11 @@
   OP_SKIPZERO        /* 114 */
 };

+/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
+definitions that follow must also be updated to match. There are also tables
+called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */

+
/* This macro defines textual names for all the opcodes. These are used only
for debugging. The macro is referenced only in pcre_printint.c. */

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput2    2009-10-17 19:55:02 UTC (rev 462)
@@ -3125,4 +3125,26 @@
     ** Failers
     abcdde

+/abcd*/
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/(a)bc\1*/
+    xxxxabca\P
+    xxxxabca\P\P
+
+/abc[de]*/
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput2 --/

Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput5    2009-10-17 19:55:02 UTC (rev 462)
@@ -720,4 +720,26 @@
     the cat\P
     the cat\P\P

+/abcd*/8
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i8
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/8
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/(a)bc\1*/8
+    xxxxabca\P
+    xxxxabca\P\P
+
+/abc[de]*/8
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput5 --/

Modified: code/trunk/testdata/testinput7
===================================================================
--- code/trunk/testdata/testinput7    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput7    2009-10-17 19:55:02 UTC (rev 462)
@@ -4507,4 +4507,22 @@
     thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
     \Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd

+/abcd*/
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/abc[de]*/
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput7 --/

Modified: code/trunk/testdata/testinput8
===================================================================
--- code/trunk/testdata/testinput8    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput8    2009-10-17 19:55:02 UTC (rev 462)
@@ -667,4 +667,22 @@
 /X/8f<any> 
     A\x{1ec5}ABCXYZ

+/abcd*/8
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i8
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/8
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/abc[de]*/8
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput8 --/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput2    2009-10-17 19:55:02 UTC (rev 462)
@@ -10372,4 +10372,39 @@
     abcdde  
 No match

+/abcd*/
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+    xxxxabc1\P
+ 0: abc1
+    xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/
+    xxxxabca\P
+ 0: abca
+ 1: a
+    xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/
+    xxxxabcde\P
+ 0: abcde
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput5    2009-10-17 19:55:02 UTC (rev 462)
@@ -2037,4 +2037,39 @@
     the cat\P\P
 Partial match: the cat

+/abcd*/8
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+    xxxxabc1\P
+ 0: abc1
+    xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/8
+    xxxxabca\P
+ 0: abca
+ 1: a
+    xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/8
+    xxxxabcde\P
+ 0: abcde
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput5 --/

Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput7    2009-10-17 19:55:02 UTC (rev 462)
@@ -7514,4 +7514,38 @@
     \Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
 No match

+/abcd*/
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+    xxxxabc1\P
+ 0: abc1
+ 1: abc
+    xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/
+    xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput7 --/

Modified: code/trunk/testdata/testoutput8
===================================================================
--- code/trunk/testdata/testoutput8    2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput8    2009-10-17 19:55:02 UTC (rev 462)
@@ -1286,4 +1286,38 @@
     A\x{1ec5}ABCXYZ
  0: X

+/abcd*/8
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+    xxxxabc1\P
+ 0: abc1
+ 1: abc
+    xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/8
+    xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput8 --/

Questo messaggio è parte di questo thread:
	il thread completo ordinato per data

[Pcre-svn] [462] code/trunk: Fix PCRE_PARTIAL_HARD for patte…