[Pcre-svn] [427] code/trunk: Add new PCRE_PARTIAL_HARD option.

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [427] code/trunk: Add new PCRE_PARTIAL_HARD option.
Revision: 427
          http://vcs.pcre.org/viewvc?view=rev&revision=427
Author:   ph10
Date:     2009-08-28 10:55:54 +0100 (Fri, 28 Aug 2009)


Log Message:
-----------
Add new PCRE_PARTIAL_HARD option.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre.h.in
    code/trunk/pcre_dfa_exec.c
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/pcretest.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testinput7
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput7


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/ChangeLog    2009-08-28 09:55:54 UTC (rev 427)
@@ -51,6 +51,12 @@
     slots in the offset vector, the offsets of the first-encountered partial
     match are set in them when PCRE_ERROR_PARTIAL is returned.


+10. Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is 
+    synonymous with PCRE_PARTIAL, for backwards compatibility, and 
+    PCRE_PARTIAL_HARD, which causes a longer partial match to supersede a 
+    shorter full match, and may be more useful for multi-segment matching, 
+    especially with pcre_exec().
+    


Version 7.9 11-Apr-09
---------------------

Modified: code/trunk/pcre.h.in
===================================================================
--- code/trunk/pcre.h.in    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/pcre.h.in    2009-08-28 09:55:54 UTC (rev 427)
@@ -113,7 +113,8 @@
 #define PCRE_NO_AUTO_CAPTURE    0x00001000
 #define PCRE_NO_UTF8_CHECK      0x00002000
 #define PCRE_AUTO_CALLOUT       0x00004000
-#define PCRE_PARTIAL            0x00008000
+#define PCRE_PARTIAL_SOFT       0x00008000
+#define PCRE_PARTIAL            0x00008000  /* Backwards compatible synonym */
 #define PCRE_DFA_SHORTEST       0x00010000
 #define PCRE_DFA_RESTART        0x00020000
 #define PCRE_FIRSTLINE          0x00040000
@@ -128,6 +129,7 @@
 #define PCRE_JAVASCRIPT_COMPAT  0x02000000
 #define PCRE_NO_START_OPTIMIZE  0x04000000
 #define PCRE_NO_START_OPTIMISE  0x04000000
+#define PCRE_PARTIAL_HARD       0x08000000


/* Exec-time and get/set-time error codes */


Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/pcre_dfa_exec.c    2009-08-28 09:55:54 UTC (rev 427)
@@ -2473,11 +2473,15 @@


   if (new_count <= 0)
     {
-    if (match_count < 0 &&                     /* No matches found */
-        rlevel == 1 &&                         /* Top level match function */
-        (md->moptions & PCRE_PARTIAL) != 0 &&  /* Want partial matching */
-        ptr >= end_subject &&                  /* Reached end of subject */
-        ptr > current_subject)                 /* Matched non-empty string */
+    if (rlevel == 1 &&                               /* Top level, and */
+        (                                            /* either... */
+        (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
+        ||                                           /* or... */
+        ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
+         match_count < 0)                            /* no matches */
+        ) &&                                         /* And... */
+        ptr >= end_subject &&                     /* Reached end of subject */
+        ptr > current_subject)                    /* Matched non-empty string */
       {
       if (offsetcount >= 2)
         {


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/pcre_exec.c    2009-08-28 09:55:54 UTC (rev 427)
@@ -403,16 +403,26 @@
 /* These macros pack up tests that are used for partial matching, and which
 appears several times in the code. We set the "hit end" flag if the pointer is
 at the end of the subject and also past the start of the subject (i.e.
-something has been matched). The second one is used when we already know we are
-past the end of the subject. */
+something has been matched). For hard partial matching, we then return
+immediately. The second one is used when we already know we are past the end of
+the subject. */


 #define CHECK_PARTIAL()\
   if (md->partial && eptr >= md->end_subject && eptr > mstart)\
-    md->hitend = TRUE
+    {\
+    md->hitend = TRUE;\
+    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
+    }


 #define SCHECK_PARTIAL()\
-  if (md->partial && eptr > mstart) md->hitend = TRUE
+  if (md->partial && eptr > mstart)\
+    {\
+    md->hitend = TRUE;\
+    md->hitend = TRUE;\
+    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
+    }


+
/* Performance note: It might be tempting to extract commonly used fields from
the md structure (e.g. utf8, end_subject) into individual variables to improve
performance. Tests using gcc on a SPARC disproved this; in the first case, it
@@ -1868,11 +1878,11 @@

       for (i = 1; i <= min; i++)
         {
-        if (!match_ref(offset, eptr, length, md, ims)) 
+        if (!match_ref(offset, eptr, length, md, ims))
           {
-          CHECK_PARTIAL(); 
+          CHECK_PARTIAL();
           RRETURN(MATCH_NOMATCH);
-          } 
+          }
         eptr += length;
         }


@@ -1891,9 +1901,9 @@
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
             {
-            CHECK_PARTIAL();  
+            CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           eptr += length;
           }
         /* Control never gets here */
@@ -1909,7 +1919,7 @@
           if (!match_ref(offset, eptr, length, md, ims)) break;
           eptr += length;
           }
-        CHECK_PARTIAL();   
+        CHECK_PARTIAL();
         while (eptr >= pp)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
@@ -1977,11 +1987,11 @@
         {
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
             CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINC(c, eptr);
           if (c > 255)
             {
@@ -1999,11 +2009,11 @@
         {
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
             CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           c = *eptr++;
           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
           }
@@ -2027,16 +2037,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(c, eptr);
             if (c > 255)
               {
@@ -2056,16 +2066,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             c = *eptr++;
             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             }
@@ -2098,7 +2108,7 @@
               }
             eptr += len;
             }
-          CHECK_PARTIAL();   
+          CHECK_PARTIAL();
           for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
@@ -2118,7 +2128,7 @@
             if ((data[c/8] & (1 << (c&7))) == 0) break;
             eptr++;
             }
-          CHECK_PARTIAL();   
+          CHECK_PARTIAL();
           while (eptr >= pp)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
@@ -2176,11 +2186,11 @@


       for (i = 1; i <= min; i++)
         {
-        if (eptr >= md->end_subject) 
+        if (eptr >= md->end_subject)
           {
           SCHECK_PARTIAL();
           RRETURN(MATCH_NOMATCH);
-          } 
+          }
         GETCHARINCTEST(c, eptr);
         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
         }
@@ -2199,16 +2209,16 @@
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max) 
+          if (fi >= max)
             {
-            CHECK_PARTIAL(); 
+            CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
-          if (eptr >= md->end_subject) 
+            }
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINCTEST(c, eptr);
           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
           }
@@ -2228,7 +2238,7 @@
           if (!_pcre_xclass(c, data)) break;
           eptr += len;
           }
-        CHECK_PARTIAL();   
+        CHECK_PARTIAL();
         for(;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
@@ -2455,7 +2465,7 @@


           CHECK_PARTIAL();
           if (possessive) continue;
-           
+
           for(;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
@@ -2516,12 +2526,12 @@
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           if (fi >= max)
             {
-            CHECK_PARTIAL(); 
+            CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
           if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
           if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
@@ -2536,10 +2546,10 @@
           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
           eptr++;
           }
-           
-        CHECK_PARTIAL();   
+
+        CHECK_PARTIAL();
         if (possessive) continue;
-         
+
         while (eptr >= pp)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
@@ -2555,7 +2565,7 @@


     else
       {
-      for (i = 1; i <= min; i++) 
+      for (i = 1; i <= min; i++)
         {
         if (eptr >= md->end_subject)
           {
@@ -2563,7 +2573,7 @@
           RRETURN(MATCH_NOMATCH);
           }
         if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
-        } 
+        }
       if (min == max) continue;
       if (minimize)
         {
@@ -2577,10 +2587,10 @@
             RRETURN(MATCH_NOMATCH);
             }
           if (eptr >= md->end_subject)
-            {        
+            {
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            }  
+            }
           if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
@@ -2593,7 +2603,7 @@
           if (eptr >= md->end_subject || fc != *eptr) break;
           eptr++;
           }
-        CHECK_PARTIAL();   
+        CHECK_PARTIAL();
         if (possessive) continue;
         while (eptr >= pp)
           {
@@ -2717,8 +2727,8 @@
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
-            RRETURN(MATCH_NOMATCH);  
-            }    
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(d, eptr);
           if (d < 256) d = md->lcc[d];
           if (fc == d) RRETURN(MATCH_NOMATCH);
@@ -2734,10 +2744,10 @@
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
-            RRETURN(MATCH_NOMATCH);  
-            }    
+            RRETURN(MATCH_NOMATCH);
+            }
           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
-          } 
+          }
         }


       if (min == max) continue;
@@ -2757,12 +2767,12 @@
               {
               CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              }     
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(d, eptr);
             if (d < 256) d = md->lcc[d];
             if (fc == d) RRETURN(MATCH_NOMATCH);
@@ -2812,7 +2822,7 @@
             if (fc == d) break;
             eptr += len;
             }
-        CHECK_PARTIAL();     
+        CHECK_PARTIAL();
         if (possessive) continue;
         for(;;)
             {
@@ -2831,7 +2841,7 @@
             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
             eptr++;
             }
-          CHECK_PARTIAL();   
+          CHECK_PARTIAL();
           if (possessive) continue;
           while (eptr >= pp)
             {
@@ -2860,8 +2870,8 @@
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
-            RRETURN(MATCH_NOMATCH);  
-            }    
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(d, eptr);
           if (fc == d) RRETURN(MATCH_NOMATCH);
           }
@@ -2875,10 +2885,10 @@
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
-            RRETURN(MATCH_NOMATCH);  
-            }    
+            RRETURN(MATCH_NOMATCH);
+            }
           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
-          } 
+          }
         }


       if (min == max) continue;
@@ -2894,16 +2904,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(d, eptr);
             if (fc == d) RRETURN(MATCH_NOMATCH);
             }
@@ -2925,7 +2935,7 @@
               {
               SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              }   
+              }
             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
             }
           }
@@ -2951,7 +2961,7 @@
             if (fc == d) break;
             eptr += len;
             }
-          CHECK_PARTIAL();   
+          CHECK_PARTIAL();
           if (possessive) continue;
           for(;;)
             {
@@ -2970,7 +2980,7 @@
             if (eptr >= md->end_subject || fc == *eptr) break;
             eptr++;
             }
-          CHECK_PARTIAL();   
+          CHECK_PARTIAL();
           if (possessive) continue;
           while (eptr >= pp)
             {
@@ -3077,11 +3087,11 @@
           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) 
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINCTEST(c, eptr);
             }
           break;
@@ -3089,11 +3099,11 @@
           case PT_LAMP:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) 
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINCTEST(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
@@ -3106,11 +3116,11 @@
           case PT_GC:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) 
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINCTEST(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
@@ -3121,11 +3131,11 @@
           case PT_PC:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) 
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINCTEST(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
@@ -3136,11 +3146,11 @@
           case PT_SC:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) 
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINCTEST(c, eptr);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
@@ -3160,11 +3170,11 @@
         {
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINCTEST(c, eptr);
           prop_category = UCD_CATEGORY(c);
           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
@@ -3193,9 +3203,9 @@
           {
           if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL();  
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
           eptr++;
           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
@@ -3205,29 +3215,29 @@
         case OP_ALLANY:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           eptr++;
           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
           }
         break;


         case OP_ANYBYTE:
-        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); 
+        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
         eptr += min;
         break;


         case OP_ANYNL:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3253,11 +3263,11 @@
         case OP_NOT_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3289,11 +3299,11 @@
         case OP_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3325,11 +3335,11 @@
         case OP_NOT_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3349,11 +3359,11 @@
         case OP_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3373,11 +3383,11 @@
         case OP_NOT_DIGIT:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINC(c, eptr);
           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
             RRETURN(MATCH_NOMATCH);
@@ -3387,11 +3397,11 @@
         case OP_DIGIT:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
             RRETURN(MATCH_NOMATCH);
           /* No need to skip more bytes - we know it's a 1-byte character */
@@ -3401,11 +3411,11 @@
         case OP_NOT_WHITESPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
             RRETURN(MATCH_NOMATCH);
           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
@@ -3415,11 +3425,11 @@
         case OP_WHITESPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
             RRETURN(MATCH_NOMATCH);
           /* No need to skip more bytes - we know it's a 1-byte character */
@@ -3439,11 +3449,11 @@
         case OP_WORDCHAR:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
           /* No need to skip more bytes - we know it's a 1-byte character */
@@ -3465,34 +3475,34 @@
         case OP_ANY:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
           eptr++;
           }
         break;


         case OP_ALLANY:
-        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); 
+        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
         eptr += min;
         break;


         case OP_ANYBYTE:
-        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH); 
+        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
         eptr += min;
         break;


         case OP_ANYNL:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
@@ -3514,11 +3524,11 @@
         case OP_NOT_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           switch(*eptr++)
             {
             default: break;
@@ -3533,11 +3543,11 @@
         case OP_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
@@ -3552,11 +3562,11 @@
         case OP_NOT_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           switch(*eptr++)
             {
             default: break;
@@ -3573,11 +3583,11 @@
         case OP_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) 
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
@@ -3593,76 +3603,76 @@


         case OP_NOT_DIGIT:
         for (i = 1; i <= min; i++)
-          { 
-          if (eptr >= md->end_subject) 
+          {
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
-          } 
+          }
         break;


         case OP_DIGIT:
         for (i = 1; i <= min; i++)
-          { 
-          if (eptr >= md->end_subject) 
+          {
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
-          } 
+          }
         break;


         case OP_NOT_WHITESPACE:
         for (i = 1; i <= min; i++)
-          { 
-          if (eptr >= md->end_subject) 
+          {
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
-          } 
+          }
         break;


         case OP_WHITESPACE:
         for (i = 1; i <= min; i++)
-          { 
-          if (eptr >= md->end_subject) 
+          {
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
-          } 
+          }
         break;


         case OP_NOT_WORDCHAR:
         for (i = 1; i <= min; i++)
-          { 
-          if (eptr >= md->end_subject) 
+          {
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if ((md->ctypes[*eptr++] & ctype_word) != 0)
             RRETURN(MATCH_NOMATCH);
-          }   
+          }
         break;


         case OP_WORDCHAR:
         for (i = 1; i <= min; i++)
-          { 
-          if (eptr >= md->end_subject) 
+          {
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if ((md->ctypes[*eptr++] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
-          }   
+          }
         break;


         default:
@@ -3690,16 +3700,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(c, eptr);
             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
             }
@@ -3710,16 +3720,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
@@ -3734,16 +3744,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
@@ -3756,16 +3766,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
@@ -3778,16 +3788,16 @@
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max) 
+            if (fi >= max)
               {
-              CHECK_PARTIAL(); 
+              CHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
-            if (eptr >= md->end_subject) 
+              }
+            if (eptr >= md->end_subject)
               {
-              SCHECK_PARTIAL(); 
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
-              } 
+              }
             GETCHARINC(c, eptr);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
@@ -3809,16 +3819,16 @@
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max) 
+          if (fi >= max)
             {
-            CHECK_PARTIAL(); 
+            CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
-          if (eptr >= md->end_subject) 
+            }
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           GETCHARINCTEST(c, eptr);
           prop_category = UCD_CATEGORY(c);
           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
@@ -3845,16 +3855,16 @@
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max) 
+          if (fi >= max)
             {
-            CHECK_PARTIAL(); 
+            CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
-          if (eptr >= md->end_subject) 
+            }
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (ctype == OP_ANY && IS_NEWLINE(eptr))
             RRETURN(MATCH_NOMATCH);
           GETCHARINC(c, eptr);
@@ -4012,16 +4022,16 @@
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max) 
+          if (fi >= max)
             {
-            CHECK_PARTIAL(); 
+            CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
-          if (eptr >= md->end_subject) 
+            }
+          if (eptr >= md->end_subject)
             {
-            SCHECK_PARTIAL(); 
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
-            } 
+            }
           if (ctype == OP_ANY && IS_NEWLINE(eptr))
             RRETURN(MATCH_NOMATCH);
           c = *eptr++;
@@ -4894,7 +4904,8 @@
 md->notbol = (options & PCRE_NOTBOL) != 0;
 md->noteol = (options & PCRE_NOTEOL) != 0;
 md->notempty = (options & PCRE_NOTEMPTY) != 0;
-md->partial = (options & PCRE_PARTIAL) != 0;
+md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
+              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
 md->hitend = FALSE;


 md->recursive = NULL;                   /* No recursion at top level */
@@ -5252,7 +5263,7 @@
       }
     }


-  /* OK, we can now run the match. If "hitend" is set afterwards, remember the 
+  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
   first starting point for which a partial match was found. */

   md->start_match_ptr = start_match;
@@ -5394,19 +5405,19 @@
   (pcre_free)(md->offset_vector);
   }

-if (rc != MATCH_NOMATCH)
+if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
   {
   DPRINTF((">>>> error: returning %d\n", rc));
   return rc;
   }
-else if (md->partial && start_partial != NULL)
+else if (start_partial != NULL)
   {
   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
-  if (offsetcount > 1) 
+  if (offsetcount > 1)
     {
     offsets[0] = start_partial - (USPTR)subject;
     offsets[1] = end_subject - (USPTR)subject;
-    }  
+    }
   return PCRE_ERROR_PARTIAL;
   }
 else


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/pcre_internal.h    2009-08-28 09:55:54 UTC (rev 427)
@@ -536,7 +536,7 @@
 /* Private flags containing information about the compiled regex. They used to
 live at the top end of the options word, but that got almost full, so now they
 are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
-the restrictions on partial matching have been lifted. It remains for backwards 
+the restrictions on partial matching have been lifted. It remains for backwards
 compatibility. */


 #define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
@@ -565,13 +565,13 @@


 #define PUBLIC_EXEC_OPTIONS \
   (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
-   PCRE_NO_START_OPTIMIZE)
+   PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF| \
+   PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)

 #define PUBLIC_DFA_EXEC_OPTIONS \
   (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
-   PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
+   PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART| \
+   PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)

#define PUBLIC_STUDY_OPTIONS 0 /* None defined */

@@ -1601,7 +1601,7 @@
   BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
   BOOL   endonly;               /* Dollar not before final \n */
   BOOL   notempty;              /* Empty string match not wanted */
-  BOOL   partial;               /* PARTIAL flag */
+  int    partial;               /* PARTIAL options */
   BOOL   hitend;                /* Hit the end of the subject at some point */
   BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */
   const uschar *start_code;     /* For use when recursing */


Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/pcretest.c    2009-08-28 09:55:54 UTC (rev 427)
@@ -1992,7 +1992,8 @@
         continue;


         case 'P':
-        options |= PCRE_PARTIAL;
+        options |= ((options & PCRE_PARTIAL_SOFT) == 0)? 
+          PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
         continue;


         case 'Q':


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/testdata/testinput2    2009-08-28 09:55:54 UTC (rev 427)
@@ -2912,4 +2912,12 @@
     Z\P 
     ZA\P 


+/dog(sbody)?/
+    dogs\P
+    dogs\P\P 
+    
+/dog|dogsbody/
+    dogs\P
+    dogs\P\P 
+ 
 / End of testinput2 /


Modified: code/trunk/testdata/testinput7
===================================================================
--- code/trunk/testdata/testinput7    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/testdata/testinput7    2009-08-28 09:55:54 UTC (rev 427)
@@ -4433,4 +4433,12 @@
     Z\P 
     ZA\P 


+/dog(sbody)?/
+    dogs\P
+    dogs\P\P 
+    
+/dog|dogsbody/
+    dogs\P
+    dogs\P\P 
+ 
 / End of testinput7 /


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/testdata/testoutput2    2009-08-28 09:55:54 UTC (rev 427)
@@ -9928,4 +9928,16 @@
     ZA\P 
 No match


+/dog(sbody)?/
+    dogs\P
+ 0: dog
+    dogs\P\P 
+Partial match: dogs
+    
+/dog|dogsbody/
+    dogs\P
+ 0: dog
+    dogs\P\P 
+ 0: dog
+ 
 / End of testinput2 /


Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7    2009-08-26 15:38:32 UTC (rev 426)
+++ code/trunk/testdata/testoutput7    2009-08-28 09:55:54 UTC (rev 427)
@@ -7386,4 +7386,16 @@
     ZA\P 
 No match


+/dog(sbody)?/
+    dogs\P
+ 0: dog
+    dogs\P\P 
+Partial match: dogs
+    
+/dog|dogsbody/
+    dogs\P
+ 0: dog
+    dogs\P\P 
+Partial match: dogs
+ 
 / End of testinput7 /