[Pcre-svn] [623] code/trunk: Fix \X* bug when first character has the mark property.

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [623] code/trunk: Fix \X* bug when first character has the mark property.
Revision: 623
          http://vcs.pcre.org/viewvc?view=rev&revision=623
Author:   ph10
Date:     2011-07-19 10:58:42 +0100 (Tue, 19 Jul 2011)


Log Message:
-----------
Fix \X* bug when first character has the mark property. Also improve code for
property and script handling.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_exec.c
    code/trunk/testdata/testinput5
    code/trunk/testdata/testoutput5


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2011-07-18 10:46:51 UTC (rev 622)
+++ code/trunk/ChangeLog    2011-07-19 09:58:42 UTC (rev 623)
@@ -148,6 +148,12 @@
 26. Updated RunTest.bat in the distribution to the version supplied by Tom
     Fortmann. This supports explicit test numbers on the command line, and has
     argument validation and error reporting.
+    
+27. An instance of \X with an unlimited repeat could fail if at any point the 
+    first character it looked at was a mark character. 
+    
+28. Some minor code refactoring concerning Unicode properties and scripts 
+    should reduce the stack requirement of match() slightly. 





Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2011-07-18 10:46:51 UTC (rev 622)
+++ code/trunk/pcre_exec.c    2011-07-19 09:58:42 UTC (rev 623)
@@ -277,7 +277,7 @@
        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
-       RM61,  RM62, RM63, RM64, RM65, RM66 };
+       RM61,  RM62, RM63 };


 /* These versions of the macros use the stack, as normal. There are debugging
 versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -384,9 +384,6 @@
   int Xprop_type;
   int Xprop_value;
   int Xprop_fail_result;
-  int Xprop_category;
-  int Xprop_chartype;
-  int Xprop_script;
   int Xoclength;
   uschar Xocchars[8];
 #endif
@@ -551,9 +548,6 @@
 #define prop_type          frame->Xprop_type
 #define prop_value         frame->Xprop_value
 #define prop_fail_result   frame->Xprop_fail_result
-#define prop_category      frame->Xprop_category
-#define prop_chartype      frame->Xprop_chartype
-#define prop_script        frame->Xprop_script
 #define oclength           frame->Xoclength
 #define occhars            frame->Xocchars
 #endif
@@ -611,9 +605,6 @@
 int prop_type;
 int prop_value;
 int prop_fail_result;
-int prop_category;
-int prop_chartype;
-int prop_script;
 int oclength;
 uschar occhars[8];
 #endif
@@ -1765,11 +1756,11 @@


     if (*ecode == OP_KETRMIN)
       {
-      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
+      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       if (*prev == OP_ONCE)
         {
-        RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
+        RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
         RRETURN(MATCH_ONCE); 
@@ -1791,7 +1782,7 @@
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       if (*prev == OP_ONCE)
         {
-        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
+        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         md->once_target = prev;
         RRETURN(MATCH_ONCE); 
@@ -2364,20 +2355,13 @@
       MRRETURN(MATCH_NOMATCH);
       }
     GETCHARINCTEST(c, eptr);
+    if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
+    while (eptr < md->end_subject)
       {
-      int category = UCD_CATEGORY(c);
-      if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
-      while (eptr < md->end_subject)
-        {
-        int len = 1;
-        if (!utf8) c = *eptr; else
-          {
-          GETCHARLEN(c, eptr, len);
-          }
-        category = UCD_CATEGORY(c);
-        if (category != ucp_M) break;
-        eptr += len;
-        }
+      int len = 1;
+      if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+      if (UCD_CATEGORY(c) != ucp_M) break;
+      eptr += len;
       }
     ecode++;
     break;
@@ -3731,16 +3715,17 @@
           case PT_LAMP:
           for (i = 1; i <= min; i++)
             {
+            int chartype; 
             if (eptr >= md->end_subject)
               {
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
-            if ((prop_chartype == ucp_Lu ||
-                 prop_chartype == ucp_Ll ||
-                 prop_chartype == ucp_Lt) == prop_fail_result)
+            chartype = UCD_CHARTYPE(c);
+            if ((chartype == ucp_Lu ||
+                 chartype == ucp_Ll ||
+                 chartype == ucp_Lt) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           break;
@@ -3754,8 +3739,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == prop_value) == prop_fail_result)
+            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           break;
@@ -3769,8 +3753,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
-            if ((prop_chartype == prop_value) == prop_fail_result)
+            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           break;
@@ -3784,8 +3767,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_script = UCD_SCRIPT(c);
-            if ((prop_script == prop_value) == prop_fail_result)
+            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           break;
@@ -3793,15 +3775,15 @@
           case PT_ALNUM:
           for (i = 1; i <= min; i++)
             {
+            int category; 
             if (eptr >= md->end_subject)
               {
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_L || prop_category == ucp_N)
-                   == prop_fail_result)
+            category = UCD_CATEGORY(c);
+            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           break;
@@ -3815,8 +3797,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
+            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_FF || c == CHAR_CR)
                    == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -3832,8 +3813,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
+            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                    == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -3843,15 +3823,15 @@
           case PT_WORD:
           for (i = 1; i <= min; i++)
             {
+            int category; 
             if (eptr >= md->end_subject)
               {
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_L || prop_category == ucp_N ||
-                 c == CHAR_UNDERSCORE)
+            category = UCD_CATEGORY(c);
+            if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
                    == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
@@ -3877,15 +3857,12 @@
             MRRETURN(MATCH_NOMATCH);
             }
           GETCHARINCTEST(c, eptr);
-          prop_category = UCD_CATEGORY(c);
-          if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
+          if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
           while (eptr < md->end_subject)
             {
             int len = 1;
-            if (!utf8) c = *eptr;
-              else { GETCHARLEN(c, eptr, len); }
-            prop_category = UCD_CATEGORY(c);
-            if (prop_category != ucp_M) break;
+            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+            if (UCD_CATEGORY(c) != ucp_M) break;
             eptr += len;
             }
           }
@@ -4430,6 +4407,7 @@
           case PT_LAMP:
           for (fi = min;; fi++)
             {
+            int chartype; 
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max) MRRETURN(MATCH_NOMATCH);
@@ -4439,10 +4417,10 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
-            if ((prop_chartype == ucp_Lu ||
-                 prop_chartype == ucp_Ll ||
-                 prop_chartype == ucp_Lt) == prop_fail_result)
+            chartype = UCD_CHARTYPE(c);
+            if ((chartype == ucp_Lu ||
+                 chartype == ucp_Ll ||
+                 chartype == ucp_Lt) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           /* Control never gets here */
@@ -4459,8 +4437,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == prop_value) == prop_fail_result)
+            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           /* Control never gets here */
@@ -4477,8 +4454,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_chartype = UCD_CHARTYPE(c);
-            if ((prop_chartype == prop_value) == prop_fail_result)
+            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           /* Control never gets here */
@@ -4495,8 +4471,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_script = UCD_SCRIPT(c);
-            if ((prop_script == prop_value) == prop_fail_result)
+            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           /* Control never gets here */
@@ -4504,6 +4479,7 @@
           case PT_ALNUM:
           for (fi = min;; fi++)
             {
+            int category; 
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max) MRRETURN(MATCH_NOMATCH);
@@ -4513,9 +4489,8 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_L || prop_category == ucp_N)
-                   == prop_fail_result)
+            category = UCD_CATEGORY(c);
+            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
             }
           /* Control never gets here */
@@ -4532,8 +4507,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
+            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_FF || c == CHAR_CR)
                    == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -4552,8 +4526,7 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
+            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                    == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -4563,6 +4536,7 @@
           case PT_WORD:
           for (fi = min;; fi++)
             {
+            int category; 
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max) MRRETURN(MATCH_NOMATCH);
@@ -4572,9 +4546,9 @@
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(c, eptr);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_L ||
-                 prop_category == ucp_N ||
+            category = UCD_CATEGORY(c);
+            if ((category == ucp_L ||
+                 category == ucp_N ||
                  c == CHAR_UNDERSCORE)
                    == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -4604,20 +4578,16 @@
             MRRETURN(MATCH_NOMATCH);
             }
           GETCHARINCTEST(c, eptr);
-          prop_category = UCD_CATEGORY(c);
-          if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
+          if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH);
           while (eptr < md->end_subject)
             {
             int len = 1;
-            if (!utf8) c = *eptr;
-              else { GETCHARLEN(c, eptr, len); }
-            prop_category = UCD_CATEGORY(c);
-            if (prop_category != ucp_M) break;
+            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+            if (UCD_CATEGORY(c) != ucp_M) break;
             eptr += len;
             }
           }
         }
-
       else
 #endif     /* SUPPORT_UCP */


@@ -4938,6 +4908,7 @@
           case PT_LAMP:
           for (i = min; i < max; i++)
             {
+            int chartype; 
             int len = 1;
             if (eptr >= md->end_subject)
               {
@@ -4945,10 +4916,10 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_chartype = UCD_CHARTYPE(c);
-            if ((prop_chartype == ucp_Lu ||
-                 prop_chartype == ucp_Ll ||
-                 prop_chartype == ucp_Lt) == prop_fail_result)
+            chartype = UCD_CHARTYPE(c);
+            if ((chartype == ucp_Lu ||
+                 chartype == ucp_Ll ||
+                 chartype == ucp_Lt) == prop_fail_result)
               break;
             eptr+= len;
             }
@@ -4964,9 +4935,7 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == prop_value) == prop_fail_result)
-              break;
+            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
             eptr+= len;
             }
           break;
@@ -4981,9 +4950,7 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_chartype = UCD_CHARTYPE(c);
-            if ((prop_chartype == prop_value) == prop_fail_result)
-              break;
+            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
             eptr+= len;
             }
           break;
@@ -4998,9 +4965,7 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_script = UCD_SCRIPT(c);
-            if ((prop_script == prop_value) == prop_fail_result)
-              break;
+            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
             eptr+= len;
             }
           break;
@@ -5008,6 +4973,7 @@
           case PT_ALNUM:
           for (i = min; i < max; i++)
             {
+            int category; 
             int len = 1;
             if (eptr >= md->end_subject)
               {
@@ -5015,9 +4981,8 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_L || prop_category == ucp_N)
-                 == prop_fail_result)
+            category = UCD_CATEGORY(c);
+            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
               break;
             eptr+= len;
             }
@@ -5033,8 +4998,7 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
+            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_FF || c == CHAR_CR)
                  == prop_fail_result)
               break;
@@ -5052,8 +5016,7 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
+            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
                  == prop_fail_result)
               break;
@@ -5064,6 +5027,7 @@
           case PT_WORD:
           for (i = min; i < max; i++)
             {
+            int category; 
             int len = 1;
             if (eptr >= md->end_subject)
               {
@@ -5071,8 +5035,8 @@
               break;
               }
             GETCHARLENTEST(c, eptr, len);
-            prop_category = UCD_CATEGORY(c);
-            if ((prop_category == ucp_L || prop_category == ucp_N ||
+            category = UCD_CATEGORY(c);
+            if ((category == ucp_L || category == ucp_N ||
                  c == CHAR_UNDERSCORE) == prop_fail_result)
               break;
             eptr+= len;
@@ -5102,23 +5066,20 @@
         {
         for (i = min; i < max; i++)
           {
+          int len = 1; 
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
             break;
             }
-          GETCHARINCTEST(c, eptr);
-          prop_category = UCD_CATEGORY(c);
-          if (prop_category == ucp_M) break;
+          if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+          if (UCD_CATEGORY(c) == ucp_M) break;
+          eptr += len; 
           while (eptr < md->end_subject)
             {
-            int len = 1;
-            if (!utf8) c = *eptr; else
-              {
-              GETCHARLEN(c, eptr, len);
-              }
-            prop_category = UCD_CATEGORY(c);
-            if (prop_category != ucp_M) break;
+            len = 1;
+            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+            if (UCD_CATEGORY(c) != ucp_M) break;
             eptr += len;
             }
           }
@@ -5134,14 +5095,12 @@
           if (eptr-- == pp) break;        /* Stop if tried at original pos */
           for (;;)                        /* Move back over one extended */
             {
-            int len = 1;
             if (!utf8) c = *eptr; else
               {
               BACKCHAR(eptr);
-              GETCHARLEN(c, eptr, len);
+              GETCHAR(c, eptr);
               }
-            prop_category = UCD_CATEGORY(c);
-            if (prop_category != ucp_M) break;
+            if (UCD_CATEGORY(c) != ucp_M) break;
             eptr--;
             }
           }
@@ -5678,8 +5637,7 @@
   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
-  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
-  LBL(65) LBL(66) 
+  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) 
 #ifdef SUPPORT_UTF8
   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
   LBL(32) LBL(34) LBL(42) LBL(46)


Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2011-07-18 10:46:51 UTC (rev 622)
+++ code/trunk/testdata/testinput5    2011-07-19 09:58:42 UTC (rev 623)
@@ -873,4 +873,10 @@


 /[^\x{1234}]{2}/iS8I

+/^S(\X*)e(\X*)$/8
+    Stéréo
+    
+/^\X/8 
+    ́réo
+
 /-- End of testinput5 --/


Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2011-07-18 10:46:51 UTC (rev 622)
+++ code/trunk/testdata/testoutput5    2011-07-19 09:58:42 UTC (rev 623)
@@ -2421,4 +2421,12 @@
 Subject length lower bound = 2
 No set of starting bytes


+/^S(\X*)e(\X*)$/8
+    Stéréo
+No match
+    
+/^\X/8 
+    ́réo
+No match
+
 /-- End of testinput5 --/