[Pcre-svn] [529] code/trunk: Fix crash for property test in …

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [529] code/trunk: Fix crash for property test in non-UTF-8 mode.
Revision: 529
          http://vcs.pcre.org/viewvc?view=rev&revision=529
Author:   ph10
Date:     2010-05-31 18:28:08 +0100 (Mon, 31 May 2010)


Log Message:
-----------
Fix crash for property test in non-UTF-8 mode.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_exec.c
    code/trunk/pcre_internal.h
    code/trunk/testdata/testinput6
    code/trunk/testdata/testoutput6


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/ChangeLog    2010-05-31 17:28:08 UTC (rev 529)
@@ -59,6 +59,11 @@
     possible starting bytes for non-anchored patterns. 


 15. The "auto-possessify" feature of pcre_compile() now recognizes \R. 
+
+16. If a repeated Unicode property match (e.g. \p{Lu}*) was used with non-UTF-8
+    input, it could crash or give wrong results if bytes with values of 0xc0
+    or greater were present in the subject string. (Detail: the matcher assumed
+    UTF-8 input when processing these items.)





Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/pcre_exec.c    2010-05-31 17:28:08 UTC (rev 529)
@@ -4213,7 +4213,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
             }
           /* Control never gets here */
@@ -4229,7 +4229,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
                  prop_chartype == ucp_Ll ||
@@ -4249,7 +4249,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -4267,7 +4267,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -4285,7 +4285,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
               MRRETURN(MATCH_NOMATCH);
@@ -4303,7 +4303,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_L || prop_category == ucp_N)
                    == prop_fail_result)
@@ -4322,7 +4322,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_FF || c == CHAR_CR)
@@ -4342,7 +4342,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
@@ -4362,7 +4362,7 @@
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            GETCHARINC(c, eptr);
+            GETCHARINCTEST(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_L ||
                  prop_category == ucp_N ||
@@ -4720,7 +4720,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             if (prop_fail_result) break;
             eptr+= len;
             }
@@ -4735,7 +4735,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
                  prop_chartype == ucp_Ll ||
@@ -4754,7 +4754,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
               break;
@@ -4771,7 +4771,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
               break;
@@ -4788,7 +4788,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
               break;
@@ -4805,7 +4805,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_L || prop_category == ucp_N)
                  == prop_fail_result)
@@ -4823,7 +4823,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_FF || c == CHAR_CR)
@@ -4842,7 +4842,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
@@ -4861,7 +4861,7 @@
               SCHECK_PARTIAL();
               break;
               }
-            GETCHARLEN(c, eptr, len);
+            GETCHARLENTEST(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == ucp_L || prop_category == ucp_N ||
                  c == CHAR_UNDERSCORE) == prop_fail_result)


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/pcre_internal.h    2010-05-31 17:28:08 UTC (rev 529)
@@ -475,7 +475,8 @@
       } \
     }


-/* Get the next character, testing for UTF-8 mode, and advancing the pointer */
+/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-8 mode. */

#define GETCHARINCTEST(c, eptr) \
c = *eptr++; \
@@ -512,7 +513,7 @@

/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
-know we are in UTF-8 mode. */
+do not know if we are in UTF-8 mode. */

#define GETCHARLENTEST(c, eptr, len) \
c = *eptr; \

Modified: code/trunk/testdata/testinput6
===================================================================
--- code/trunk/testdata/testinput6    2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/testdata/testinput6    2010-05-31 17:28:08 UTC (rev 529)
@@ -753,45 +753,53 @@
     \x{10b00}\x{a6ef}\x{13007}\x{10857}\x{10b78}\x{10b58}\x{a980}\x{110c1}\x{a4ff}\x{abc0}\x{10a7d}\x{10c48}\x{0800}\x{1aad}\x{aac0}


 /^\w+/8W
-  Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+    Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}


 /^[[:xdigit:]]*/8W
-  1a\x{660}\x{bef}\x{16ee}
+    1a\x{660}\x{bef}\x{16ee}


 /^\d+/8W
-  1\x{660}\x{bef}\x{16ee}
+    1\x{660}\x{bef}\x{16ee}


 /^[[:digit:]]+/8W
-  1\x{660}\x{bef}\x{16ee}
+    1\x{660}\x{bef}\x{16ee}


 /^>\s+/8W
-  >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
+    >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 


 /^>\pZ+/8W
-  >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
+    >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 


 /^>[[:space:]]*/8W
-  >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
+    >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 


 /^>[[:blank:]]*/8W
-  >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 
+    >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 


 /^[[:alpha:]]*/8W
-  Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
+    Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}


 /^[[:alnum:]]*/8W
-  Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+    Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}


 /^[[:cntrl:]]*/8W
-  \x{0}\x{09}\x{1f}\x{7f}\x{9f} 
+    \x{0}\x{09}\x{1f}\x{7f}\x{9f} 


 /^[[:graph:]]*/8W
-  A\x{a1}\x{a0}
+    A\x{a1}\x{a0}


 /^[[:print:]]*/8W
-  A z\x{a0}\x{a1}
+    A z\x{a0}\x{a1}


 /^[[:punct:]]*/8W
-  .+\x{a1}\x{a0}
+    .+\x{a1}\x{a0}


+/\p{Zs}*?\R/
+    ** Failers
+    a\xFCb   
+
+/\p{Zs}*\R/                                                                    
+    ** Failers 
+    a\xFCb   
+
 /-- End of testinput6 --/


Modified: code/trunk/testdata/testoutput6
===================================================================
--- code/trunk/testdata/testoutput6    2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/testdata/testoutput6    2010-05-31 17:28:08 UTC (rev 529)
@@ -1286,59 +1286,71 @@
  0: \x{10b00}\x{a6ef}\x{13007}\x{10857}\x{10b78}\x{10b58}\x{a980}\x{110c1}\x{a4ff}\x{abc0}\x{10a7d}\x{10c48}\x{800}\x{1aad}\x{aac0}


 /^\w+/8W
-  Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+    Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
  0: Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}


 /^[[:xdigit:]]*/8W
-  1a\x{660}\x{bef}\x{16ee}
+    1a\x{660}\x{bef}\x{16ee}
  0: 1a


 /^\d+/8W
-  1\x{660}\x{bef}\x{16ee}
+    1\x{660}\x{bef}\x{16ee}
  0: 1\x{660}\x{bef}


 /^[[:digit:]]+/8W
-  1\x{660}\x{bef}\x{16ee}
+    1\x{660}\x{bef}\x{16ee}
  0: 1\x{660}\x{bef}


 /^>\s+/8W
-  >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
+    >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
  0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{09}


 /^>\pZ+/8W
-  >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
+    >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
  0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}


 /^>[[:space:]]*/8W
-  >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
+    >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b} 
  0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{09}\x{0b}


 /^>[[:blank:]]*/8W
-  >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 
+    >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 
  0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09}


 /^[[:alpha:]]*/8W
-  Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
+    Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
  0: Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}


 /^[[:alnum:]]*/8W
-  Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+    Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
  0: Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}


 /^[[:cntrl:]]*/8W
-  \x{0}\x{09}\x{1f}\x{7f}\x{9f} 
+    \x{0}\x{09}\x{1f}\x{7f}\x{9f} 
  0: \x{00}\x{09}\x{1f}\x{7f}


 /^[[:graph:]]*/8W
-  A\x{a1}\x{a0}
+    A\x{a1}\x{a0}
  0: A


 /^[[:print:]]*/8W
-  A z\x{a0}\x{a1}
+    A z\x{a0}\x{a1}
  0: A z


 /^[[:punct:]]*/8W
-  .+\x{a1}\x{a0}
+    .+\x{a1}\x{a0}
  0: .+


+/\p{Zs}*?\R/
+    ** Failers
+No match
+    a\xFCb   
+No match
+
+/\p{Zs}*\R/                                                                    
+    ** Failers 
+No match
+    a\xFCb   
+No match
+
 /-- End of testinput6 --/