Revision: 529
http://vcs.pcre.org/viewvc?view=rev&revision=529
Author: ph10
Date: 2010-05-31 18:28:08 +0100 (Mon, 31 May 2010)
Log Message:
-----------
Fix crash for property test in non-UTF-8 mode.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/testdata/testinput6
code/trunk/testdata/testoutput6
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/ChangeLog 2010-05-31 17:28:08 UTC (rev 529)
@@ -59,6 +59,11 @@
possible starting bytes for non-anchored patterns.
15. The "auto-possessify" feature of pcre_compile() now recognizes \R.
+
+16. If a repeated Unicode property match (e.g. \p{Lu}*) was used with non-UTF-8
+ input, it could crash or give wrong results if characters with values
+ of 0xc0 or greater were present in the subject string. (Detail: it assumed
+ UTF-8 input when processing these items.)
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/pcre_exec.c 2010-05-31 17:28:08 UTC (rev 529)
@@ -4213,7 +4213,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
@@ -4229,7 +4229,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
@@ -4249,7 +4249,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
MRRETURN(MATCH_NOMATCH);
@@ -4267,7 +4267,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
MRRETURN(MATCH_NOMATCH);
@@ -4285,7 +4285,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
MRRETURN(MATCH_NOMATCH);
@@ -4303,7 +4303,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_L || prop_category == ucp_N)
== prop_fail_result)
@@ -4322,7 +4322,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
c == CHAR_FF || c == CHAR_CR)
@@ -4342,7 +4342,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
@@ -4362,7 +4362,7 @@
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
+ GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_L ||
prop_category == ucp_N ||
@@ -4720,7 +4720,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
if (prop_fail_result) break;
eptr+= len;
}
@@ -4735,7 +4735,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
@@ -4754,7 +4754,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
break;
@@ -4771,7 +4771,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
break;
@@ -4788,7 +4788,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
break;
@@ -4805,7 +4805,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_L || prop_category == ucp_N)
== prop_fail_result)
@@ -4823,7 +4823,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
c == CHAR_FF || c == CHAR_CR)
@@ -4842,7 +4842,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
@@ -4861,7 +4861,7 @@
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
+ GETCHARLENTEST(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == ucp_L || prop_category == ucp_N ||
c == CHAR_UNDERSCORE) == prop_fail_result)
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/pcre_internal.h 2010-05-31 17:28:08 UTC (rev 529)
@@ -475,7 +475,8 @@
} \
}
-/* Get the next character, testing for UTF-8 mode, and advancing the pointer */
+/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-8 mode. */
#define GETCHARINCTEST(c, eptr) \
c = *eptr++; \
@@ -512,7 +513,7 @@
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
-know we are in UTF-8 mode. */
+do not know if we are in UTF-8 mode. */
#define GETCHARLENTEST(c, eptr, len) \
c = *eptr; \
Modified: code/trunk/testdata/testinput6
===================================================================
--- code/trunk/testdata/testinput6 2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/testdata/testinput6 2010-05-31 17:28:08 UTC (rev 529)
@@ -753,45 +753,53 @@
\x{10b00}\x{a6ef}\x{13007}\x{10857}\x{10b78}\x{10b58}\x{a980}\x{110c1}\x{a4ff}\x{abc0}\x{10a7d}\x{10c48}\x{0800}\x{1aad}\x{aac0}
/^\w+/8W
- Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+ Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
/^[[:xdigit:]]*/8W
- 1a\x{660}\x{bef}\x{16ee}
+ 1a\x{660}\x{bef}\x{16ee}
/^\d+/8W
- 1\x{660}\x{bef}\x{16ee}
+ 1\x{660}\x{bef}\x{16ee}
/^[[:digit:]]+/8W
- 1\x{660}\x{bef}\x{16ee}
+ 1\x{660}\x{bef}\x{16ee}
/^>\s+/8W
- >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
+ >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
/^>\pZ+/8W
- >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
+ >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
/^>[[:space:]]*/8W
- >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
+ >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
/^>[[:blank:]]*/8W
- >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
+ >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
/^[[:alpha:]]*/8W
- Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
+ Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
/^[[:alnum:]]*/8W
- Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+ Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
/^[[:cntrl:]]*/8W
- \x{0}\x{09}\x{1f}\x{7f}\x{9f}
+ \x{0}\x{09}\x{1f}\x{7f}\x{9f}
/^[[:graph:]]*/8W
- A\x{a1}\x{a0}
+ A\x{a1}\x{a0}
/^[[:print:]]*/8W
- A z\x{a0}\x{a1}
+ A z\x{a0}\x{a1}
/^[[:punct:]]*/8W
- .+\x{a1}\x{a0}
+ .+\x{a1}\x{a0}
+/\p{Zs}*?\R/
+ ** Failers
+ a\xFCb
+
+/\p{Zs}*\R/
+ ** Failers
+ a\xFCb
+
/-- End of testinput6 --/
Modified: code/trunk/testdata/testoutput6
===================================================================
--- code/trunk/testdata/testoutput6 2010-05-29 16:40:22 UTC (rev 528)
+++ code/trunk/testdata/testoutput6 2010-05-31 17:28:08 UTC (rev 529)
@@ -1286,59 +1286,71 @@
0: \x{10b00}\x{a6ef}\x{13007}\x{10857}\x{10b78}\x{10b58}\x{a980}\x{110c1}\x{a4ff}\x{abc0}\x{10a7d}\x{10c48}\x{800}\x{1aad}\x{aac0}
/^\w+/8W
- Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+ Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
0: Az_\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
/^[[:xdigit:]]*/8W
- 1a\x{660}\x{bef}\x{16ee}
+ 1a\x{660}\x{bef}\x{16ee}
0: 1a
/^\d+/8W
- 1\x{660}\x{bef}\x{16ee}
+ 1\x{660}\x{bef}\x{16ee}
0: 1\x{660}\x{bef}
/^[[:digit:]]+/8W
- 1\x{660}\x{bef}\x{16ee}
+ 1\x{660}\x{bef}\x{16ee}
0: 1\x{660}\x{bef}
/^>\s+/8W
- >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
+ >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{09}
/^>\pZ+/8W
- >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
+ >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}
/^>[[:space:]]*/8W
- >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
+ >\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{09}\x{0b}
/^>[[:blank:]]*/8W
- >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
+ >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09}
/^[[:alpha:]]*/8W
- Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
+ Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
0: Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
/^[[:alnum:]]*/8W
- Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
+ Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
0: Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}1\x{660}\x{bef}\x{16ee}
/^[[:cntrl:]]*/8W
- \x{0}\x{09}\x{1f}\x{7f}\x{9f}
+ \x{0}\x{09}\x{1f}\x{7f}\x{9f}
0: \x{00}\x{09}\x{1f}\x{7f}
/^[[:graph:]]*/8W
- A\x{a1}\x{a0}
+ A\x{a1}\x{a0}
0: A
/^[[:print:]]*/8W
- A z\x{a0}\x{a1}
+ A z\x{a0}\x{a1}
0: A z
/^[[:punct:]]*/8W
- .+\x{a1}\x{a0}
+ .+\x{a1}\x{a0}
0: .+
+/\p{Zs}*?\R/
+ ** Failers
+No match
+ a\xFCb
+No match
+
+/\p{Zs}*\R/
+ ** Failers
+No match
+ a\xFCb
+No match
+
/-- End of testinput6 --/