Revision: 802
http://vcs.pcre.org/viewvc?view=rev&revision=802
Author: ph10
Date: 2011-12-13 09:52:20 +0000 (Tue, 13 Dec 2011)
Log Message:
-----------
Minor issues after merge.
Modified Paths:
--------------
code/branches/pcre16/pcre_compile.c
code/branches/pcre16/pcre_study.c
code/branches/pcre16/testdata/testinput13
code/branches/pcre16/testdata/testoutput13
Modified: code/branches/pcre16/pcre_compile.c
===================================================================
--- code/branches/pcre16/pcre_compile.c 2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/pcre_compile.c 2011-12-13 09:52:20 UTC (rev 802)
@@ -3762,7 +3762,7 @@
/* For optimization purposes, we track some properties of the class.
class_has_8bitchar will be non-zero if the class contains at least one
- < 256 character. class_single_char will be 1, if the class only contains
+ < 256 character. class_single_char will be 1 if the class contains only
a single character. */
class_has_8bitchar = 0;
@@ -3933,7 +3933,7 @@
of the specials, which just set a flag. The sequence \b is a special
case. Inside a class (and only there) it is treated as backspace. We
assume that other escapes have more than one character in them, so
- speculatively set both class_has_8bitchar class_single_char bigger
+ speculatively set both class_has_8bitchar and class_single_char bigger
than one. Unrecognized escapes fall through and are either treated
as literal characters (by default), or are faulted if
PCRE_EXTRA is set. */
@@ -4420,6 +4420,7 @@
class_lastchar = c;
/* Handle a character that cannot go in the bit map */
+
#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
#elif defined SUPPORT_UTF
@@ -4427,15 +4428,15 @@
#elif !(defined COMPILE_PCRE8)
if (c > 255)
#endif
+
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
{
xclass = TRUE;
*class_uchardata++ = XCL_SINGLE;
#ifdef SUPPORT_UTF
#ifndef COMPILE_PCRE8
- /* In non 8 bit mode, we can get here even
- if we are not in UTF mode. */
- if (!utf)
+ /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
+ if (!utf)
*class_uchardata++ = c;
else
#endif
@@ -4448,8 +4449,7 @@
#ifdef COMPILE_PCRE8
if ((options & PCRE_CASELESS) != 0)
#else
- /* In non 8 bit mode, we can get here even
- if we are not in UTF mode. */
+ /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
if (utf && (options & PCRE_CASELESS) != 0)
#endif
{
@@ -4465,7 +4465,7 @@
However, that uses less memory, and so if this happens to be at the
end of the regex, there will not be enough memory in the real
compile for this temporary storage. */
-
+
if (lengthptr != NULL)
{
*lengthptr += class_uchardata - class_uchardata_base;
@@ -4478,6 +4478,7 @@
}
else
#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
+
/* Handle a single-byte character */
{
class_has_8bitchar = 1;
@@ -4488,7 +4489,6 @@
classbits[c/8] |= (1 << (c&7));
}
}
-
}
/* Loop until ']' reached. This "while" is the end of the "do" far above.
@@ -4508,11 +4508,9 @@
goto FAILED;
}
- /* COMMENT NEEDS FIXING - no longer true.
- If class_charcount is 1, we saw precisely one character whose value is
- less than 256. As long as there were no characters >= 128 and there was no
- use of \p or \P, in other words, no use of any XCLASS features, we can
- optimize.
+ /* If class_charcount is 1, we saw precisely one character. As long as
+ there were no negated characters >= 128 and there was no use of \p or \P,
+ in other words, no use of any XCLASS features, we can optimize.
In UTF-8 mode, we can optimize the negative case only if there were no
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
Modified: code/branches/pcre16/pcre_study.c
===================================================================
--- code/branches/pcre16/pcre_study.c 2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/pcre_study.c 2011-12-13 09:52:20 UTC (rev 802)
@@ -1433,7 +1433,7 @@
study->flags |= PCRE_STUDY_MAPPED;
memcpy(study->start_bits, start_bits, sizeof(start_bits));
}
- else memset(study->start_bits, 0, 32 * sizeof(pcre_uchar));
+ else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
#ifdef PCRE_DEBUG
if (bits_set)
Modified: code/branches/pcre16/testdata/testinput13
===================================================================
--- code/branches/pcre16/testdata/testinput13 2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/testdata/testinput13 2011-12-13 09:52:20 UTC (rev 802)
@@ -580,4 +580,8 @@
/(?<=ab\Cde)X/8
+/[ⱥ]/8iBZ
+
+/[^ⱥ]/8iBZ
+
/-- End of testinput13 --/
Modified: code/branches/pcre16/testdata/testoutput13
===================================================================
--- code/branches/pcre16/testdata/testoutput13 2011-12-12 16:23:37 UTC (rev 801)
+++ code/branches/pcre16/testdata/testoutput13 2011-12-13 09:52:20 UTC (rev 802)
@@ -1289,4 +1289,20 @@
/(?<=ab\Cde)X/8
Failed: \C not allowed in lookbehind assertion at offset 10
+/[ⱥ]/8iBZ
+------------------------------------------------------------------
+ Bra
+ /i \x{2c65}
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^ⱥ]/8iBZ
+------------------------------------------------------------------
+ Bra
+ [^\x{2c65}\x{23a}]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput13 --/