Revision: 1414
http://vcs.pcre.org/viewvc?view=rev&revision=1414
Author: zherczeg
Date: 2013-12-22 16:27:35 +0000 (Sun, 22 Dec 2013)
Log Message:
-----------
A new flag is set, when property checks are present in an XCLASS.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/pcre_jit_compile.c
code/trunk/pcre_printint.c
code/trunk/pcre_study.c
code/trunk/pcre_xclass.c
code/trunk/testdata/saved16BE-1
code/trunk/testdata/saved16LE-1
code/trunk/testdata/saved32BE-1
code/trunk/testdata/saved32LE-1
code/trunk/testdata/testoutput17
code/trunk/testdata/testoutput23
code/trunk/testdata/testoutput25
code/trunk/testdata/testoutput5
code/trunk/testdata/testoutput7
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/ChangeLog 2013-12-22 16:27:35 UTC (rev 1414)
@@ -1,6 +1,14 @@
ChangeLog for PCRE
------------------
+Version 8.35-RC1 xx-xxxx-201x
+-----------------------------
+
+1. A new flag is set, when property checks are present in an XCLASS.
+ When this flag is not set, PCRE can perform certain optimizations
+ such as studying these XCLASS-es.
+
+
Version 8.34 15-December-2013
-----------------------------
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/pcre_compile.c 2013-12-22 16:27:35 UTC (rev 1414)
@@ -4105,6 +4105,7 @@
compile_data *cd, pcre_uint32 start, pcre_uint32 end)
{
pcre_uint32 c;
+pcre_uint32 classbits_end = (end <= 0xff ? end : 0xff);
int n8 = 0;
/* If caseless matching is required, scan the range and process alternate
@@ -4148,7 +4149,7 @@
/* Not UTF-mode, or no UCP */
- for (c = start; c <= end && c < 256; c++)
+ for (c = start; c <= classbits_end; c++)
{
SETBIT(classbits, cd->fcc[c]);
n8++;
@@ -4173,20 +4174,19 @@
#endif /* COMPILE_PCRE[8|16] */
-/* If all characters are less than 256, use the bit map. Otherwise use extra
-data. */
+/* Use the bitmap for characters < 256. Otherwise use extra data.*/
-if (end < 0x100)
+for (c = start; c <= classbits_end; c++)
{
- for (c = start; c <= end; c++)
- {
- n8++;
- SETBIT(classbits, c);
- }
+ /* Regardless of start, c will always be <= 255. */
+ SETBIT(classbits, c);
+ n8++;
}
-else
- {
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+if (start <= 0xff) start = 0xff + 1;
+
+if (end >= start) {
pcre_uchar *uchardata = *uchardptr;
#ifdef SUPPORT_UTF
@@ -4228,6 +4228,7 @@
*uchardptr = uchardata; /* Updata extra data pointer */
}
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
return n8; /* Number of 8-bit characters */
}
@@ -4449,6 +4450,9 @@
BOOL reset_bracount;
int class_has_8bitchar;
int class_one_char;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ BOOL xclass_has_prop;
+#endif
int newoptions;
int recno;
int refsign;
@@ -4783,13 +4787,26 @@
should_flip_negation = FALSE;
+ /* Extended class (xclass) will be used when characters > 255
+ might match. */
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ xclass = FALSE;
+ class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */
+ class_uchardata_base = class_uchardata; /* Save the start */
+#endif
+
/* For optimization purposes, we track some properties of the class:
class_has_8bitchar will be non-zero if the class contains at least one <
256 character; class_one_char will be 1 if the class contains just one
- character. */
+ character; xclass_has_prop will be TRUE if unicode property checks
+ are present in the class. */
class_has_8bitchar = 0;
class_one_char = 0;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ xclass_has_prop = FALSE;
+#endif
/* Initialize the 32-char bit map to all zeros. We build the map in a
temporary bit of memory, in case the class contains fewer than two
@@ -4798,12 +4815,6 @@
memset(classbits, 0, 32 * sizeof(pcre_uint8));
-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- xclass = FALSE;
- class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */
- class_uchardata_base = class_uchardata; /* Save the start */
-#endif
-
/* Process characters until ] is reached. By writing this as a "do" it
means that an initial ] is taken as a data character. At the start of the
loop, c contains the first byte of the character. */
@@ -4927,6 +4938,7 @@
*class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
*class_uchardata++ = ptype;
*class_uchardata++ = 0;
+ xclass_has_prop = TRUE;
ptr = tempptr + 1;
continue;
@@ -5109,6 +5121,7 @@
XCL_PROP : XCL_NOTPROP;
*class_uchardata++ = ptype;
*class_uchardata++ = pdata;
+ xclass_has_prop = TRUE;
class_has_8bitchar--; /* Undo! */
continue;
}
@@ -5403,6 +5416,7 @@
*code++ = OP_XCLASS;
code += LINK_SIZE;
*code = negate_class? XCL_NOT:0;
+ if (xclass_has_prop) *code |= XCL_HASPROP;
/* If the map is required, move up the extra data to make room for it;
otherwise just move the code pointer to the end of the extra data. */
@@ -5412,6 +5426,8 @@
*code++ |= XCL_MAP;
memmove(code + (32 / sizeof(pcre_uchar)), code,
IN_UCHARS(class_uchardata - code));
+ if (negate_class && !xclass_has_prop)
+ for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
memcpy(code, classbits, 32);
code = class_uchardata + (32 / sizeof(pcre_uchar));
}
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/pcre_exec.c 2013-12-22 16:27:35 UTC (rev 1414)
@@ -6833,18 +6833,8 @@
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
- if ((start_bits[c/8] & (1 << (c&7))) == 0)
- {
- start_match++;
-#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- /* In non 8-bit mode, the iteration will stop for
- characters > 255 at the beginning or not stop at all. */
- if (utf)
- ACROSSCHAR(start_match < end_subject, *start_match,
- start_match++);
-#endif
- }
- else break;
+ if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
+ start_match++;
}
}
} /* Starting optimizations */
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/pcre_internal.h 2013-12-22 16:27:35 UTC (rev 1414)
@@ -1874,8 +1874,9 @@
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
-#define XCL_NOT 0x01 /* Flag: this is a negative class */
-#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
+#define XCL_NOT 0x01 /* Flag: this is a negative class */
+#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
+#define XCL_HASPROP 0x04 /* Flag: property checks are present. */
#define XCL_END 0 /* Marks end of individual items */
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/pcre_jit_compile.c 2013-12-22 16:27:35 UTC (rev 1414)
@@ -3206,24 +3206,19 @@
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
-static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
+static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
-static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
+static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
struct sljit_jump *found = NULL;
jump_list *matches = NULL;
-pcre_uint8 inverted_start_bits[32];
-int i;
#ifndef COMPILE_PCRE8
struct sljit_jump *jump;
#endif
-for (i = 0; i < 32; ++i)
- inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
-
if (firstline)
{
SLJIT_ASSERT(common->first_line_end != 0);
@@ -3239,7 +3234,7 @@
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif
-if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
+if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
{
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
@@ -3248,7 +3243,7 @@
#endif
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
+ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
found = JUMP(SLJIT_C_NOT_ZERO);
@@ -3524,10 +3519,42 @@
case 2:
if (readch)
read_char(common);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
- add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
+ if (ranges[2] + 1 != ranges[3])
+ {
+ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
+ add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
+ }
+ else
+ add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
return TRUE;
+ case 3:
+ if (readch)
+ read_char(common);
+ if (ranges[1] != 0)
+ {
+ add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
+ if (ranges[2] + 1 != ranges[3])
+ {
+ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
+ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
+ }
+ else
+ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
+ }
+ else
+ {
+ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2]));
+ if (ranges[3] + 1 != ranges[4])
+ {
+ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]);
+ add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
+ }
+ else
+ add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3]));
+ }
+ return TRUE;
+
case 4:
if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
{
@@ -3592,15 +3619,15 @@
ranges[0] = length;
}
-static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
+static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
int ranges[2 + MAX_RANGE_SIZE];
pcre_uint8 bit, cbit, all;
int i, byte, length = 0;
bit = bits[0] & 0x1;
-ranges[1] = bit;
-/* Can be 0 or 255. */
+ranges[1] = !invert ? bit : (bit ^ 0x1);
+/* All bits will be zero or one (since bit is zero or one). */
all = -bit;
for (i = 0; i < 256; )
@@ -4021,7 +4048,7 @@
{
DEFINE_COMPILER;
jump_list *found = NULL;
-jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
+jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
pcre_int32 c, charoffset;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin;
@@ -4040,36 +4067,62 @@
detect_partial_match(common, backtracks);
read_char(common);
-if ((*cc++ & XCL_MAP) != 0)
+cc++;
+if ((cc[-1] & XCL_HASPROP) == 0)
{
+ if ((cc[-1] & XCL_MAP) != 0)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
+#ifdef SUPPORT_UCP
+ charsaved = TRUE;
+#endif
+ if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
+ {
+ jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
+
+ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+
+ JUMPHERE(jump);
+ }
+ else
+ add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
+
+ OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+ cc += 32 / sizeof(pcre_uchar);
+ }
+ else
+ add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
+ }
+else if ((cc[-1] & XCL_MAP) != 0)
+ {
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
-#ifndef COMPILE_PCRE8
- jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
-#elif defined SUPPORT_UTF
- if (common->utf)
+#ifdef SUPPORT_UCP
+ charsaved = TRUE;
+#endif
+ if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
+ {
+#ifdef COMPILE_PCRE8
+ SLJIT_ASSERT(common->utf);
+#endif
jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
-#endif
- if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
- {
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
+
+ JUMPHERE(jump);
}
-#ifndef COMPILE_PCRE8
- JUMPHERE(jump);
-#elif defined SUPPORT_UTF
- if (common->utf)
- JUMPHERE(jump);
-#endif
OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
-#ifdef SUPPORT_UCP
- charsaved = TRUE;
-#endif
cc += 32 / sizeof(pcre_uchar);
}
@@ -4624,7 +4677,7 @@
#ifdef SUPPORT_UCP
case OP_NOTPROP:
case OP_PROP:
- propdata[0] = 0;
+ propdata[0] = XCL_HASPROP;
propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
propdata[2] = cc[0];
propdata[3] = cc[1];
@@ -4983,7 +5036,7 @@
case OP_NCLASS:
detect_partial_match(common, backtracks);
read_char(common);
- if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
+ if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
return cc + 32 / sizeof(pcre_uchar);
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
@@ -9218,7 +9271,7 @@
else if ((re->flags & PCRE_STARTLINE) != 0)
fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
- fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
+ fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
}
}
else
Modified: code/trunk/pcre_printint.c
===================================================================
--- code/trunk/pcre_printint.c 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/pcre_printint.c 2013-12-22 16:27:35 UTC (rev 1414)
@@ -644,7 +644,9 @@
int i;
unsigned int min, max;
BOOL printmap;
+ BOOL invertmap = FALSE;
pcre_uint8 *map;
+ pcre_uint8 inverted_map[32];
fprintf(f, " [");
@@ -653,7 +655,12 @@
extra = GET(code, 1);
ccode = code + LINK_SIZE + 1;
printmap = (*ccode & XCL_MAP) != 0;
- if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
+ if ((*ccode & XCL_NOT) != 0)
+ {
+ invertmap = (*ccode & XCL_HASPROP) == 0;
+ fprintf(f, "^");
+ }
+ ccode++;
}
else
{
@@ -666,6 +673,12 @@
if (printmap)
{
map = (pcre_uint8 *)ccode;
+ if (invertmap)
+ {
+ for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
+ map = inverted_map;
+ }
+
for (i = 0; i < 256; i++)
{
if ((map[i/8] & (1 << (i&7))) != 0)
Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/pcre_study.c 2013-12-22 16:27:35 UTC (rev 1414)
@@ -879,9 +879,6 @@
case OP_SOM:
case OP_THEN:
case OP_THEN_ARG:
-#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
- case OP_XCLASS:
-#endif
return SSB_FAIL;
/* We can ignore word boundary tests. */
@@ -1257,6 +1254,16 @@
with a value >= 0xc4 is a potentially valid starter because it starts a
character with a value > 255. */
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ case OP_XCLASS:
+ if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0)
+ return SSB_FAIL;
+ /* All bits are set. */
+ if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0)
+ return SSB_FAIL;
+#endif
+ /* Fall through */
+
case OP_NCLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (utf)
@@ -1273,8 +1280,21 @@
case OP_CLASS:
{
pcre_uint8 *map;
- tcode++;
- map = (pcre_uint8 *)tcode;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ map = NULL;
+ if (*tcode == OP_XCLASS)
+ {
+ if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0)
+ map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1);
+ tcode += GET(tcode, 1);
+ }
+ else
+#endif
+ {
+ tcode++;
+ map = (pcre_uint8 *)tcode;
+ tcode += 32 / sizeof(pcre_uchar);
+ }
/* In UTF-8 mode, the bits in a bit map correspond to character
values, not to byte values. However, the bit map we are constructing is
@@ -1282,31 +1302,35 @@
value is > 127. In fact, there are only two possible starting bytes for
characters in the range 128 - 255. */
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ if (map != NULL)
+#endif
+ {
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
- if (utf)
- {
- for (c = 0; c < 16; c++) start_bits[c] |= map[c];
- for (c = 128; c < 256; c++)
+ if (utf)
{
- if ((map[c/8] && (1 << (c&7))) != 0)
+ for (c = 0; c < 16; c++) start_bits[c] |= map[c];
+ for (c = 128; c < 256; c++)
{
- int d = (c >> 6) | 0xc0; /* Set bit for this starter */
- start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
- c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
+ if ((map[c/8] && (1 << (c&7))) != 0)
+ {
+ int d = (c >> 6) | 0xc0; /* Set bit for this starter */
+ start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
+ c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
+ }
}
}
- }
- else
+ else
#endif
- {
- /* In non-UTF-8 mode, the two bit maps are completely compatible. */
- for (c = 0; c < 32; c++) start_bits[c] |= map[c];
+ {
+ /* In non-UTF-8 mode, the two bit maps are completely compatible. */
+ for (c = 0; c < 32; c++) start_bits[c] |= map[c];
+ }
}
/* Advance past the bit map, and act on what follows. For a zero
minimum repeat, continue; otherwise stop processing. */
- tcode += 32 / sizeof(pcre_uchar);
switch (*tcode)
{
case OP_CRSTAR:
Modified: code/trunk/pcre_xclass.c
===================================================================
--- code/trunk/pcre_xclass.c 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/pcre_xclass.c 2013-12-22 16:27:35 UTC (rev 1414)
@@ -81,6 +81,11 @@
if (c < 256)
{
+ if ((*data & XCL_HASPROP) == 0)
+ {
+ if ((*data & XCL_MAP) == 0) return negated;
+ return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
+ }
if ((*data & XCL_MAP) != 0 &&
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
Modified: code/trunk/testdata/saved16BE-1
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved16LE-1
===================================================================
(Binary files differ)
Modified: code/trunk/testdata/saved32BE-1
===================================================================
--- code/trunk/testdata/saved32BE-1 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/testdata/saved32BE-1 2013-12-22 16:27:35 UTC (rev 1414)
@@ -1 +1 @@
-???\xF4???,PCRE???\xF4????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????\x83???^???????n???????????????????????????????????\x85???????????\x83???????n???????????????????????????????????d???x???????o\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???c???x???????\x86???????????p??? ???????????P???P???????????????w???????p??????????????????8???8???????????????????????????????\xD8???\xDF\xFF???????{???????p???????????????????????????????????????????????????????????????h???????????????x???^???????,????????????????????????????????????????
\ No newline at end of file
+???\xFC???,PCRE???\xFC????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????\x83???^???????n???????????????????????????????????\x85???????????\x83???????n???????????????????????????????????d???x???????o\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???c???x???????\x86???????????p??? ???????????P???P???????????????w???????p????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xC7\xFF\xFD\xFF\xC7\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF??????????????\xD8???\xDF\xFF???????{???????p????????\xFF\xFE\xFF\xFF\xF7\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF???????????????????????h???????????????x???^???????,????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/saved32LE-1
===================================================================
--- code/trunk/testdata/saved32LE-1 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/testdata/saved32LE-1 2013-12-22 16:27:35 UTC (rev 1414)
@@ -1 +1 @@
-???\xF4???,ERCP\xF4???????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????\x83???^???????n???????????????????????????????????\x85???????????\x83???????n???????????????????????????????????d???x???????o???\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFc???x???????\x86???????????p??? ???????????P???P???????????????w???????p?????????????????????8???8??????????????????????????????\xD8??\xFF\xDF??????{???????p???????????????????????????????????????????????????????????????h???????????????x???^???????,???????????????????????????????????????????
\ No newline at end of file
+???\xFC???,ERCP\xFC???????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF????????????????????????????????????????????n???a???m???e???????????????o???t???h???e???r???????\x83???^???????n???????????????????????????????????\x85???????????\x83???????n???????????????????????????????????d???x???????o???\xFF\xFF\xFF\xFF\xFF\xDF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFFc???x???????\x86???????????p??? ???????????P???P???????????????w???????p???????????\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD\xFF\xC7\xFF\xFD\xFF\xC7\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF?????????????\xD8??\xFF\xDF??????{???????p???????????\xFF\xFE\xFF\xFF\xF7\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF????????????????????h???????????????x???^???????,???????????????????????????????????????????
\ No newline at end of file
Modified: code/trunk/testdata/testoutput17
===================================================================
--- code/trunk/testdata/testoutput17 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/testdata/testoutput17 2013-12-22 16:27:35 UTC (rev 1414)
@@ -292,7 +292,7 @@
No first char
No need char
Subject length lower bound = 1
-No set of starting bytes
+Starting byte set: \x09 \x20 \xa0 \xff
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
0: \x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
@@ -348,7 +348,7 @@
No first char
No need char
Subject length lower bound = 1
-No set of starting bytes
+Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
\x{2027}\x{2030}\x{2028}\x{2029}
0: \x{2028}\x{2029}
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
@@ -534,18 +534,18 @@
------------------------------------------------------------------
Bra
a*
- [b-\x{200}]?+
+ [b-\xff\x{100}-\x{200}]?+
a#
a*+
- [b-\x{200}]?
+ [b-\xff\x{100}-\x{200}]?
b#
[a-f]*
- [g-\x{200}]*+
+ [g-\xff\x{100}-\x{200}]*+
#
- [g-\x{200}]*
+ [g-\xff\x{100}-\x{200}]*
[a-c]*+
#
- [g-\x{200}]*
+ [g-\xff\x{100}-\x{200}]*
[a-h]*+
Ket
End
Modified: code/trunk/testdata/testoutput23
===================================================================
--- code/trunk/testdata/testoutput23 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/testdata/testoutput23 2013-12-22 16:27:35 UTC (rev 1414)
@@ -18,7 +18,7 @@
/[\H]/BZSI
------------------------------------------------------------------
Bra
- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
Ket
End
------------------------------------------------------------------
@@ -27,12 +27,25 @@
No first char
No need char
Subject length lower bound = 1
-No set of starting bytes
+Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
+ \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
+ \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
+ : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
+ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
+ \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
+ \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
+ \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
+ \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
+ \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
+ \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
+ \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
+ \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
+ \xfa \xfb \xfc \xfd \xfe \xff
/[\V]/BZSI
------------------------------------------------------------------
Bra
- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffff}]
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
Ket
End
------------------------------------------------------------------
@@ -41,6 +54,19 @@
No first char
No need char
Subject length lower bound = 1
-No set of starting bytes
+Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
+ \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
+ \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
+ ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
+ d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
+ \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
+ \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
+ \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
+ \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
+ \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
+ \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
+ \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
+ \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
+ \xfc \xfd \xfe \xff
/-- End of testinput23 --/
Modified: code/trunk/testdata/testoutput25
===================================================================
--- code/trunk/testdata/testoutput25 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/testdata/testoutput25 2013-12-22 16:27:35 UTC (rev 1414)
@@ -65,7 +65,7 @@
/[\H]/BZSI
------------------------------------------------------------------
Bra
- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffffffff}]
Ket
End
------------------------------------------------------------------
@@ -74,12 +74,25 @@
No first char
No need char
Subject length lower bound = 1
-No set of starting bytes
+Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
+ \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
+ \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
+ : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
+ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
+ \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
+ \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
+ \x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
+ \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
+ \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
+ \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
+ \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
+ \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
+ \xfa \xfb \xfc \xfd \xfe \xff
/[\V]/BZSI
------------------------------------------------------------------
Bra
- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{ffffffff}]
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffffffff}]
Ket
End
------------------------------------------------------------------
@@ -88,6 +101,19 @@
No first char
No need char
Subject length lower bound = 1
-No set of starting bytes
+Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
+ \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
+ \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
+ ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
+ d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
+ \x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
+ \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
+ \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
+ \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
+ \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
+ \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
+ \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
+ \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
+ \xfc \xfd \xfe \xff
/-- End of testinput25 --/
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/testdata/testoutput5 2013-12-22 16:27:35 UTC (rev 1414)
@@ -270,7 +270,7 @@
/[z-\x{100}]/8DZ
------------------------------------------------------------------
Bra
- [z-\x{100}]
+ [z-\xff\x{100}]
Ket
End
------------------------------------------------------------------
@@ -812,7 +812,7 @@
/[\H]/8BZ
------------------------------------------------------------------
Bra
- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}]
Ket
End
------------------------------------------------------------------
@@ -820,7 +820,7 @@
/[\V]/8BZ
------------------------------------------------------------------
Bra
- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}]
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}]
Ket
End
------------------------------------------------------------------
@@ -1622,7 +1622,7 @@
/[\H\x{d7ff}]+/8BZ
------------------------------------------------------------------
Bra
- [\x00-\x08\x0a-\x1f!-\x9f\x{a1}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++
Ket
End
------------------------------------------------------------------
@@ -1662,7 +1662,7 @@
/[\V\x{d7ff}]+/8BZ
------------------------------------------------------------------
Bra
- [\x00-\x09\x0e-\x84\x{86}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++
Ket
End
------------------------------------------------------------------
Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7 2013-12-15 17:02:58 UTC (rev 1413)
+++ code/trunk/testdata/testoutput7 2013-12-22 16:27:35 UTC (rev 1414)
@@ -124,7 +124,7 @@
/[z-\x{100}]/8iDZ
------------------------------------------------------------------
Bra
- [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}]
+ [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}]
Ket
End
------------------------------------------------------------------
@@ -162,7 +162,7 @@
/[z-\x{100}]/8DZi
------------------------------------------------------------------
Bra
- [Z\x{39c}\x{3bc}\x{1e9e}\x{178}z-\x{101}]
+ [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}]
Ket
End
------------------------------------------------------------------