Revision: 462
http://vcs.pcre.org/viewvc?view=rev&revision=462
Author: ph10
Date: 2009-10-17 20:55:02 +0100 (Sat, 17 Oct 2009)
Log Message:
-----------
Fix PCRE_PARTIAL_HARD for patterns that end optionally, e.g. abc*
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/configure.ac
code/trunk/pcre_dfa_exec.c
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/testdata/testinput2
code/trunk/testdata/testinput5
code/trunk/testdata/testinput7
code/trunk/testdata/testinput8
code/trunk/testdata/testoutput2
code/trunk/testdata/testoutput5
code/trunk/testdata/testoutput7
code/trunk/testdata/testoutput8
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/ChangeLog 2009-10-17 19:55:02 UTC (rev 462)
@@ -58,8 +58,7 @@
10. Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is
synonymous with PCRE_PARTIAL, for backwards compatibility, and
PCRE_PARTIAL_HARD, which causes a partial match to supersede a full match,
- and may be more useful for multi-segment matching, especially with
- pcre_exec().
+ and may be more useful for multi-segment matching.
11. Partial matching with pcre_exec() is now more intuitive. A partial match
used to be given if ever the end of the subject was reached; now it is
Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/configure.ac 2009-10-17 19:55:02 UTC (rev 462)
@@ -8,8 +8,8 @@
m4_define(pcre_major, [8])
m4_define(pcre_minor, [00])
-m4_define(pcre_prerelease, [-RC1])
-m4_define(pcre_date, [2009-10-05])
+m4_define(pcre_prerelease, [-RC2])
+m4_define(pcre_date, [2009-10-17])
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0])
Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/pcre_dfa_exec.c 2009-10-17 19:55:02 UTC (rev 462)
@@ -109,8 +109,9 @@
character that is to be tested in some way. This makes is possible to
centralize the loading of these characters. In the case of Type * etc, the
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
-small value. ***NOTE*** If the start of this table is modified, the two tables
-that follow must also be modified. */
+small value. Non-zero values in the table are the offsets from the opcode where
+the character is to be found. ***NOTE*** If the start of this table is
+modified, the three tables that follow must also be modified. */
static const uschar coptable[] = {
0, /* End */
@@ -160,9 +161,66 @@
0, /* DEF */
0, 0, /* BRAZERO, BRAMINZERO */
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
- 0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */
+ 0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */
};
+/* This table identifies those opcodes that inspect a character. It is used to
+remember the fact that a character could have been inspected when the end of
+the subject is reached, in order to support PCRE_PARTIAL_HARD behaviour.
+***NOTE*** If the start of this table is modified, the two tables that follow
+must also be modified. */
+
+static const uschar poptable[] = {
+ 0, /* End */
+ 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
+ 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
+ 1, 1, 1, /* Any, AllAny, Anybyte */
+ 1, 1, 1, /* NOTPROP, PROP, EXTUNI */
+ 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
+ 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
+ 1, /* Char */
+ 1, /* Charnc */
+ 1, /* not */
+ /* Positive single-char repeats */
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
+ 1, 1, 1, /* upto, minupto, exact */
+ 1, 1, 1, 1, /* *+, ++, ?+, upto+ */
+ /* Negative single-char repeats - only for chars < 256 */
+ 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
+ 1, 1, 1, /* NOT upto, minupto, exact */
+ 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
+ /* Positive type repeats */
+ 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
+ 1, 1, 1, /* Type upto, minupto, exact */
+ 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
+ /* Character class & ref repeats */
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
+ 1, 1, /* CRRANGE, CRMINRANGE */
+ 1, /* CLASS */
+ 1, /* NCLASS */
+ 1, /* XCLASS - variable length */
+ 0, /* REF */
+ 0, /* RECURSE */
+ 0, /* CALLOUT */
+ 0, /* Alt */
+ 0, /* Ket */
+ 0, /* KetRmax */
+ 0, /* KetRmin */
+ 0, /* Assert */
+ 0, /* Assert not */
+ 0, /* Assert behind */
+ 0, /* Assert behind not */
+ 0, /* Reverse */
+ 0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */
+ 0, 0, 0, /* SBRA, SCBRA, SCOND */
+ 0, /* CREF */
+ 0, /* RREF */
+ 0, /* DEF */
+ 0, 0, /* BRAZERO, BRAMINZERO */
+ 0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
+ 0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */
+};
+
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
and \w */
@@ -489,6 +547,7 @@
unsigned int c, d;
int forced_fail = 0;
int reached_end = 0;
+ BOOL could_continue = FALSE;
/* Make the new state list into the active state list and empty the
new state list. */
@@ -596,7 +655,13 @@
code = start_code + state_offset;
codevalue = *code;
+
+ /* If this opcode inspects a character, but we are at the end of the
+ subject, remember the fact so that we can support PCRE_PARTIAL_HARD. */
+ if (clen == 0 && poptable[codevalue] != 0)
+ could_continue = TRUE;
+
/* If this opcode is followed by an inline character, load it. It is
tempting to test for the presence of a subject character here, but that
is wrong, because sometimes zero repetitions of the subject are
@@ -2522,16 +2587,24 @@
/* We have finished the processing at the current subject character. If no
new states have been set for the next character, we have found all the
matches that we are going to find. If we are at the top level and partial
- matching has been requested, check for appropriate conditions. The "forced_
- fail" variable counts the number of (*F) encountered for the character. If it
- is equal to the original active_count (saved in workspace[1]) it means that
- (*F) was found on every active state. In this case we don't want to give a
- partial match. */
+ matching has been requested, check for appropriate conditions.
+
+ The "forced_fail" variable counts the number of (*F) encountered for the
+ character. If it is equal to the original active_count (saved in
+ workspace[1]) it means that (*F) was found on every active state. In this
+ case we don't want to give a partial match.
+
+ The "reached_end" variable counts the number of threads that have reached the
+ end of the pattern. The "could_continue" variable is true if a thread could
+ have continued but for the fact that the end of the subject was reached. */
if (new_count <= 0)
{
if (rlevel == 1 && /* Top level, and */
- reached_end != workspace[1] && /* Not all reached end */
+ ( /* either... */
+ reached_end != workspace[1] || /* Not all reached end */
+ could_continue /* or some could go on */
+ ) && /* and... */
forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/pcre_exec.c 2009-10-17 19:55:02 UTC (rev 462)
@@ -415,7 +415,7 @@
}
#define SCHECK_PARTIAL()\
- if (md->partial && eptr > mstart)\
+ if (md->partial != 0 && eptr > mstart)\
{\
md->hitend = TRUE;\
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
@@ -2146,7 +2146,11 @@
pp = eptr;
for (i = min; i < max; i++)
{
- if (!match_ref(offset, eptr, length, md, ims)) break;
+ if (!match_ref(offset, eptr, length, md, ims))
+ {
+ CHECK_PARTIAL();
+ break;
+ }
eptr += length;
}
while (eptr >= pp)
@@ -2315,7 +2319,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c > 255)
{
@@ -2341,7 +2349,11 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if ((data[c/8] & (1 << (c&7))) == 0) break;
eptr++;
@@ -2446,7 +2458,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLENTEST(c, eptr, len);
if (!_pcre_xclass(c, data)) break;
eptr += len;
@@ -2685,7 +2701,11 @@
eptr <= md->end_subject - oclength &&
memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
#endif /* SUPPORT_UCP */
- else break;
+ else
+ {
+ CHECK_PARTIAL();
+ break;
+ }
}
if (possessive) continue;
@@ -2763,7 +2783,12 @@
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc != md->lcc[*eptr]) break;
eptr++;
}
@@ -2817,7 +2842,12 @@
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc != *eptr) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc != *eptr) break;
eptr++;
}
if (possessive) continue;
@@ -3029,7 +3059,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(d, eptr, len);
if (d < 256) d = md->lcc[d];
if (fc == d) break;
@@ -3050,7 +3084,12 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc == md->lcc[*eptr]) break;
eptr++;
}
if (possessive) continue;
@@ -3159,7 +3198,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(d, eptr, len);
if (fc == d) break;
eptr += len;
@@ -3179,7 +3222,12 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc == *eptr) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc == *eptr) break;
eptr++;
}
if (possessive) continue;
@@ -4335,7 +4383,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (prop_fail_result) break;
eptr+= len;
@@ -4346,7 +4398,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -4361,7 +4417,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -4374,7 +4434,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -4387,7 +4451,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -4416,7 +4484,11 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) break;
@@ -4436,6 +4508,7 @@
/* eptr is now past the end of the maximum run */
if (possessive) continue;
+
for(;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
@@ -4471,7 +4544,12 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4483,7 +4561,12 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4495,7 +4578,11 @@
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4508,15 +4595,22 @@
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- c = md->end_subject - eptr;
- eptr += c;
+ {
+ eptr = md->end_subject;
+ SCHECK_PARTIAL();
+ }
+ else eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c == 0x000d)
{
@@ -4541,7 +4635,11 @@
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4579,7 +4677,11 @@
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4603,7 +4705,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
eptr+= len;
@@ -4614,7 +4720,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
eptr+= len;
@@ -4625,7 +4735,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
eptr+= len;
@@ -4636,7 +4750,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
eptr+= len;
@@ -4647,7 +4765,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
eptr+= len;
@@ -4658,7 +4780,11 @@
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
eptr+= len;
@@ -4690,7 +4816,12 @@
case OP_ANY:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
}
break;
@@ -4699,14 +4830,21 @@
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- c = md->end_subject - eptr;
- eptr += c;
+ {
+ eptr = md->end_subject;
+ SCHECK_PARTIAL();
+ }
+ else eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x000d)
{
@@ -4727,7 +4865,11 @@
case OP_NOT_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x09 || c == 0x20 || c == 0xa0) break;
eptr++;
@@ -4737,7 +4879,11 @@
case OP_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c != 0x09 && c != 0x20 && c != 0xa0) break;
eptr++;
@@ -4747,7 +4893,11 @@
case OP_NOT_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
break;
@@ -4758,7 +4908,11 @@
case OP_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
break;
@@ -4769,8 +4923,12 @@
case OP_NOT_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
eptr++;
}
break;
@@ -4778,8 +4936,12 @@
case OP_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
eptr++;
}
break;
@@ -4787,8 +4949,12 @@
case OP_NOT_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_space) != 0) break;
eptr++;
}
break;
@@ -4796,8 +4962,12 @@
case OP_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_space) == 0) break;
eptr++;
}
break;
@@ -4805,8 +4975,12 @@
case OP_NOT_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_word) != 0) break;
eptr++;
}
break;
@@ -4814,8 +4988,12 @@
case OP_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_word) == 0) break;
eptr++;
}
break;
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/pcre_internal.h 2009-10-17 19:55:02 UTC (rev 462)
@@ -1210,8 +1210,8 @@
OP_EOD must correspond in order to the list of escapes immediately above.
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
-that follow must also be updated to match. There is also a table called
-"coptable" in pcre_dfa_exec.c that must be updated. */
+that follow must also be updated to match. There are also tables called
+"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
enum {
OP_END, /* 0 End of pattern */
@@ -1376,7 +1376,11 @@
OP_SKIPZERO /* 114 */
};
+/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
+definitions that follow must also be updated to match. There are also tables
+called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
+
/* This macro defines textual names for all the opcodes. These are used only
for debugging. The macro is referenced only in pcre_printint.c. */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput2 2009-10-17 19:55:02 UTC (rev 462)
@@ -3125,4 +3125,26 @@
** Failers
abcdde
+/abcd*/
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/(a)bc\1*/
+ xxxxabca\P
+ xxxxabca\P\P
+
+/abc[de]*/
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput5 2009-10-17 19:55:02 UTC (rev 462)
@@ -720,4 +720,26 @@
the cat\P
the cat\P\P
+/abcd*/8
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i8
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/8
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/(a)bc\1*/8
+ xxxxabca\P
+ xxxxabca\P\P
+
+/abc[de]*/8
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput5 --/
Modified: code/trunk/testdata/testinput7
===================================================================
--- code/trunk/testdata/testinput7 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput7 2009-10-17 19:55:02 UTC (rev 462)
@@ -4507,4 +4507,22 @@
thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
\Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
+/abcd*/
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/abc[de]*/
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput7 --/
Modified: code/trunk/testdata/testinput8
===================================================================
--- code/trunk/testdata/testinput8 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testinput8 2009-10-17 19:55:02 UTC (rev 462)
@@ -667,4 +667,22 @@
/X/8f<any>
A\x{1ec5}ABCXYZ
+/abcd*/8
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i8
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/8
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/abc[de]*/8
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput8 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput2 2009-10-17 19:55:02 UTC (rev 462)
@@ -10372,4 +10372,39 @@
abcdde
No match
+/abcd*/
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+ xxxxabc1\P
+ 0: abc1
+ xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/
+ xxxxabca\P
+ 0: abca
+ 1: a
+ xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/
+ xxxxabcde\P
+ 0: abcde
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput5 2009-10-17 19:55:02 UTC (rev 462)
@@ -2037,4 +2037,39 @@
the cat\P\P
Partial match: the cat
+/abcd*/8
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+ xxxxabc1\P
+ 0: abc1
+ xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/8
+ xxxxabca\P
+ 0: abca
+ 1: a
+ xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/8
+ xxxxabcde\P
+ 0: abcde
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput5 --/
Modified: code/trunk/testdata/testoutput7
===================================================================
--- code/trunk/testdata/testoutput7 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput7 2009-10-17 19:55:02 UTC (rev 462)
@@ -7514,4 +7514,38 @@
\Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
No match
+/abcd*/
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+ xxxxabc1\P
+ 0: abc1
+ 1: abc
+ xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/
+ xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput7 --/
Modified: code/trunk/testdata/testoutput8
===================================================================
--- code/trunk/testdata/testoutput8 2009-10-05 10:59:35 UTC (rev 461)
+++ code/trunk/testdata/testoutput8 2009-10-17 19:55:02 UTC (rev 462)
@@ -1286,4 +1286,38 @@
A\x{1ec5}ABCXYZ
0: X
+/abcd*/8
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+ xxxxabc1\P
+ 0: abc1
+ 1: abc
+ xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/8
+ xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput8 --/