Revision: 1452
http://vcs.pcre.org/viewvc?view=rev&revision=1452
Author: zherczeg
Date: 2014-01-28 16:07:52 +0000 (Tue, 28 Jan 2014)
Log Message:
-----------
Fix an infinite fast-forward newline on invalid UTF input.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_jit_compile.c
code/trunk/pcre_jit_test.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2014-01-21 01:49:14 UTC (rev 1451)
+++ code/trunk/ChangeLog 2014-01-28 16:07:52 UTC (rev 1452)
@@ -96,7 +96,11 @@
19. Little endian PowerPC systems are supported now by the JIT compiler.
+20. The fast forward newline mechanism could enter to an infinite loop on
+ certain invalid UTF-8 input. Although we don't support these cases
+ this issue can be fixed by a performance optimization.
+
Version 8.34 15-December-2013
-----------------------------
Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c 2014-01-21 01:49:14 UTC (rev 1451)
+++ code/trunk/pcre_jit_compile.c 2014-01-28 16:07:52 UTC (rev 1452)
@@ -398,6 +398,7 @@
struct sljit_label *quit_label;
struct sljit_label *forced_quit_label;
struct sljit_label *accept_label;
+ struct sljit_label *ff_newline_shortcut;
stub_list *stubs;
label_addr_list *label_addrs;
recurse_entry *entries;
@@ -3871,7 +3872,7 @@
JUMPHERE(lastchar);
if (firstline)
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
return;
}
@@ -3881,6 +3882,8 @@
skip_char_back(common);
loop = LABEL();
+common->ff_newline_shortcut = loop;
+
read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
@@ -10133,10 +10136,19 @@
if ((re->options & PCRE_ANCHORED) == 0)
{
- if ((re->options & PCRE_FIRSTLINE) == 0)
- CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+ if (common->ff_newline_shortcut != NULL)
+ {
+ if ((re->options & PCRE_FIRSTLINE) == 0)
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
+ /* There cannot be more newlines here. */
+ }
else
- CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+ {
+ if ((re->options & PCRE_FIRSTLINE) == 0)
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+ else
+ CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+ }
}
/* No more remaining characters. */
Modified: code/trunk/pcre_jit_test.c
===================================================================
--- code/trunk/pcre_jit_test.c 2014-01-21 01:49:14 UTC (rev 1451)
+++ code/trunk/pcre_jit_test.c 2014-01-28 16:07:52 UTC (rev 1452)
@@ -392,6 +392,10 @@
{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
{ PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
{ PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
+ { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
+ { MUA, 1, "^", "\r\n" },
+ { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
+ { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },
/* Any character except newline or any newline. */
{ PCRE_NEWLINE_CRLF, 0, ".", "\r" },
@@ -650,6 +654,7 @@
{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
{ PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
{ PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
+ { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
/* Recurse. */
{ MUA, 0, "(a)(?1)", "aa" },
@@ -980,7 +985,7 @@
if (offsetmap)
*offsetmap++ = (int)(iptr - (unsigned char*)input);
- if (!(*iptr & 0x80))
+ if (*iptr < 0xc0)
c = *iptr++;
else if (!(*iptr & 0x20)) {
c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
@@ -1052,7 +1057,7 @@
if (offsetmap)
*offsetmap++ = (int)(iptr - (unsigned char*)input);
- if (!(*iptr & 0x80))
+ if (*iptr < 0xc0)
c = *iptr++;
else if (!(*iptr & 0x20)) {
c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
@@ -1326,10 +1331,10 @@
if ((counter & 0x1) != 0) {
setstack8(extra8);
return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
} else
return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
memset(&dummy_extra8, 0, sizeof(pcre_extra));
dummy_extra8.flags = PCRE_EXTRA_MARK;
if (current->start_offset & F_STUDY) {
@@ -1338,7 +1343,7 @@
}
dummy_extra8.mark = &mark8_2;
return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
}
#endif
@@ -1360,10 +1365,10 @@
if ((counter & 0x1) != 0) {
setstack16(extra16);
return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
} else
return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
memset(&dummy_extra16, 0, sizeof(pcre16_extra));
dummy_extra16.flags = PCRE_EXTRA_MARK;
if (current->start_offset & F_STUDY) {
@@ -1372,7 +1377,7 @@
}
dummy_extra16.mark = &mark16_2;
return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
}
#endif
@@ -1394,10 +1399,10 @@
if ((counter & 0x1) != 0) {
setstack32(extra32);
return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
} else
return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
memset(&dummy_extra32, 0, sizeof(pcre32_extra));
dummy_extra32.flags = PCRE_EXTRA_MARK;
if (current->start_offset & F_STUDY) {
@@ -1406,7 +1411,7 @@
}
dummy_extra32.mark = &mark32_2;
return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
- current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
}
#endif