[Pcre-svn] [1452] code/trunk: Fix an infinite fast-forward n…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1452] code/trunk: Fix an infinite fast-forward newline on invalid UTF input.
Revision: 1452
          http://vcs.pcre.org/viewvc?view=rev&revision=1452
Author:   zherczeg
Date:     2014-01-28 16:07:52 +0000 (Tue, 28 Jan 2014)


Log Message:
-----------
Fix an infinite loop in the fast-forward newline search on invalid UTF input.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_jit_compile.c
    code/trunk/pcre_jit_test.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2014-01-21 01:49:14 UTC (rev 1451)
+++ code/trunk/ChangeLog    2014-01-28 16:07:52 UTC (rev 1452)
@@ -96,7 +96,11 @@


19. Little endian PowerPC systems are supported now by the JIT compiler.

+20. The fast forward newline mechanism could enter an infinite loop on
+    certain invalid UTF-8 input. Although we don't support these cases,
+    this issue can be fixed by a performance optimization.


+
Version 8.34 15-December-2013
-----------------------------


Modified: code/trunk/pcre_jit_compile.c
===================================================================
--- code/trunk/pcre_jit_compile.c    2014-01-21 01:49:14 UTC (rev 1451)
+++ code/trunk/pcre_jit_compile.c    2014-01-28 16:07:52 UTC (rev 1452)
@@ -398,6 +398,7 @@
   struct sljit_label *quit_label;
   struct sljit_label *forced_quit_label;
   struct sljit_label *accept_label;
+  struct sljit_label *ff_newline_shortcut;
   stub_list *stubs;
   label_addr_list *label_addrs;
   recurse_entry *entries;
@@ -3871,7 +3872,7 @@
   JUMPHERE(lastchar);


   if (firstline)
-    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
+    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
   return;
   }


@@ -3881,6 +3882,8 @@
skip_char_back(common);

loop = LABEL();
+common->ff_newline_shortcut = loop;
+
read_char_range(common, common->nlmin, common->nlmax, TRUE);
lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
@@ -10133,10 +10136,19 @@

 if ((re->options & PCRE_ANCHORED) == 0)
   {
-  if ((re->options & PCRE_FIRSTLINE) == 0)
-    CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+  if (common->ff_newline_shortcut != NULL)
+    {
+    if ((re->options & PCRE_FIRSTLINE) == 0)
+      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
+    /* There cannot be more newlines here. */
+    }
   else
-    CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+    {
+    if ((re->options & PCRE_FIRSTLINE) == 0)
+      CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label);
+    else
+      CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label);
+    }
   }


/* No more remaining characters. */

Modified: code/trunk/pcre_jit_test.c
===================================================================
--- code/trunk/pcre_jit_test.c    2014-01-21 01:49:14 UTC (rev 1451)
+++ code/trunk/pcre_jit_test.c    2014-01-28 16:07:52 UTC (rev 1452)
@@ -392,6 +392,10 @@
     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
+    { MUA | PCRE_NO_UTF8_CHECK, 1, "^.a", "\n\x80\nxa" },
+    { MUA, 1, "^", "\r\n" },
+    { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1 | F_NOMATCH, "^", "\r\n" },
+    { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 1, "^", "\r\na" },


     /* Any character except newline or any newline. */
     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
@@ -650,6 +654,7 @@
     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
     { PCRE_FIRSTLINE | PCRE_NEWLINE_LF | PCRE_DOTALL, 0 | F_NOMATCH, "ab.", "ab" },
+    { MUA | PCRE_FIRSTLINE, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },


     /* Recurse. */
     { MUA, 0, "(a)(?1)", "aa" },
@@ -980,7 +985,7 @@
         if (offsetmap)
             *offsetmap++ = (int)(iptr - (unsigned char*)input);


-        if (!(*iptr & 0x80))
+        if (*iptr < 0xc0)
             c = *iptr++;
         else if (!(*iptr & 0x20)) {
             c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
@@ -1052,7 +1057,7 @@
         if (offsetmap)
             *offsetmap++ = (int)(iptr - (unsigned char*)input);


-        if (!(*iptr & 0x80))
+        if (*iptr < 0xc0)
             c = *iptr++;
         else if (!(*iptr & 0x20)) {
             c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
@@ -1326,10 +1331,10 @@
             if ((counter & 0x1) != 0) {
                 setstack8(extra8);
                 return_value8[0] = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
-                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32);
             } else
                 return_value8[0] = pcre_jit_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
-                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32, getstack8());
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_1, 32, getstack8());
             memset(&dummy_extra8, 0, sizeof(pcre_extra));
             dummy_extra8.flags = PCRE_EXTRA_MARK;
             if (current->start_offset & F_STUDY) {
@@ -1338,7 +1343,7 @@
             }
             dummy_extra8.mark = &mark8_2;
             return_value8[1] = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
-                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
+                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector8_2, 32);
         }
 #endif


@@ -1360,10 +1365,10 @@
             if ((counter & 0x1) != 0) {
                 setstack16(extra16);
                 return_value16[0] = pcre16_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
-                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32);
             } else
                 return_value16[0] = pcre16_jit_exec(re16, extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
-                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32, getstack16());
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_1, 32, getstack16());
             memset(&dummy_extra16, 0, sizeof(pcre16_extra));
             dummy_extra16.flags = PCRE_EXTRA_MARK;
             if (current->start_offset & F_STUDY) {
@@ -1372,7 +1377,7 @@
             }
             dummy_extra16.mark = &mark16_2;
             return_value16[1] = pcre16_exec(re16, &dummy_extra16, regtest_buf16, length16, current->start_offset & OFFSET_MASK,
-                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
+                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector16_2, 32);
         }
 #endif


@@ -1394,10 +1399,10 @@
             if ((counter & 0x1) != 0) {
                 setstack32(extra32);
                 return_value32[0] = pcre32_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
-                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32);
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32);
             } else
                 return_value32[0] = pcre32_jit_exec(re32, extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
-                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_1, 32, getstack32());
+                    current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_1, 32, getstack32());
             memset(&dummy_extra32, 0, sizeof(pcre32_extra));
             dummy_extra32.flags = PCRE_EXTRA_MARK;
             if (current->start_offset & F_STUDY) {
@@ -1406,7 +1411,7 @@
             }
             dummy_extra32.mark = &mark32_2;
             return_value32[1] = pcre32_exec(re32, &dummy_extra32, regtest_buf32, length32, current->start_offset & OFFSET_MASK,
-                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector32_2, 32);
+                current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | PCRE_NO_UTF8_CHECK), ovector32_2, 32);
         }
 #endif