[Pcre-svn] [1523] code/trunk: Disallow quantification of ass…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1523] code/trunk: Disallow quantification of assertions used as conditions.
Revision: 1523
          http://vcs.pcre.org/viewvc?view=rev&revision=1523
Author:   ph10
Date:     2015-02-08 18:00:45 +0000 (Sun, 08 Feb 2015)


Log Message:
-----------
Disallow quantification of assertions used as conditions.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_internal.h
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/ChangeLog    2015-02-08 18:00:45 UTC (rev 1523)
@@ -53,7 +53,15 @@
     The infinite loop is now broken (with the minimum length unset, that is,
     zero).


+11. If an assertion that was used as a condition was quantified with a minimum
+    of zero, matching went wrong. In particular, if the whole group had
+    unlimited repetition and could match an empty string, a segfault was
+    likely. The pattern (?(?=0)?)+ is an example that caused this. Perl allows
+    assertions to be quantified, but not if they are being used as conditions,
+    so the above pattern is faulted by Perl. PCRE has now been changed so that
+    it also rejects such patterns.


+
Version 8.36 26-September-2014
------------------------------


Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/pcre_compile.c    2015-02-08 18:00:45 UTC (rev 1523)
@@ -3639,13 +3639,13 @@
   c = *code;


/* When a pattern with bad UTF-8 encoding is compiled with NO_UTF_CHECK,
- it may compile without complaining, but may get into a loop here if the code
+ it may compile without complaining, but may get into a loop here if the code
pointer points to a bad value. This is, of course, a documented possibility,
- when NO_UTF_CHECK is set, so it isn't a bug, but we can detect this case and
+ when NO_UTF_CHECK is set, so it isn't a bug, but we can detect this case and
just give up on this optimization. */

   if (c >= OP_TABLE_LENGTH) return;
- 
+
   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
     {
     c -= get_repeat_base(c) - OP_STAR;
@@ -6657,7 +6657,10 @@
               (tempptr[2] == CHAR_EQUALS_SIGN ||
                tempptr[2] == CHAR_EXCLAMATION_MARK ||
                tempptr[2] == CHAR_LESS_THAN_SIGN))
+          {
+          cd->iscondassert = TRUE;
           break;
+          }


         /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
         need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
@@ -6770,7 +6773,7 @@
             goto FAILED;
             }
           PUT2(code, 2+LINK_SIZE, recno);
-          if (recno > cd->top_backref) cd->top_backref = recno; 
+          if (recno > cd->top_backref) cd->top_backref = recno;
           break;
           }


@@ -6793,7 +6796,7 @@
           int offset = i++;
           int count = 1;
           recno = GET2(slot, 0);   /* Number from first found */
-          if (recno > cd->top_backref) cd->top_backref = recno; 
+          if (recno > cd->top_backref) cd->top_backref = recno;
           for (; i < cd->names_found; i++)
             {
             slot += cd->name_entry_size;
@@ -7512,12 +7515,22 @@
       goto FAILED;
       }


-    /* Assertions used not to be repeatable, but this was changed for Perl
-    compatibility, so all kinds can now be repeated. We copy code into a
+    /* All assertions used not to be repeatable, but this was changed for Perl
+    compatibility. All kinds can now be repeated except for assertions that are
+    conditions (Perl also forbids these to be repeated). We copy code into a
     non-register variable (tempcode) in order to be able to pass its address
-    because some compilers complain otherwise. */
+    because some compilers complain otherwise. At the start of a conditional
+    group whose condition is an assertion, cd->iscondassert is set. We unset it
+    here so as to allow assertions later in the group to be quantified. */


-    previous = code;                      /* For handling repetition */
+    if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
+        cd->iscondassert)
+      {
+      previous = NULL;
+      cd->iscondassert = FALSE;
+      }
+    else previous = code;
+
     *code = bravalue;
     tempcode = code;
     tempreqvary = cd->req_varyopt;        /* Save value before bracket */
@@ -9118,6 +9131,7 @@
 cd->namedrefcount = 0;
 cd->start_code = cworkspace;
 cd->hwm = cworkspace;
+cd->iscondassert = FALSE;
 cd->start_workspace = cworkspace;
 cd->workspace_size = COMPILE_WORK_SIZE;
 cd->named_groups = named_groups;
@@ -9213,6 +9227,7 @@
 codestart = cd->name_table + re->name_entry_size * re->name_count;
 cd->start_code = codestart;
 cd->hwm = (pcre_uchar *)(cd->start_workspace);
+cd->iscondassert = FALSE;
 cd->req_varyopt = 0;
 cd->had_accept = FALSE;
 cd->had_pruneorskip = FALSE;


Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h    2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/pcre_internal.h    2015-02-08 18:00:45 UTC (rev 1523)
@@ -2446,6 +2446,7 @@
   BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
   BOOL check_lookbehind;            /* Lookbehinds need later checking */
   BOOL dupnames;                    /* Duplicate names exist */
+  BOOL iscondassert;                /* Next assert is a condition */ 
   int  nltype;                      /* Newline type */
   int  nllen;                       /* Newline string length */
   pcre_uchar nl[4];                 /* Newline string when fixed length */


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/testdata/testinput2    2015-02-08 18:00:45 UTC (rev 1523)
@@ -4078,10 +4078,10 @@


/\x{whatever}/

-"((?=(?(?=(?(?=(?(?=())))*)))))"
+"((?=(?(?=(?(?=(?(?=()))))))))"
     a


-"(?(?=)?==)(((((((((?=)))))))))"
+"(?(?=)==)(((((((((?=)))))))))"
     a


/^(?:(a)|b)(?(1)A|B)/I
@@ -4102,4 +4102,15 @@

/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/

+/(?(?=0)?)+/
+
+/(?(?=0)(?=00)?00765)/
+     00765
+
+/(?(?=0)(?=00)?00765|(?!3).56)/
+     00765
+     456
+     ** Failers
+     356   
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-02-08 17:02:05 UTC (rev 1522)
+++ code/trunk/testdata/testoutput2    2015-02-08 18:00:45 UTC (rev 1523)
@@ -14224,13 +14224,13 @@
 /\x{whatever}/
 Failed: non-hex character in \x{} (closing brace missing?) at offset 3


-"((?=(?(?=(?(?=(?(?=())))*)))))"
+"((?=(?(?=(?(?=(?(?=()))))))))"
     a
  0: 
  1: 
  2: 


-"(?(?=)?==)(((((((((?=)))))))))"
+"(?(?=)==)(((((((((?=)))))))))"
     a
 No match


@@ -14275,4 +14275,21 @@

/(?<N111>(?J)(?<N111>1(111111)11|)1|1|)(?(<N111>)1)/

+/(?(?=0)?)+/
+Failed: nothing to repeat at offset 7
+
+/(?(?=0)(?=00)?00765)/
+     00765
+ 0: 00765
+
+/(?(?=0)(?=00)?00765|(?!3).56)/
+     00765
+ 0: 00765
+     456
+ 0: 456
+     ** Failers
+No match
+     356   
+No match
+
 /-- End of testinput2 --/