[Pcre-svn] [1352] code/trunk: Fix incorrect "first data item…

トップ ページ
このメッセージを削除
著者: Subversion repository
日付:  
To: pcre-svn
題目: [Pcre-svn] [1352] code/trunk: Fix incorrect "first data item" recorded for an assertion condition.
Revision: 1352
          http://vcs.pcre.org/viewvc?view=rev&revision=1352
Author:   ph10
Date:     2013-07-29 16:49:21 +0100 (Mon, 29 Jul 2013)


Log Message:
-----------
Fix incorrect "first data item" recorded for an assertion condition.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput12
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2013-07-27 11:18:33 UTC (rev 1351)
+++ code/trunk/ChangeLog    2013-07-29 15:49:21 UTC (rev 1352)
@@ -51,6 +51,12 @@
         there was no match, the code could incorrectly back up beyond the match 
         point, and potentially beyond the first character in the subject, 
         leading to a segfault or an incorrect match result.
+        
+10. A conditional group with an assertion condition could lead to PCRE 
+    recording an incorrect first data item for a match if no other first data 
+    item was recorded. For example, the pattern (?(?=ab)ab) recorded "a" as a
+    first data item, and therefore matched "ca" after "c" instead of at the 
+    start.



Version 8.33 28-May-2013

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2013-07-27 11:18:33 UTC (rev 1351)
+++ code/trunk/pcre_compile.c    2013-07-29 15:49:21 UTC (rev 1352)
@@ -2361,7 +2361,7 @@
 typedef struct recurse_check {
   struct recurse_check *prev;
   const pcre_uchar *group;
-} recurse_check;    
+} recurse_check;


static BOOL
could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
@@ -2377,7 +2377,7 @@
const pcre_uchar *ccode;

c = *code;
-
+
/* Skip over forward assertions; the other assertions are skipped by
first_significant_code() with a TRUE final argument. */

@@ -2405,27 +2405,27 @@
     NULL. */


     if (cd->start_workspace != NULL)
-      { 
-      const pcre_uchar *tcode; 
+      {
+      const pcre_uchar *tcode;
       for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
         if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
       if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
       }
-    
-    /* If we are scanning a completed pattern, there are no forward references 
-    and all groups are complete. We need to detect whether this is a recursive 
+
+    /* If we are scanning a completed pattern, there are no forward references
+    and all groups are complete. We need to detect whether this is a recursive
     call, as otherwise there will be an infinite loop. If it is a recursion,
-    just skip over it. Simple recursions are easily detected. For mutual 
-    recursions we keep a chain on the stack. */ 
-     
+    just skip over it. Simple recursions are easily detected. For mutual
+    recursions we keep a chain on the stack. */
+
     else
-      {  
+      {
       recurse_check *r = recurses;
       const pcre_uchar *endgroup = scode;
-       
+
       do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
       if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
-      
+
       for (r = recurses; r != NULL; r = r->prev)
         if (r->group == scode) break;
       if (r != NULL) continue;   /* Mutual recursion */
@@ -2436,8 +2436,8 @@


     empty_branch = FALSE;
     this_recurse.prev = recurses;
-    this_recurse.group = scode; 
-     
+    this_recurse.group = scode;
+
     do
       {
       if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
@@ -2557,29 +2557,29 @@
     case OP_ANY:
     case OP_ALLANY:
     case OP_ANYBYTE:
-     
+
     case OP_PROP:
     case OP_NOTPROP:
     case OP_ANYNL:
-     
+
     case OP_NOT_HSPACE:
     case OP_HSPACE:
     case OP_NOT_VSPACE:
-    case OP_VSPACE:    
+    case OP_VSPACE:
     case OP_EXTUNI:
- 
+
     case OP_NOT_DIGIT:
     case OP_DIGIT:
     case OP_NOT_WHITESPACE:
     case OP_WHITESPACE:
     case OP_NOT_WORDCHAR:
     case OP_WORDCHAR:
-     
+
     case OP_CHAR:
     case OP_CHARI:
     case OP_NOT:
     case OP_NOTI:
-     
+
     case OP_PLUS:
     case OP_PLUSI:
     case OP_MINPLUS:
@@ -2589,22 +2589,22 @@
     case OP_NOTPLUSI:
     case OP_NOTMINPLUS:
     case OP_NOTMINPLUSI:
-     
+
     case OP_POSPLUS:
     case OP_POSPLUSI:
     case OP_NOTPOSPLUS:
     case OP_NOTPOSPLUSI:
-     
+
     case OP_EXACT:
     case OP_EXACTI:
     case OP_NOTEXACT:
-    case OP_NOTEXACTI:  
-     
+    case OP_NOTEXACTI:
+
     case OP_TYPEPLUS:
     case OP_TYPEMINPLUS:
     case OP_TYPEPOSPLUS:
     case OP_TYPEEXACT:
-     
+
     return FALSE;


     /* These are going to continue, as they may be empty, but we have to
@@ -2644,52 +2644,52 @@
 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
     case OP_STAR:
     case OP_STARI:
-    case OP_NOTSTAR:    
-    case OP_NOTSTARI:   
-     
+    case OP_NOTSTAR:
+    case OP_NOTSTARI:
+
     case OP_MINSTAR:
     case OP_MINSTARI:
-    case OP_NOTMINSTAR: 
+    case OP_NOTMINSTAR:
     case OP_NOTMINSTARI:
-     
+
     case OP_POSSTAR:
     case OP_POSSTARI:
-    case OP_NOTPOSSTAR: 
+    case OP_NOTPOSSTAR:
     case OP_NOTPOSSTARI:
-     
+
     case OP_QUERY:
     case OP_QUERYI:
-    case OP_NOTQUERY:   
-    case OP_NOTQUERYI:  
-     
+    case OP_NOTQUERY:
+    case OP_NOTQUERYI:
+
     case OP_MINQUERY:
     case OP_MINQUERYI:
     case OP_NOTMINQUERY:
     case OP_NOTMINQUERYI:
-     
+
     case OP_POSQUERY:
     case OP_POSQUERYI:
     case OP_NOTPOSQUERY:
     case OP_NOTPOSQUERYI:
-     
+
     if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
     break;


     case OP_UPTO:
     case OP_UPTOI:
-    case OP_NOTUPTO:    
-    case OP_NOTUPTOI:   
-     
+    case OP_NOTUPTO:
+    case OP_NOTUPTOI:
+
     case OP_MINUPTO:
     case OP_MINUPTOI:
-    case OP_NOTMINUPTO: 
+    case OP_NOTMINUPTO:
     case OP_NOTMINUPTOI:
-     
+
     case OP_POSUPTO:
     case OP_POSUPTOI:
-    case OP_NOTPOSUPTO: 
+    case OP_NOTPOSUPTO:
     case OP_NOTPOSUPTOI:
-     
+
     if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
     break;
 #endif
@@ -3754,22 +3754,22 @@
 phase. The value of lengthptr distinguishes the two phases.


 Arguments:
-  optionsptr     pointer to the option bits
-  codeptr        points to the pointer to the current code point
-  ptrptr         points to the current pattern pointer
-  errorcodeptr   points to error code variable
-  firstcharptr    place to put the first required character
+  optionsptr        pointer to the option bits
+  codeptr           points to the pointer to the current code point
+  ptrptr            points to the current pattern pointer
+  errorcodeptr      points to error code variable
+  firstcharptr      place to put the first required character
   firstcharflagsptr place to put the first character flags, or a negative number
-  reqcharptr     place to put the last required character
-  reqcharflagsptr place to put the last required character flags, or a negative number
-  bcptr          points to current branch chain
-  cond_depth     conditional nesting depth
-  cd             contains pointers to tables etc.
-  lengthptr      NULL during the real compile phase
-                 points to length accumulator during pre-compile phase
+  reqcharptr        place to put the last required character
+  reqcharflagsptr   place to put the last required character flags, or a negative number
+  bcptr             points to current branch chain
+  cond_depth        conditional nesting depth
+  cd                contains pointers to tables etc.
+  lengthptr         NULL during the real compile phase
+                    points to length accumulator during pre-compile phase


-Returns:         TRUE on success
-                 FALSE, with *errorcodeptr set non-zero on error
+Returns:            TRUE on success
+                    FALSE, with *errorcodeptr set non-zero on error
 */


 static BOOL
@@ -7058,7 +7058,8 @@
         *code++ = OP_PROP;
         *code++ = PT_CLIST;
         *code++ = c;
-        if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE;
+        if (firstcharflags == REQ_UNSET)
+          firstcharflags = zerofirstcharflags = REQ_NONE;
         break;
         }
       }
@@ -7147,24 +7148,24 @@
 value of lengthptr distinguishes the two phases.


 Arguments:
-  options        option bits, including any changes for this subpattern
-  codeptr        -> the address of the current code pointer
-  ptrptr         -> the address of the current pattern pointer
-  errorcodeptr   -> pointer to error code variable
-  lookbehind     TRUE if this is a lookbehind assertion
-  reset_bracount TRUE to reset the count for each branch
-  skipbytes      skip this many bytes at start (for brackets and OP_COND)
-  cond_depth     depth of nesting for conditional subpatterns
-  firstcharptr    place to put the first required character
+  options           option bits, including any changes for this subpattern
+  codeptr           -> the address of the current code pointer
+  ptrptr            -> the address of the current pattern pointer
+  errorcodeptr      -> pointer to error code variable
+  lookbehind        TRUE if this is a lookbehind assertion
+  reset_bracount    TRUE to reset the count for each branch
+  skipbytes         skip this many bytes at start (for brackets and OP_COND)
+  cond_depth        depth of nesting for conditional subpatterns
+  firstcharptr      place to put the first required character
   firstcharflagsptr place to put the first character flags, or a negative number
-  reqcharptr     place to put the last required character
-  reqcharflagsptr place to put the last required character flags, or a negative number
-  bcptr          pointer to the chain of currently open branches
-  cd             points to the data block with tables pointers etc.
-  lengthptr      NULL during the real compile phase
-                 points to length accumulator during pre-compile phase
+  reqcharptr        place to put the last required character
+  reqcharflagsptr   place to put the last required character flags, or a negative number
+  bcptr             pointer to the chain of currently open branches
+  cd                points to the data block with tables pointers etc.
+  lengthptr         NULL during the real compile phase
+                    points to length accumulator during pre-compile phase


-Returns:         TRUE on success
+Returns:            TRUE on success
 */


static BOOL
@@ -7701,13 +7702,14 @@
discarded, because they can cause conflicts with actual literals that follow.
However, if we end up without a first char setting for an unanchored pattern,
it is worth scanning the regex to see if there is an initial asserted first
-char. If all branches start with the same asserted char, or with a bracket all
-of whose alternatives start with the same asserted char (recurse ad lib), then
-we return that char, otherwise -1.
+char. If all branches start with the same asserted char, or with a
+non-conditional bracket all of whose alternatives start with the same asserted
+char (recurse ad lib), then we return that char, with the flags set to zero or
+REQ_CASELESS; otherwise return zero with REQ_NONE in the flags.

 Arguments:
   code       points to start of expression (the bracket)
-  flags       points to the first char flags, or to REQ_NONE
+  flags      points to the first char flags, or to REQ_NONE
   inassert   TRUE if in an assertion


 Returns:     the fixed first char, or 0 with REQ_NONE in flags
@@ -7744,7 +7746,6 @@
      case OP_ASSERT:
      case OP_ONCE:
      case OP_ONCE_NC:
-     case OP_COND:
      d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
      if (dflags < 0)
        return 0;
@@ -8297,12 +8298,12 @@
     }
   }


-/* If the workspace had to be expanded, free the new memory. Set the pointer to
+/* If the workspace had to be expanded, free the new memory. Set the pointer to
NULL to indicate that forward references have been filled in. */

if (cd->workspace_size > COMPILE_WORK_SIZE)
(PUBL(free))((void *)cd->start_workspace);
-cd->start_workspace = NULL;
+cd->start_workspace = NULL;

/* Give an error if there's back reference to a non-existent capturing
subpattern. */
@@ -8506,7 +8507,7 @@
}
#endif /* PCRE_DEBUG */

-/* Check for a pattern that can match an empty string, so that this information
+/* Check for a pattern that can match an empty string, so that this information
can be provided to applications. */

 do
@@ -8515,7 +8516,7 @@
     {
     re->flags |= PCRE_MATCH_EMPTY;
     break;
-    }  
+    }
   codestart += GET(codestart, 1);
   }
 while (*codestart == OP_ALT);


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2013-07-27 11:18:33 UTC (rev 1351)
+++ code/trunk/testdata/testinput1    2013-07-29 15:49:21 UTC (rev 1352)
@@ -1,6 +1,6 @@
 /-- This set of tests is for features that are compatible with all versions of
-    Perl >= 5.10, in non-UTF-8 mode. It should run clean for both the 8-bit and
-    16-bit PCRE libraries. --/
+    Perl >= 5.10, in non-UTF-8 mode. It should run clean for the 8-bit, 16-bit,
+    and 32-bit PCRE libraries. --/


 /the quick brown fox/
     the quick brown fox
@@ -4221,9 +4221,6 @@
     ab
     bc


-/^(?=(a)){0}b(?1)/
-    backgammon
-
 /^(?=(?1))?[az]([abc])d/
     abd 
     zcdxx 
@@ -5608,4 +5605,8 @@
     aaaa
     aaa     


+/(?(?=ab)ab)/+
+    ca
+    cd 
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2013-07-27 11:18:33 UTC (rev 1351)
+++ code/trunk/testdata/testinput2    2013-07-29 15:49:21 UTC (rev 1352)
@@ -3837,4 +3837,9 @@
     aaaaaaaaaaaaaz
     aaaaaaaaaaaaaz\Q10


+/-- This test causes a segfault with Perl 5.18.0 --/
+
+/^(?=(a)){0}b(?1)/
+    backgammon
+
 /-- End of testinput2 --/


Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2013-07-27 11:18:33 UTC (rev 1351)
+++ code/trunk/testdata/testoutput1    2013-07-29 15:49:21 UTC (rev 1352)
@@ -1,6 +1,6 @@
 /-- This set of tests is for features that are compatible with all versions of
-    Perl >= 5.10, in non-UTF-8 mode. It should run clean for both the 8-bit and
-    16-bit PCRE libraries. --/
+    Perl >= 5.10, in non-UTF-8 mode. It should run clean for the 8-bit, 16-bit,
+    and 32-bit PCRE libraries. --/


 /the quick brown fox/
     the quick brown fox
@@ -6904,10 +6904,6 @@
     bc
  0: b


-/^(?=(a)){0}b(?1)/
-    backgammon
- 0: ba
-
 /^(?=(?1))?[az]([abc])d/
     abd 
  0: abd
@@ -9200,4 +9196,12 @@
     aaa     
 No match


+/(?(?=ab)ab)/+
+    ca
+ 0: 
+ 0+ ca
+    cd 
+ 0: 
+ 0+ cd
+
 /-- End of testinput1 --/


Modified: code/trunk/testdata/testoutput12
===================================================================
--- code/trunk/testdata/testoutput12    2013-07-27 11:18:33 UTC (rev 1351)
+++ code/trunk/testdata/testoutput12    2013-07-29 15:49:21 UTC (rev 1352)
@@ -15,7 +15,7 @@
 Capturing subpattern count = 0
 May match empty string
 No options
-First char = 'a'
+No first char
 No need char
 Study returned NULL
 JIT study was not successful
@@ -24,7 +24,7 @@
 Capturing subpattern count = 0
 May match empty string
 No options
-First char = 'a'
+No first char
 No need char
 Subject length lower bound = -1
 No set of starting bytes


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2013-07-27 11:18:33 UTC (rev 1351)
+++ code/trunk/testdata/testoutput2    2013-07-29 15:49:21 UTC (rev 1352)
@@ -12718,4 +12718,10 @@
     aaaaaaaaaaaaaz\Q10
 Error -21 (recursion limit exceeded)


+/-- This test causes a segfault with Perl 5.18.0 --/
+
+/^(?=(a)){0}b(?1)/
+    backgammon
+ 0: ba
+
 /-- End of testinput2 --/