[Pcre-svn] [381] code/trunk: Fix bug with (?(?=.*b)b|^) thinking it must match at start of line

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [381] code/trunk: Fix bug with (?(?=.*b)b|^) thinking it must match at start of line; also fix bug causing a crash when auto-callout is used with a conditional assertion
Revision: 381
          http://vcs.pcre.org/viewvc?view=rev&revision=381
Author:   ph10
Date:     2009-03-03 16:08:23 +0000 (Tue, 03 Mar 2009)


Log Message:
-----------
Fix bug with (?(?=.*b)b|^) thinking it must match at start of line; also fix
bug causing a crash when auto-callout is used with a conditional assertion.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/pcre_exec.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/ChangeLog    2009-03-03 16:08:23 UTC (rev 381)
@@ -39,7 +39,11 @@


 8.  The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
     start or after a newline", because the conditional assertion was not being 
-    skipped when checking for this condition. 
+    correctly handled. The rule now is that both the assertion and what follows 
+    in the first alternative must satisfy the test. 
+    
+9.  If auto-callout was enabled in a pattern with a conditional group, PCRE 
+    could crash during matching.



Version 7.8 05-Sep-08

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/pcre_compile.c    2009-03-03 16:08:23 UTC (rev 381)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.


                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge


 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -5811,28 +5811,28 @@
      NULL, 0, FALSE);
    register int op = *scode;


- /* If we are at the start of a conditional group, skip over the condition.
- before inspecting the first opcode after the condition. */
+ /* If we are at the start of a conditional assertion group, *both* the
+ conditional assertion *and* what follows the condition must satisfy the test
+ for start of line. Other kinds of condition fail. Note that there may be an
+ auto-callout at the start of a condition. */

    if (op == OP_COND)
      {
      scode += 1 + LINK_SIZE; 
+     if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
      switch (*scode)
        {
        case OP_CREF:
        case OP_RREF:
-       scode += 3;
-       break;
-       
        case OP_DEF:
-       scode += 1; 
-       break;
+       return FALSE; 


        default:     /* Assertion */
+       if (!is_startline(scode, bracket_map, backref_map)) return FALSE; 
        do scode += GET(scode, 1); while (*scode == OP_ALT);
+       scode += 1 + LINK_SIZE; 
        break; 
        }  
-
      scode = first_significant_code(scode, NULL, 0, FALSE);
      op = *scode; 
      }  


Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c    2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/pcre_exec.c    2009-03-03 16:08:23 UTC (rev 381)
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.


                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge


-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -787,6 +787,34 @@

     case OP_COND:
     case OP_SCOND:
+    /* Because of the way auto-callout works during compile, a callout item is
+    inserted between OP_COND and an assertion condition. */
+ 
+    if (ecode[LINK_SIZE+1] == OP_CALLOUT)
+      {
+      if (pcre_callout != NULL)
+        {
+        pcre_callout_block cb;
+        cb.version          = 1;   /* Version 1 of the callout block */
+        cb.callout_number   = ecode[LINK_SIZE+2];
+        cb.offset_vector    = md->offset_vector;
+        cb.subject          = (PCRE_SPTR)md->start_subject;
+        cb.subject_length   = md->end_subject - md->start_subject;
+        cb.start_match      = mstart - md->start_subject;
+        cb.current_position = eptr - md->start_subject;
+        cb.pattern_position = GET(ecode, LINK_SIZE + 3);
+        cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
+        cb.capture_top      = offset_top/2;
+        cb.capture_last     = md->capture_last;
+        cb.callout_data     = md->callout_data;
+        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
+        if (rrc < 0) RRETURN(rrc);
+        }
+      ecode += _pcre_OP_lengths[OP_CALLOUT];
+      }
+      
+    /* Now see what the actual condition is */
+ 
     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
       {
       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testinput1    2009-03-03 16:08:23 UTC (rev 381)
@@ -4055,4 +4055,10 @@
    adc
    abc 


+/(?(?=b).*b|^d)/
+    abc
+
+/(?(?=.*b).*b|^d)/
+    abc
+
 / End of testinput1 /


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testinput2    2009-03-03 16:08:23 UTC (rev 381)
@@ -2726,4 +2726,12 @@


/(abc|pqr|123){0}[xyz]/SI

+/(?(?=.*b)b|^)/CI
+ adc
+ abc
+
+/(?(?=b).*b|^d)/I
+
+/(?(?=.*b).*b|^d)/I
+
/ End of testinput2 /

Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testoutput1    2009-03-03 16:08:23 UTC (rev 381)
@@ -6633,4 +6633,12 @@
    abc 
  0: b


+/(?(?=b).*b|^d)/
+    abc
+ 0: b
+
+/(?(?=.*b).*b|^d)/
+    abc
+ 0: ab
+
 / End of testinput1 /


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2009-03-03 12:32:47 UTC (rev 380)
+++ code/trunk/testdata/testoutput2    2009-03-03 16:08:23 UTC (rev 381)
@@ -9638,4 +9638,56 @@
 No need char
 Starting byte set: x y z 


+/(?(?=.*b)b|^)/CI
+Capturing subpattern count = 0
+Partial matching not supported
+Options:
+No first char
+No need char
+   adc
+--->adc
+ +0 ^       (?(?=.*b)b|^)
+ +2 ^       (?=.*b)
+ +5 ^       .*
+ +7 ^  ^    b
+ +7 ^ ^     b
+ +7 ^^      b
+ +7 ^       b
+ 0: 
+   abc 
+--->abc
+ +0 ^       (?(?=.*b)b|^)
+ +2 ^       (?=.*b)
+ +5 ^       .*
+ +7 ^  ^    b
+ +7 ^ ^     b
+ +7 ^^      b
+ +8 ^ ^     )
+ +9 ^       b
+ +0  ^      (?(?=.*b)b|^)
+ +2  ^      (?=.*b)
+ +5  ^      .*
+ +7  ^ ^    b
+ +7  ^^     b
+ +7  ^      b
+ +8  ^^     )
+ +9  ^      b
++10  ^^     |
++13  ^^     
+ 0: b
+   
+/(?(?=b).*b|^d)/I
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+No first char
+No need char
+
+/(?(?=.*b).*b|^d)/I
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+First char at start or follows newline
+No need char
+
 / End of testinput2 /