[Pcre-svn] [1480] code/trunk: Fix auto-possessification bug.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [1480] code/trunk: Fix auto-possessification bug.
Revision: 1480
          http://vcs.pcre.org/viewvc?view=rev&revision=1480
Author:   ph10
Date:     2014-05-27 19:02:51 +0100 (Tue, 27 May 2014)


Log Message:
-----------
Fix auto-possessification bug.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput1
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput1
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/ChangeLog    2014-05-27 18:02:51 UTC (rev 1480)
@@ -40,6 +40,9 @@
     level, when possessive repeated groups should always return to a higher
     level as they have no backtrack points in them. The empty string test now
     occurs at the outer level.
+    
+8.  Fixed a bug that was incorrectly auto-possessifying \w+ in the pattern
+    ^\w+(?>\s*)(?<=\w) which caused it not to match "test test". 



Version 8.35 04-April-2014

Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/pcre_compile.c    2014-05-27 18:02:51 UTC (rev 1480)
@@ -3076,6 +3076,7 @@
 const pcre_uint8 *set1, *set2, *set_end;
 pcre_uint32 chr;
 BOOL accepted, invert_bits;
+BOOL entered_a_group = FALSE;


/* Note: the base_list[1] contains whether the current opcode has a greedy
(represented by a non-zero value) quantifier. This is different from
@@ -3088,7 +3089,7 @@
Therefore infinite recursions are not possible. */

c = *code;
-
+
/* Skip over callouts */

   if (c == OP_CALLOUT)
@@ -3117,7 +3118,7 @@
     /* If the bracket is capturing, and referenced by an OP_RECURSE, or
     it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
     cannot be converted to a possessive form. */
-
+    
     if (base_list[1] == 0) return FALSE;


     switch(*(code - GET(code, 1)))
@@ -3129,8 +3130,10 @@
       case OP_ONCE:
       case OP_ONCE_NC:
       /* Atomic sub-patterns and assertions can always auto-possessify their
-      last iterator. */
-      return TRUE;
+      last iterator. However, if the group was entered as a result of checking 
+      a previous iterator, this is not possible. */
+
+      return !entered_a_group;
       }


     code += PRIV(OP_lengths)[c];
@@ -3149,6 +3152,8 @@
       code = next_code + 1 + LINK_SIZE;
       next_code += GET(next_code, 1);
       }
+
+    entered_a_group = TRUE;
     continue;


     case OP_BRAZERO:
@@ -3168,13 +3173,16 @@


     code += PRIV(OP_lengths)[c];
     continue;
+
+    default:
+    break; 
     }


/* Check for a supported opcode, and load its properties. */

   code = get_chr_property_list(code, utf, cd->fcc, list);
   if (code == NULL) return FALSE;    /* Unsupported */
-
+  
   /* If either opcode is a small character list, set pointers for comparing
   characters from that list with another list, or with a property. */


@@ -3406,9 +3414,8 @@
            rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];


-    if (!accepted)
-      return FALSE;
-
+    if (!accepted) return FALSE;
+      
     if (list[1] == 0) return TRUE;
     /* Might be an empty repeat. */
     continue;


Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1    2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testinput1    2014-05-27 18:02:51 UTC (rev 1480)
@@ -5681,4 +5681,7 @@
 '\A([^\"1]++|[\"2]([^\"3]*+|[\"4][\"5])*+[\"6])++'
     NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED


+/^\w+(?>\s*)(?<=\w)/
+ test test
+
/-- End of testinput1 --/

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testinput2    2014-05-27 18:02:51 UTC (rev 1480)
@@ -4062,4 +4062,6 @@


/(((((a)))))/Q

+/^\w+(?>\s*)(?<=\w)/BZ
+
/-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1    2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testoutput1    2014-05-27 18:02:51 UTC (rev 1480)
@@ -9335,4 +9335,8 @@
  1:  AFTER 
  2: 


+/^\w+(?>\s*)(?<=\w)/
+ test test
+ 0: tes
+
/-- End of testinput1 --/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2014-05-27 16:30:07 UTC (rev 1479)
+++ code/trunk/testdata/testoutput2    2014-05-27 18:02:51 UTC (rev 1480)
@@ -14149,4 +14149,20 @@
 /(((((a)))))/Q
 ** Missing 0 or 1 after /Q


+/^\w+(?>\s*)(?<=\w)/BZ
+------------------------------------------------------------------
+        Bra
+        ^
+        \w+
+        Once_NC
+        \s*+
+        Ket
+        AssertB
+        Reverse
+        \w
+        Ket
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput2 --/