[Pcre-svn] [528] code/trunk: Add knowledge of \R to auto-possessify feature.

Página Inicial
Delete this message
Autor: Subversion repository
Data:  
Para: pcre-svn
Assunto: [Pcre-svn] [528] code/trunk: Add knowledge of \R to auto-possessify feature.
Revision: 528
          http://vcs.pcre.org/viewvc?view=rev&revision=528
Author:   ph10
Date:     2010-05-29 17:40:22 +0100 (Sat, 29 May 2010)


Log Message:
-----------
Add knowledge of \R to auto-possessify feature.

Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/pcre_compile.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2010-05-29 15:50:39 UTC (rev 527)
+++ code/trunk/ChangeLog    2010-05-29 16:40:22 UTC (rev 528)
@@ -57,6 +57,8 @@


 14. pcre_study() now recognizes \h, \v, and \R when constructing a bit map of 
     possible starting bytes for non-anchored patterns. 
+    
+15. The "auto-possessify" feature of pcre_compile() now recognizes \R. 





Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c    2010-05-29 15:50:39 UTC (rev 527)
+++ code/trunk/pcre_compile.c    2010-05-29 16:40:22 UTC (rev 528)
@@ -2544,6 +2544,9 @@
   else
 #endif  /* SUPPORT_UTF8 */
   return (item == cd->fcc[next]);  /* Non-UTF-8 mode */
+  
+  /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. 
+  When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ 


   case OP_DIGIT:
   return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
@@ -2586,11 +2589,12 @@
     case 0x202f:
     case 0x205f:
     case 0x3000:
-    return op_code != OP_HSPACE;
+    return op_code == OP_NOT_HSPACE;
     default:
-    return op_code == OP_HSPACE;
+    return op_code != OP_NOT_HSPACE;
     }


+  case OP_ANYNL:
   case OP_VSPACE:
   case OP_NOT_VSPACE:
   switch(next)
@@ -2602,9 +2606,9 @@
     case 0x85:
     case 0x2028:
     case 0x2029:
-    return op_code != OP_VSPACE;
+    return op_code == OP_NOT_VSPACE;
     default:
-    return op_code == OP_VSPACE;
+    return op_code != OP_NOT_VSPACE;
     }


default:
@@ -2612,7 +2616,10 @@
}


-/* Handle the case when the next item is \d, \s, etc. */
+/* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP
+is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are
+generated only when PCRE_UCP is *not* set, that is, when only ASCII
+characteristics are recognized. */

switch(op_code)
{
@@ -2691,32 +2698,35 @@

   case OP_DIGIT:
   return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
-         next == -ESC_h || next == -ESC_v;
+         next == -ESC_h || next == -ESC_v || next == -ESC_R;


case OP_NOT_DIGIT:
return next == -ESC_d;

case OP_WHITESPACE:
- return next == -ESC_S || next == -ESC_d || next == -ESC_w;
+ return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;

case OP_NOT_WHITESPACE:
return next == -ESC_s || next == -ESC_h || next == -ESC_v;

   case OP_HSPACE:
-  return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
+  return next == -ESC_S || next == -ESC_H || next == -ESC_d || 
+         next == -ESC_w || next == -ESC_v || next == -ESC_R;


case OP_NOT_HSPACE:
return next == -ESC_h;

/* Can't have \S in here because VT matches \S (Perl anomaly) */
+ case OP_ANYNL:
case OP_VSPACE:
return next == -ESC_V || next == -ESC_d || next == -ESC_w;

case OP_NOT_VSPACE:
- return next == -ESC_v;
+ return next == -ESC_v || next == -ESC_R;

   case OP_WORDCHAR:
-  return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
+  return next == -ESC_W || next == -ESC_s || next == -ESC_h || 
+         next == -ESC_v || next == -ESC_R;


case OP_NOT_WORDCHAR:
return next == -ESC_w || next == -ESC_d;

Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2010-05-29 15:50:39 UTC (rev 527)
+++ code/trunk/testdata/testinput2    2010-05-29 16:40:22 UTC (rev 528)
@@ -3481,4 +3481,14 @@
   ** Failers
   A\r\nB    


+/\R+b/BZ
+
+/\R+\n/BZ
+
+/\R+\d/BZ
+
+/\d*\R/BZ
+
+/\s*\R/BZ
+
/-- End of testinput2 --/

Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2010-05-29 15:50:39 UTC (rev 527)
+++ code/trunk/testdata/testoutput2    2010-05-29 16:40:22 UTC (rev 528)
@@ -11081,4 +11081,49 @@
   A\r\nB    
 No match


+/\R+b/BZ
+------------------------------------------------------------------
+        Bra
+        \R++
+        b
+        Ket
+        End
+------------------------------------------------------------------
+
+/\R+\n/BZ
+------------------------------------------------------------------
+        Bra
+        \R+
+        \x0a
+        Ket
+        End
+------------------------------------------------------------------
+
+/\R+\d/BZ
+------------------------------------------------------------------
+        Bra
+        \R++
+        \d
+        Ket
+        End
+------------------------------------------------------------------
+
+/\d*\R/BZ
+------------------------------------------------------------------
+        Bra
+        \d*+
+        \R
+        Ket
+        End
+------------------------------------------------------------------
+
+/\s*\R/BZ
+------------------------------------------------------------------
+        Bra
+        \s*+
+        \R
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput2 --/