Revision: 1486
http://vcs.pcre.org/viewvc?view=rev&revision=1486
Author: ph10
Date: 2014-06-18 17:48:57 +0100 (Wed, 18 Jun 2014)
Log Message:
-----------
Fix not including VT in starting characters for \s.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_study.c
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
code/trunk/testdata/testoutput15
code/trunk/testdata/testoutput18-16
code/trunk/testdata/testoutput18-32
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2014-06-18 16:31:32 UTC (rev 1485)
+++ code/trunk/ChangeLog 2014-06-18 16:48:57 UTC (rev 1486)
@@ -64,6 +64,10 @@
12. A pattern such as /^s?c/mi8 where the optional character has more than
one "other case" was incorrectly compiled such that it would only try to
match starting at "c".
+
+13. When a pattern starting with \s was studied, VT was not included in the
+ list of possible starting characters; this should have been part of the
+ 8.34/18 patch.
Version 8.35 04-April-2014
Modified: code/trunk/pcre_study.c
===================================================================
--- code/trunk/pcre_study.c 2014-06-18 16:31:32 UTC (rev 1485)
+++ code/trunk/pcre_study.c 2014-06-18 16:48:57 UTC (rev 1486)
@@ -1106,24 +1106,17 @@
try_next = FALSE;
break;
- /* The cbit_space table has vertical tab as whitespace; we have to
- ensure it is set as not whitespace. Luckily, the code value is the same
- (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
+ /* The cbit_space table has vertical tab as whitespace; we no longer
+ have to play fancy tricks because Perl added VT to its whitespace at
+ release 5.18. PCRE added it at release 8.34. */
case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
- start_bits[1] |= 0x08;
try_next = FALSE;
break;
- /* The cbit_space table has vertical tab as whitespace; we have to not
- set it from the table. Luckily, the code value is the same (0x0b) in
- ASCII and EBCDIC, so we can just adjust the appropriate bit. */
-
case OP_WHITESPACE:
- c = start_bits[1]; /* Save in case it was already set */
set_type_bits(start_bits, cbit_space, table_limit, cd);
- start_bits[1] = (start_bits[1] & ~0x08) | c;
try_next = FALSE;
break;
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2014-06-18 16:31:32 UTC (rev 1485)
+++ code/trunk/testdata/testinput1 2014-06-18 16:48:57 UTC (rev 1486)
@@ -5705,4 +5705,7 @@
adl
bcl
+/\sabc/
+ \x{0b}abc
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2014-06-18 16:31:32 UTC (rev 1485)
+++ code/trunk/testdata/testoutput1 2014-06-18 16:48:57 UTC (rev 1486)
@@ -9389,4 +9389,8 @@
bcl
0: l
+/\sabc/
+ \x{0b}abc
+ 0: \x0babc
+
/-- End of testinput1 --/
Modified: code/trunk/testdata/testoutput15
===================================================================
--- code/trunk/testdata/testoutput15 2014-06-18 16:31:32 UTC (rev 1485)
+++ code/trunk/testdata/testoutput15 2014-06-18 16:48:57 UTC (rev 1486)
@@ -871,7 +871,7 @@
No first char
Need char = 'x'
Subject length lower bound = 5
-Starting chars: \x09 \x0a \x0c \x0d \x20 \xc2
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \xc2
AB\x{85}xxx\x{a0}XYZ
0: \x{85}xxx\x{a0}
AB\x{a0}xxx\x{85}XYZ
@@ -883,15 +883,15 @@
No first char
Need char = ' '
Subject length lower bound = 3
-Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
- \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
- \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
- A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
- f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3
- \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
- \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
- \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
- \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
+ \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
+ \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
+ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
+ i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
+ \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
+ \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
+ \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
+ \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
\x{a2} \x{84}
0: \x{a2} \x{84}
A Z
Modified: code/trunk/testdata/testoutput18-16
===================================================================
--- code/trunk/testdata/testoutput18-16 2014-06-18 16:31:32 UTC (rev 1485)
+++ code/trunk/testdata/testoutput18-16 2014-06-18 16:48:57 UTC (rev 1486)
@@ -752,7 +752,7 @@
No first char
Need char = 'x'
Subject length lower bound = 5
-Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
AB\x{85}xxx\x{a0}XYZ
0: \x{85}xxx\x{a0}
AB\x{a0}xxx\x{85}XYZ
@@ -764,20 +764,20 @@
No first char
Need char = ' '
Subject length lower bound = 3
-Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
- \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
- \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
- A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
- f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83
- \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
- \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3
- \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2
- \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1
- \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
- \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
- \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
- \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
- \xfe \xff
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
+ \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
+ \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
+ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
+ i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
+ \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
+ \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
+ \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
+ \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
+ \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
+ \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
+ \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
+ \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
+ \xff
\x{a2} \x{84}
0: \x{a2} \x{84}
A Z
Modified: code/trunk/testdata/testoutput18-32
===================================================================
--- code/trunk/testdata/testoutput18-32 2014-06-18 16:31:32 UTC (rev 1485)
+++ code/trunk/testdata/testoutput18-32 2014-06-18 16:48:57 UTC (rev 1486)
@@ -749,7 +749,7 @@
No first char
Need char = 'x'
Subject length lower bound = 5
-Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
AB\x{85}xxx\x{a0}XYZ
0: \x{85}xxx\x{a0}
AB\x{a0}xxx\x{85}XYZ
@@ -761,20 +761,20 @@
No first char
Need char = ' '
Subject length lower bound = 3
-Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
- \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
- \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
- A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
- f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83
- \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
- \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3
- \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2
- \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1
- \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
- \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
- \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
- \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
- \xfe \xff
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
+ \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
+ \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
+ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
+ i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
+ \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
+ \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
+ \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
+ \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
+ \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
+ \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
+ \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
+ \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
+ \xff
\x{a2} \x{84}
0: \x{a2} \x{84}
A Z