Revision: 744
http://vcs.pcre.org/viewvc?view=rev&revision=744
Author: zherczeg
Date: 2011-11-13 16:31:38 +0000 (Sun, 13 Nov 2011)
Log Message:
-----------
Correctly supporting \x and \u in JavaScript compatibility mode
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/pcre_compile.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2011-11-08 09:59:38 UTC (rev 743)
+++ code/trunk/ChangeLog 2011-11-13 16:31:38 UTC (rev 744)
@@ -12,12 +12,15 @@
3. Fix cache-flush issue on PowerPC (It is still an experimental JIT port).
PCRE_EXTRA_TABLES is not supported by JIT, and should be checked before
calling _pcre_jit_exec. Some extra comments are added.
-
-4. Mark settings inside atomic groups that do not contain any capturing
- parentheses, for example, (?>a(*:m)), were not being passed out. This bug
+
+4. Mark settings inside atomic groups that do not contain any capturing
+ parentheses, for example, (?>a(*:m)), were not being passed out. This bug
was introduced by change 18 for 8.20.
+5. Support \x and \u in JavaScript compatibility mode, following the
+ ECMA-262 standard.
+
Version 8.20 21-Oct-2011
------------------------
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2011-11-08 09:59:38 UTC (rev 743)
+++ code/trunk/pcre_compile.c 2011-11-13 16:31:38 UTC (rev 744)
@@ -676,9 +676,39 @@
case CHAR_l:
case CHAR_L:
+ *errorcodeptr = ERR37;
+ break;
+
case CHAR_u:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
+ {
+ /* In JavaScript, \u must be followed by four hexadecimal digits.
+ Otherwise it is treated as a literal lowercase letter u. */
+ if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
+ && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
+ {
+ int i;
+ c = 0;
+ for (i = 0; i < 4; ++i)
+ {
+ register int cc = *(++ptr);
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+#else /* EBCDIC coding */
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+#endif
+ }
+ }
+ }
+ else
+ *errorcodeptr = ERR37;
+ break;
+
case CHAR_U:
- *errorcodeptr = ERR37;
+ /* In JavaScript, \U is an uppercase U letter. */
+ if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
break;
/* In a character class, \g is just a literal "g". Outside a character
@@ -828,6 +858,29 @@
treated as a data character. */
case CHAR_x:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
+ {
+ /* In JavaScript, \x must be followed by two hexadecimal digits.
+ Otherwise it is treated as a literal lowercase letter x. */
+ if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
+ {
+ int i;
+ c = 0;
+ for (i = 0; i < 2; ++i)
+ {
+ register int cc = *(++ptr);
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+#else /* EBCDIC coding */
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+#endif
+ }
+ }
+ break;
+ }
+
if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
const uschar *pt = ptr + 2;
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2011-11-08 09:59:38 UTC (rev 743)
+++ code/trunk/testdata/testinput2 2011-11-13 16:31:38 UTC (rev 744)
@@ -3969,4 +3969,38 @@
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
\Maabbccddee
+/^a\x41z/<JS>
+ aAz
+ *** Failers
+ ax41z
+
+/^a[m\x41]z/<JS>
+ aAz
+
+/^a\x1z/<JS>
+ ax1z
+
+/^a\X41z/<JS>
+ aX41z
+ *** Failers
+ aAz
+
+/^a\u0041z/<JS>
+ aAz
+ *** Failers
+ au0041z
+
+/^a[m\u0041]z/<JS>
+ aAz
+
+/^a\u041z/<JS>
+ au041z
+ *** Failers
+ aAz
+
+/^a\U0041z/<JS>
+ aU0041z
+ *** Failers
+ aAz
+
/-- End of testinput2 --/
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2011-11-08 09:59:38 UTC (rev 743)
+++ code/trunk/testdata/testoutput2 2011-11-13 16:31:38 UTC (rev 744)
@@ -12502,4 +12502,56 @@
2: cc
3: ee
+/^a\x41z/<JS>
+ aAz
+ 0: aAz
+ *** Failers
+No match
+ ax41z
+No match
+
+/^a[m\x41]z/<JS>
+ aAz
+ 0: aAz
+
+/^a\x1z/<JS>
+ ax1z
+ 0: ax1z
+
+/^a\X41z/<JS>
+ aX41z
+ 0: aX41z
+ *** Failers
+No match
+ aAz
+No match
+
+/^a\u0041z/<JS>
+ aAz
+ 0: aAz
+ *** Failers
+No match
+ au0041z
+No match
+
+/^a[m\u0041]z/<JS>
+ aAz
+ 0: aAz
+
+/^a\u041z/<JS>
+ au041z
+ 0: au041z
+ *** Failers
+No match
+ aAz
+No match
+
+/^a\U0041z/<JS>
+ aU0041z
+ 0: aU0041z
+ *** Failers
+No match
+ aAz
+No match
+
/-- End of testinput2 --/