Revision: 764
http://www.exim.org/viewvc/pcre2?view=rev&revision=764
Author: ph10
Date: 2017-04-22 15:35:14 +0100 (Sat, 22 Apr 2017)
Log Message:
-----------
Add a fancy test for multiple named subpatterns.
Modified Paths:
--------------
code/trunk/testdata/testinput1
code/trunk/testdata/testoutput1
Modified: code/trunk/testdata/testinput1
===================================================================
--- code/trunk/testdata/testinput1 2017-04-21 16:30:18 UTC (rev 763)
+++ code/trunk/testdata/testinput1 2017-04-22 14:35:14 UTC (rev 764)
@@ -5920,4 +5920,65 @@
/^(?1)\d{3}(a)/
a123a
+# This pattern uses a lot of named subpatterns in order to match email
+# addresses in various formats. It's a heavy test for named subpatterns. In the
+# <atext> group, slash is coded as \x{2f} so that this pattern can also be
+# processed by perltest.sh, which does not cater for an escaped delimiter
+# within the pattern. All $ and @ characters in subject strings are escaped so
+# that Perl doesn't interpret them as variable insertions and " characters must
+# also be escaped for Perl.
+
+# This set of subpatterns is more or less a direct transliteration of the BNF
+# definitions in RFC2822, without any of the obsolete features. The addition of
+# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
+# from over 5 million to just under 400, and eliminated a very noticeable delay
+# when this file was passed to perltest.sh.
+
+/(?ix)(?(DEFINE)
+(?<addr_spec> (?&local_part) \@ (?&domain) )
+(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
+(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
+(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
+(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
+(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
+(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
+(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
+ (?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
+(?<dcontent> (?&dtext) | (?&quoted_pair) )
+(?<display_name> (?&phrase) )
+(?<domain> (?&dot_atom) | (?&domain_literal) )
+(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
+ (?&CFWS)?+ )
+(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
+(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
+(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
+(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
+(?<local_part> (?&dot_atom) | (?&quoted_string) )
+(?<mailbox> (?&name_addr) | (?&addr_spec) )
+(?<name_addr> (?&display_name)? (?&angle_addr) )
+(?<phrase> (?&word)++ )
+(?<qcontent> (?&qtext) | (?&quoted_pair) )
+(?<quoted_pair> " (?&text) )
+(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
+ (?&CFWS)?+ )
+(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
+(?<text> [^\r\n] )
+(?<word> (?&atom) | (?&quoted_string) )
+) # End DEFINE
+^(?&mailbox)$/
+ Alan Other <user\@dom.ain>
+ <user\@dom.ain>
+ user\@dom.ain
+ user\@[]
+ user\@[domain literal]
+ user\@[domain literal with \"[square brackets\"] inside]
+ \"A. Other\" <user.1234\@dom.ain> (a comment)
+ A. Other <user.1234\@dom.ain> (a comment)
+ \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
+\= Expect no match
+ A missing angle <user\@some.where
+ The quick brown fox
+
+# --------------------------------------------------------------------------
+
# End of testinput1
Modified: code/trunk/testdata/testoutput1
===================================================================
--- code/trunk/testdata/testoutput1 2017-04-21 16:30:18 UTC (rev 763)
+++ code/trunk/testdata/testoutput1 2017-04-22 14:35:14 UTC (rev 764)
@@ -9492,4 +9492,76 @@
0: a123a
1: a
+# This pattern uses a lot of named subpatterns in order to match email
+# addresses in various formats. It's a heavy test for named subpatterns. In the
+# <atext> group, slash is coded as \x{2f} so that this pattern can also be
+# processed by perltest.sh, which does not cater for an escaped delimiter
+# within the pattern. All $ and @ characters in subject strings are escaped so
+# that Perl doesn't interpret them as variable insertions and " characters must
+# also be escaped for Perl.
+
+# This set of subpatterns is more or less a direct transliteration of the BNF
+# definitions in RFC2822, without any of the obsolete features. The addition of
+# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
+# from over 5 million to just under 400, and eliminated a very noticeable delay
+# when this file was passed to perltest.sh.
+
+/(?ix)(?(DEFINE)
+(?<addr_spec> (?&local_part) \@ (?&domain) )
+(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
+(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
+(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
+(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
+(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
+(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
+(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
+ (?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
+(?<dcontent> (?&dtext) | (?&quoted_pair) )
+(?<display_name> (?&phrase) )
+(?<domain> (?&dot_atom) | (?&domain_literal) )
+(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
+ (?&CFWS)?+ )
+(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
+(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
+(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
+(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
+(?<local_part> (?&dot_atom) | (?&quoted_string) )
+(?<mailbox> (?&name_addr) | (?&addr_spec) )
+(?<name_addr> (?&display_name)? (?&angle_addr) )
+(?<phrase> (?&word)++ )
+(?<qcontent> (?&qtext) | (?&quoted_pair) )
+(?<quoted_pair> " (?&text) )
+(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
+ (?&CFWS)?+ )
+(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
+(?<text> [^\r\n] )
+(?<word> (?&atom) | (?&quoted_string) )
+) # End DEFINE
+^(?&mailbox)$/
+ Alan Other <user\@dom.ain>
+ 0: Alan Other <user@dom.ain>
+ <user\@dom.ain>
+ 0: <user@dom.ain>
+ user\@dom.ain
+ 0: user@dom.ain
+ user\@[]
+ 0: user@[]
+ user\@[domain literal]
+ 0: user@[domain literal]
+ user\@[domain literal with \"[square brackets\"] inside]
+ 0: user@[domain literal with "[square brackets"] inside]
+ \"A. Other\" <user.1234\@dom.ain> (a comment)
+ 0: "A. Other" <user.1234@dom.ain> (a comment)
+ A. Other <user.1234\@dom.ain> (a comment)
+ 0: A. Other <user.1234@dom.ain> (a comment)
+ \"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
+ 0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
+\= Expect no match
+ A missing angle <user\@some.where
+No match
+ The quick brown fox
+No match
+
+# --------------------------------------------------------------------------
+
# End of testinput1