Revision: 432
http://vcs.pcre.org/viewvc?view=rev&revision=432
Author: ph10
Date: 2009-09-02 17:02:56 +0100 (Wed, 02 Sep 2009)
Log Message:
-----------
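Add REG_UNGREEDY (non-standard) to the POSIX interface. This revision also makes the pcreposix.c matching code treat a NULL pmatch vector as nmatch == 0, and drops the I modifier from the POSIX (/P) patterns in testinput2/testoutput2.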
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcreposix.3
code/trunk/pcreposix.c
code/trunk/pcreposix.h
code/trunk/pcretest.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2009-09-02 09:23:21 UTC (rev 431)
+++ code/trunk/ChangeLog 2009-09-02 16:02:56 UTC (rev 432)
@@ -83,6 +83,11 @@
libpcrecpp.pc and pcre-config when PCRE is not compiled as a shared
library.
+15. Added REG_UNGREEDY to the pcreposix interface, at the request of a user.
+ It maps to PCRE_UNGREEDY. It is not, of course, POSIX-compatible, but it
+ is not the first non-POSIX option to be added. Clearly some people find
+ these options useful.
+
Version 7.9 11-Apr-09
---------------------
Modified: code/trunk/doc/pcreposix.3
===================================================================
--- code/trunk/doc/pcreposix.3 2009-09-02 09:23:21 UTC (rev 431)
+++ code/trunk/doc/pcreposix.3 2009-09-02 16:02:56 UTC (rev 432)
@@ -45,6 +45,10 @@
POSIX interface often use it, this makes it easier to slot in PCRE as a
replacement library. Other POSIX options are not even defined.
.P
+There are also some other options that are not defined by POSIX. These have
+been added at the request of users who want to make use of certain
+PCRE-specific features via the POSIX calling interface.
+.P
When PCRE is called via these functions, it is only the API that is POSIX-like
in style. The syntax and semantics of the regular expressions themselves are
still those of Perl, subject to the setting of various PCRE options, as
@@ -97,6 +101,12 @@
\fInmatch\fP and \fIpmatch\fP arguments are ignored, and no captured strings
are returned.
.sp
+ REG_UNGREEDY
+.sp
+The PCRE_UNGREEDY option is set when the regular expression is passed for
+compilation to the native function. Note that REG_UNGREEDY is not part of the
+POSIX standard.
+.sp
REG_UTF8
.sp
The PCRE_UTF8 option is set when the regular expression is passed for
@@ -109,7 +119,7 @@
particular, the way it handles newline characters in the subject string is the
Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only
\fIsome\fP of the effects specified for REG_NEWLINE. It does not affect the way
-newlines are matched by . (they aren't) or by a negative class such as [^a]
+newlines are matched by . (they are not) or by a negative class such as [^a]
(they are).
.P
The yield of \fBregcomp()\fP is zero on success, and non-zero otherwise. The
@@ -243,6 +253,6 @@
.rs
.sp
.nf
-Last updated: 15 August 2009
+Last updated: 02 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
Modified: code/trunk/pcreposix.c
===================================================================
--- code/trunk/pcreposix.c 2009-09-02 09:23:21 UTC (rev 431)
+++ code/trunk/pcreposix.c 2009-09-02 16:02:56 UTC (rev 432)
@@ -240,11 +240,12 @@
int errorcode;
int options = 0;
-if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
-if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
-if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
-if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
-if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
+if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
+if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
+if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
+if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
+if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
+if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY;
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
&erroffset, NULL);
@@ -299,10 +300,11 @@
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
-/* When no string data is being returned, ensure that nmatch is zero.
-Otherwise, ensure the vector for holding the return data is large enough. */
+/* When no string data is being returned, or no vector has been passed in which
+to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding
+the return data is large enough. */
-if (nosub) nmatch = 0;
+if (nosub || pmatch == NULL) nmatch = 0;
else if (nmatch > 0)
{
Modified: code/trunk/pcreposix.h
===================================================================
--- code/trunk/pcreposix.h 2009-09-02 09:23:21 UTC (rev 431)
+++ code/trunk/pcreposix.h 2009-09-02 16:02:56 UTC (rev 432)
@@ -50,17 +50,18 @@
extern "C" {
#endif
-/* Options, mostly defined by POSIX, but with a couple of extras. */
+/* Options, mostly defined by POSIX, but with some extras. */
-#define REG_ICASE 0x0001
-#define REG_NEWLINE 0x0002
-#define REG_NOTBOL 0x0004
-#define REG_NOTEOL 0x0008
-#define REG_DOTALL 0x0010 /* NOT defined by POSIX. */
-#define REG_NOSUB 0x0020
-#define REG_UTF8 0x0040 /* NOT defined by POSIX. */
+#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
+#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */
+#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */
+#define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */
+#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */
+#define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */
+#define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */
#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
-#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX. */
+#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
+#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */
Modified: code/trunk/pcretest.c
===================================================================
--- code/trunk/pcretest.c 2009-09-02 09:23:21 UTC (rev 431)
+++ code/trunk/pcretest.c 2009-09-02 16:02:56 UTC (rev 432)
@@ -1305,6 +1305,7 @@
if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
+ if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
rc = regcomp(&preg, (char *)p, cflags);
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2009-09-02 09:23:21 UTC (rev 431)
+++ code/trunk/testdata/testinput2 2009-09-02 16:02:56 UTC (rev 432)
@@ -123,38 +123,38 @@
defabc
\Zdefabc
-/abc/IP
+/abc/P
abc
*** Failers
-/^abc|def/IP
+/^abc|def/P
abcdef
abcdef\B
-/.*((abc)$|(def))/IP
+/.*((abc)$|(def))/P
defabc
\Zdefabc
-/the quick brown fox/IP
+/the quick brown fox/P
the quick brown fox
*** Failers
The Quick Brown Fox
-/the quick brown fox/IPi
+/the quick brown fox/Pi
the quick brown fox
The Quick Brown Fox
-/abc.def/IP
+/abc.def/P
*** Failers
abc\ndef
-/abc$/IP
+/abc$/P
abc
abc\n
-/(abc)\2/IP
+/(abc)\2/P
-/(abc\1)/IP
+/(abc\1)/P
abc
/)/
@@ -593,7 +593,7 @@
*** Failers
\Nabc
-/a*(b+)(z)(z)/IP
+/a*(b+)(z)(z)/P
aaaabbbbzzzz
aaaabbbbzzzz\O0
aaaabbbbzzzz\O1
@@ -1419,13 +1419,13 @@
** Failers
line one\nthis is a line\nbreak in the second line
-/ab.cd/IP
+/ab.cd/P
ab-cd
ab=cd
** Failers
ab\ncd
-/ab.cd/IPs
+/ab.cd/Ps
ab-cd
ab=cd
ab\ncd
@@ -1480,10 +1480,10 @@
(this)
((this))
-/a(b)c/IPN
+/a(b)c/PN
abc
-/a(?P<name>b)c/IPN
+/a(?P<name>b)c/PN
abc
/\x{100}/I
@@ -2935,5 +2935,11 @@
/abc/
abc\P
abc\P\P
+
+/\w+A/P
+ CDAAAAB
+/\w+A/PU
+ CDAAAAB
+
/ End of testinput2 /
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2009-09-02 09:23:21 UTC (rev 431)
+++ code/trunk/testdata/testoutput2 2009-09-02 16:02:56 UTC (rev 432)
@@ -331,19 +331,19 @@
2: <unset>
3: def
-/abc/IP
+/abc/P
abc
0: abc
*** Failers
No match: POSIX code 17: match failed
-/^abc|def/IP
+/^abc|def/P
abcdef
0: abc
abcdef\B
0: def
-/.*((abc)$|(def))/IP
+/.*((abc)$|(def))/P
defabc
0: defabc
1: abc
@@ -353,7 +353,7 @@
1: def
3: def
-/the quick brown fox/IP
+/the quick brown fox/P
the quick brown fox
0: the quick brown fox
*** Failers
@@ -361,28 +361,28 @@
The Quick Brown Fox
No match: POSIX code 17: match failed
-/the quick brown fox/IPi
+/the quick brown fox/Pi
the quick brown fox
0: the quick brown fox
The Quick Brown Fox
0: The Quick Brown Fox
-/abc.def/IP
+/abc.def/P
*** Failers
No match: POSIX code 17: match failed
abc\ndef
No match: POSIX code 17: match failed
-/abc$/IP
+/abc$/P
abc
0: abc
abc\n
0: abc
-/(abc)\2/IP
+/(abc)\2/P
Failed: POSIX code 15: bad back reference at offset 7
-/(abc\1)/IP
+/(abc\1)/P
abc
No match: POSIX code 17: match failed
@@ -1615,7 +1615,7 @@
\Nabc
No match
-/a*(b+)(z)(z)/IP
+/a*(b+)(z)(z)/P
aaaabbbbzzzz
0: aaaabbbbzz
1: bbbb
@@ -5548,7 +5548,7 @@
line one\nthis is a line\nbreak in the second line
No match
-/ab.cd/IP
+/ab.cd/P
ab-cd
0: ab-cd
ab=cd
@@ -5558,7 +5558,7 @@
ab\ncd
No match: POSIX code 17: match failed
-/ab.cd/IPs
+/ab.cd/Ps
ab-cd
0: ab-cd
ab=cd
@@ -5858,11 +5858,11 @@
((this))
0: ((this))
-/a(b)c/IPN
+/a(b)c/PN
abc
Matched with REG_NOSUB
-/a(?P<name>b)c/IPN
+/a(?P<name>b)c/PN
abc
Matched with REG_NOSUB
@@ -9963,5 +9963,13 @@
0: abc
abc\P\P
0: abc
+
+/\w+A/P
+ CDAAAAB
+ 0: CDAAAA
+/\w+A/PU
+ CDAAAAB
+ 0: CDA
+
/ End of testinput2 /