Hello,
I noticed that PCRE_JCHANGED can be set even though the option 'J'
does not actually change. For example, in these cases:
/(?J)(?P<A>a)/ ... a global change at the beginning of the pattern
/x(?-J:(?P<A>a))/ ... J is unset al the time
/x(?J:(?P<A>a))/J ... J is set al the time
/x(?J:y)z(?P<A>a)/ ... J does not affect any named subpattern
Actually, I think that ``J changed'' is not exactly the information
which is needed. Function get_first_set can perform its
optimization, unless a subpattern name was used more than once.
Let's call this bit PCRE_DUPNAMEUSED; it's easy to trace this info.
So I wrote a patch which replaces PCRE_JCHANGED by PCRE_DUPNAMEUSED.
But then I discovered that PCRE_INFO_JCHANGED is part of the
documented interface. That's unfortunate, and even the 7.4 changelog
shows that the definition of this bit differs from what would be
suitable internally (cf. first of my examples above).
So I discarded my patch, and replaced it by a much smaller one.
Attached please find it; it passes "make check".
Aren't my comments too cheeky?
Wouldn't it be possible to admit that PCRE_INFO_JCHANGED was a bad
idea and remove it from the documented interface? It's not that long
since it was published, after all.
(I'm willing to redo the patch, if necessary.)
Have a nice day,
Stepan Kasal
2007-11-23 Stepan Kasal <skasal@???>
Introduce PCRE_DUPNAMEUSED which is set iff a duplicate subpattern
name occurs. This is exactly the bit needed in pcre_get.c .
Index: pcre.h.in
===================================================================
--- pcre.h.in (revision 276)
+++ pcre.h.in (working copy)
@@ -168,8 +168,9 @@
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
#define PCRE_INFO_OKPARTIAL 12
-#define PCRE_INFO_JCHANGED 13
+#define PCRE_INFO_JCHANGED 13 /* Not used by pcre itself, but unfortunately documented */
#define PCRE_INFO_HASCRORLF 14
+#define PCRE_INFO_DUPNAMEUSED 15
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
Index: pcre_internal.h
===================================================================
--- pcre_internal.h (revision 276)
+++ pcre_internal.h (working copy)
@@ -489,8 +489,9 @@
#define PCRE_FIRSTSET 0x0002 /* first_byte is set */
#define PCRE_REQCHSET 0x0004 /* req_byte is set */
#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */
-#define PCRE_JCHANGED 0x0010 /* j option used in regex */
+#define PCRE_JCHANGED 0x0010 /* j option used in regex; useless, but documented */
#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */
+#define PCRE_DUPNAMEUSED 0x0040 /* a duplicate subpattern name has occured */
/* Options for the "extra" block produced by pcre_study(). */
Index: pcre_compile.c
===================================================================
--- pcre_compile.c (revision 276)
+++ pcre_compile.c (working copy)
@@ -4482,7 +4482,9 @@
{
if (slot[2+namelen] == 0)
{
- if ((options & PCRE_DUPNAMES) == 0)
+ if ((options & PCRE_DUPNAMES) != 0)
+ cd->external_flags |= PCRE_DUPNAMEUSED;
+ else
{
*errorcodeptr = ERR43;
goto FAILED;
@@ -4733,8 +4735,8 @@
{
case '-': optset = &unset; break;
- case 'J': /* Record that it changed in the external options */
- *optset |= PCRE_DUPNAMES;
+ case 'J': *optset |= PCRE_DUPNAMES;
+ /* Remember that we have seen this, for PCRE_INFO_JCHANGED. */
cd->external_flags |= PCRE_JCHANGED;
break;
Index: pcre_get.c
===================================================================
--- pcre_get.c (revision 276)
+++ pcre_get.c (working copy)
@@ -189,7 +189,7 @@
int entrysize;
char *first, *last;
uschar *entry;
-if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
+if ((re->flags & PCRE_DUPNAMEUSED) == 0)
return pcre_get_stringnumber(code, stringname);
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
if (entrysize <= 0) return entrysize;
Index: pcretest.c
===================================================================
--- pcretest.c (revision 276)
+++ pcretest.c (working copy)
@@ -1396,7 +1396,7 @@
#if !defined NOINFOCHECK
int old_first_char, old_options, old_count;
#endif
- int count, backrefmax, first_char, need_char, okpartial, jchanged,
+ int count, backrefmax, first_char, need_char, okpartial,
hascrorlf;
int nameentrysize, namecount;
const uschar *nametable;
@@ -1411,7 +1411,6 @@
new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
- new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
#if !defined NOINFOCHECK
@@ -1478,8 +1477,6 @@
((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
- if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
-
switch (get_options & PCRE_NEWLINE_BITS)
{
case PCRE_NEWLINE_CR:
Index: testdata/testoutput2
===================================================================
--- testdata/testoutput2 (revision 276)
+++ testdata/testoutput2 (working copy)
@@ -6166,7 +6166,6 @@
A 2
A 3
Options: anchored dupnames
-Duplicate name status changes
No first char
No need char
a1b\CA
@@ -6200,7 +6199,6 @@
B 3
C 4
Options: anchored
-Duplicate name status changes
No first char
No need char
a bc d\CA\CB\CC