Revision: 1061
http://vcs.pcre.org/viewvc?view=rev&revision=1061
Author: chpe
Date: 2012-10-16 16:54:02 +0100 (Tue, 16 Oct 2012)
Log Message:
-----------
pcre32: compile: Encode back references as negative numbers
Return back references as negative numbers from check_escape(),
instead of adding them to ESC_REF. This way, there can never be an
overflow.
Modified Paths:
--------------
code/trunk/pcre_compile.c
code/trunk/pcre_internal.h
Modified: code/trunk/pcre_compile.c
===================================================================
--- code/trunk/pcre_compile.c 2012-10-16 15:53:57 UTC (rev 1060)
+++ code/trunk/pcre_compile.c 2012-10-16 15:54:02 UTC (rev 1061)
@@ -750,9 +750,9 @@
/* This function is called when a \ has been encountered. It either returns a
positive value for a simple escape such as \n, or 0 for a data character
-which will be placed in chptr. A backreference to group
-n is returned as ESC_REF + n; ESC_REF is the highest ESC_xxx macro. When
-UTF-8 is enabled, a positive value greater than 255 may be returned in chptr.
+which will be placed in chptr. A backreference to group n is returned as
+negative n. When UTF-8 is enabled, a positive value greater than 255 may
+be returned in chptr.
On entry, ptr is pointing at the \. On exit, it is on the final character of the
escape sequence.
@@ -766,6 +766,7 @@
Returns: zero => a data character
positive => a special escape sequence
+ negative => a back reference
on error, errorcodeptr is set
*/
@@ -954,7 +955,7 @@
c = bracount - (c - 1);
}
- escape = ESC_REF + c;
+ escape = -c;
break;
/* The handling of escape sequences consisting of a string of digits
@@ -995,7 +996,7 @@
}
if (c < 10 || c <= bracount)
{
- escape = ESC_REF + c;
+ escape = -c;
break;
}
ptr = oldptr; /* Put the pointer back and fall through */
@@ -4459,7 +4460,7 @@
/* \b is backspace; any other special means the '-' was literal. */
- if (descape > 0)
+ if (descape != 0)
{
if (descape == ESC_b) d = CHAR_BS; else
{
@@ -6673,7 +6674,7 @@
/* Handle metasequences introduced by \. For ones like \d, the ESC_ values
are arranged to be the negation of the corresponding OP_values in the
default case when PCRE_UCP is not set. For the back references, the values
- are ESC_REF plus the reference number. Only back references and those types
+ are the negated reference number. Only back references and those types
that consume a character may be repeated. We can test for values between
ESC_b and ESC_Z for the latter; this may have to change if any new ones are
ever created. */
@@ -6713,7 +6714,7 @@
is a subroutine call by number (Oniguruma syntax). In fact, the value
ESC_g is returned only for these cases. So we don't need to check for <
or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
- ESC_REF+n, and for the Perl syntax \g{name} the result is ESC_k (as
+ -n, and for the Perl syntax \g{name} the result is ESC_k (as
that is a synonym for a named back reference). */
if (escape == ESC_g)
@@ -6791,10 +6792,10 @@
not set to cope with cases like (?=(\w+))\1: which would otherwise set
':' later. */
- if (escape >= ESC_REF)
+ if (escape < 0)
{
open_capitem *oc;
- recno = escape - ESC_REF;
+ recno = -escape;
HANDLE_REFERENCE: /* Come here from named backref handling */
if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2012-10-16 15:53:57 UTC (rev 1060)
+++ code/trunk/pcre_internal.h 2012-10-16 15:54:02 UTC (rev 1061)
@@ -1775,8 +1775,8 @@
They must be contiguous, and remain in order so that the replacements can be
looked up from a table.
-The final escape must be ESC_REF as subsequent values are used for
-backreferences (\1, \2, \3, etc). There are two tests in the code for an escape
+Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
+check_escape(). There are two tests in the code for an escape
greater than ESC_b and less than ESC_Z to detect the types that may be
repeated. These are the types that consume characters. If any new escapes are
put in between that don't consume a character, that code will have to change.
@@ -1786,8 +1786,7 @@
ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
ESC_E, ESC_Q, ESC_g, ESC_k,
- ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu,
- ESC_REF };
+ ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
OP_EOD must correspond in order to the list of escapes immediately above.