Revision: 1100
http://vcs.pcre.org/viewvc?view=rev&revision=1100
Author: chpe
Date: 2012-10-16 16:56:26 +0100 (Tue, 16 Oct 2012)
Log Message:
-----------
pcre32: exec: Mask bits > 21 in 32-bit UTF mode
Allow passing characters with high bits set in UTF-32 mode.
Modified Paths:
--------------
code/trunk/pcre_dfa_exec.c
code/trunk/pcre_exec.c
code/trunk/pcre_internal.h
code/trunk/pcre_string_utils.c
Modified: code/trunk/pcre_dfa_exec.c
===================================================================
--- code/trunk/pcre_dfa_exec.c 2012-10-16 15:56:22 UTC (rev 1099)
+++ code/trunk/pcre_dfa_exec.c 2012-10-16 15:56:26 UTC (rev 1100)
@@ -613,9 +613,10 @@
{
clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF
- if (utf) { GETCHARLEN(c, ptr, clen); } else
+ GETCHARLENTEST(c, ptr, clen);
+#else
+ c = *ptr;
#endif /* SUPPORT_UTF */
- c = *ptr;
}
else
{
@@ -1437,7 +1438,7 @@
goto ANYNL01;
case CHAR_CR:
- if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+ if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL01:
@@ -1692,7 +1693,7 @@
goto ANYNL02;
case CHAR_CR:
- if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+ if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL02:
@@ -1948,7 +1949,7 @@
goto ANYNL03;
case CHAR_CR:
- if (ptr + 1 < end_subject && ptr[1] == CHAR_LF) ncount = 1;
+ if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */
ANYNL03:
@@ -2146,7 +2147,7 @@
if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
}
- else if (ptr[1] == CHAR_LF)
+ else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}
@@ -2260,7 +2261,7 @@
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
if (clen > 0)
{
- unsigned int otherd = NOTACHAR;
+ pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
@@ -2307,7 +2308,7 @@
ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)
{
- unsigned int otherd = NOTACHAR;
+ pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
@@ -2352,7 +2353,7 @@
ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)
{
- unsigned int otherd = NOTACHAR;
+ pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
@@ -2389,7 +2390,7 @@
count = current_state->count; /* Number already matched */
if (clen > 0)
{
- unsigned int otherd = NOTACHAR;
+ pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
@@ -2433,7 +2434,7 @@
count = current_state->count; /* Number already matched */
if (clen > 0)
{
- unsigned int otherd = NOTACHAR;
+ pcre_uint32 otherd = NOTACHAR;
if (caseless)
{
#ifdef SUPPORT_UTF
@@ -3378,12 +3379,15 @@
if (has_first_char)
{
if (first_char != first_char2)
+ {
+ pcre_uchar csc;
while (current_subject < end_subject &&
- *current_subject != first_char && *current_subject != first_char2)
+ (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
current_subject++;
+ }
else
while (current_subject < end_subject &&
- *current_subject != first_char)
+ RAWUCHARTEST(current_subject) != first_char)
current_subject++;
}
@@ -3413,10 +3417,10 @@
ANYCRLF, and we are now at a LF, advance the match position by one
more character. */
- if (current_subject[-1] == CHAR_CR &&
+ if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
current_subject < end_subject &&
- *current_subject == CHAR_NL)
+ RAWUCHARTEST(current_subject) == CHAR_NL)
current_subject++;
}
}
@@ -3427,7 +3431,7 @@
{
while (current_subject < end_subject)
{
- register unsigned int c = *current_subject;
+ register pcre_uint32 c = RAWUCHARTEST(current_subject);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
@@ -3493,7 +3497,7 @@
{
while (p < end_subject)
{
- register pcre_uint32 pp = *p++;
+ register pcre_uint32 pp = RAWUCHARINCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@@ -3501,7 +3505,7 @@
{
while (p < end_subject)
{
- if (*p++ == req_char) { p--; break; }
+ if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
}
}
@@ -3559,9 +3563,9 @@
not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. */
- if (current_subject[-1] == CHAR_CR &&
+ if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
current_subject < end_subject &&
- *current_subject == CHAR_NL &&
+ RAWUCHARTEST(current_subject) == CHAR_NL &&
(re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY ||
md->nltype == NLTYPE_ANYCRLF ||
Modified: code/trunk/pcre_exec.c
===================================================================
--- code/trunk/pcre_exec.c 2012-10-16 15:56:22 UTC (rev 1099)
+++ code/trunk/pcre_exec.c 2012-10-16 15:56:26 UTC (rev 1100)
@@ -92,8 +92,6 @@
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
-
-
#ifdef PCRE_DEBUG
/*************************************************
* Debugging function to print chars *
@@ -114,10 +112,11 @@
static void
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
{
-unsigned int c;
+pcre_uint32 c;
+BOOL utf = md->utf;
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
while (length-- > 0)
- if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
+ if (isprint(c = RAWUCHARINCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
}
#endif
@@ -150,6 +149,7 @@
{
PCRE_PUCHAR eptr_start = eptr;
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
+BOOL utf = md->utf;
#ifdef PCRE_DEBUG
if (eptr >= md->end_subject)
@@ -177,7 +177,7 @@
{
#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
- if (md->utf)
+ if (utf)
{
/* Match characters up to the end of the reference. NOTE: the number of
data units matched may differ, because in UTF-8 there are some characters
@@ -217,8 +217,11 @@
{
while (length-- > 0)
{
+ pcre_uchar cc, cp;
if (eptr >= md->end_subject) return -2; /* Partial match */
- if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
+ cc = RAWUCHARTEST(eptr);
+ cp = RAWUCHARTEST(p);
+ if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
p++;
eptr++;
}
@@ -233,7 +236,7 @@
while (length-- > 0)
{
if (eptr >= md->end_subject) return -2; /* Partial match */
- if (*p++ != *eptr++) return -1;
+ if (RAWUCHARINCTEST(p) != RAWUCHARINCTEST(eptr)) return -1;
}
}
@@ -307,7 +310,7 @@
}
#define RRETURN(ra) \
{ \
- printf("match() returned %d from line %d ", ra, __LINE__); \
+ printf("match() returned %d from line %d\n", ra, __LINE__); \
return ra; \
}
#else
@@ -748,7 +751,7 @@
unaltered. */
else if (rrc == MATCH_SKIP_ARG &&
- STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
+ STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
{
md->start_match_ptr = eptr;
RRETURN(MATCH_SKIP);
@@ -2094,7 +2097,7 @@
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2138,7 +2141,7 @@
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2281,7 +2284,7 @@
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHARTEST(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -2435,7 +2438,7 @@
{
SCHECK_PARTIAL();
}
- else if (*eptr == CHAR_LF) eptr++;
+ else if (RAWUCHARTEST(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -3145,7 +3148,7 @@
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
- while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
+ while (length-- > 0) if (*ecode++ != RAWUCHARINC(eptr)) RRETURN(MATCH_NOMATCH);
}
else
#endif
@@ -3185,8 +3188,8 @@
if (fc < 128)
{
- if (md->lcc[fc]
- != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
+ pcre_uchar cc = RAWUCHAR(eptr);
+ if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
ecode++;
eptr++;
}
@@ -3434,12 +3437,15 @@
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
+ cc = RAWUCHARTEST(eptr);
+ if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
if (min == max) continue;
@@ -3447,6 +3453,8 @@
{
for (fi = min;; fi++)
{
+ pcre_uchar cc;
+
RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
@@ -3455,7 +3463,8 @@
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
+ cc = RAWUCHARTEST(eptr);
+ if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
eptr++;
}
/* Control never gets here */
@@ -3465,12 +3474,15 @@
pp = eptr;
for (i = min; i < max; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (fc != *eptr && foc != *eptr) break;
+ cc = RAWUCHARTEST(eptr);
+ if (fc != cc && foc != cc) break;
eptr++;
}
@@ -3498,7 +3510,7 @@
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
if (min == max) continue;
@@ -3515,7 +3527,7 @@
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+ if (fc != RAWUCHARINCTEST(eptr)) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -3529,7 +3541,7 @@
SCHECK_PARTIAL();
break;
}
- if (fc != *eptr) break;
+ if (fc != RAWUCHARTEST(eptr)) break;
eptr++;
}
if (possessive) continue;
@@ -3699,7 +3711,7 @@
#ifdef SUPPORT_UTF
if (utf)
{
- register unsigned int d;
+ register pcre_uint32 d;
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
@@ -4270,7 +4282,7 @@
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHAR(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -4312,7 +4324,7 @@
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
+ if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -4416,12 +4428,15 @@
case OP_DIGIT:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
+ cc = RAWUCHAR(eptr);
+ if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
/* No need to skip more bytes - we know it's a 1-byte character */
@@ -4431,12 +4446,15 @@
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
+ cc = RAWUCHAR(eptr);
+ if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
@@ -4446,12 +4464,15 @@
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
+ cc = RAWUCHAR(eptr);
+ if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
/* No need to skip more bytes - we know it's a 1-byte character */
@@ -4461,12 +4482,15 @@
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
+ cc = RAWUCHAR(eptr);
+ if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
eptr++;
ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
@@ -4476,12 +4500,15 @@
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
{
+ pcre_uchar cc;
+
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
+ cc = RAWUCHAR(eptr);
+ if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
eptr++;
/* No need to skip more bytes - we know it's a 1-byte character */
@@ -5028,7 +5055,7 @@
{
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
+ if (eptr < md->end_subject && RAWUCHAR(eptr) == CHAR_LF) eptr++;
break;
case CHAR_LF:
@@ -5532,7 +5559,7 @@
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHAR(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5558,7 +5585,7 @@
eptr + 1 >= md->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ RAWUCHAR(eptr) == NLBLOCK->nl[0])
{
md->hitend = TRUE;
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
@@ -5615,7 +5642,7 @@
if (c == CHAR_CR)
{
if (++eptr >= md->end_subject) break;
- if (*eptr == CHAR_LF) eptr++;
+ if (RAWUCHAR(eptr) == CHAR_LF) eptr++;
}
else
{
@@ -5783,8 +5810,8 @@
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- == pp) break; /* Stop if tried at original pos */
BACKCHAR(eptr);
- if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_NL &&
- eptr[-1] == CHAR_CR) eptr--;
+ if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL &&
+ RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
}
}
else
@@ -6629,12 +6656,14 @@
if (has_first_char)
{
+ pcre_uchar smc;
+
if (first_char != first_char2)
while (start_match < end_subject &&
- *start_match != first_char && *start_match != first_char2)
+ (smc = RAWUCHARTEST(start_match)) != first_char && smc != first_char2)
start_match++;
else
- while (start_match < end_subject && *start_match != first_char)
+ while (start_match < end_subject && RAWUCHARTEST(start_match) != first_char)
start_match++;
}
@@ -6666,7 +6695,7 @@
if (start_match[-1] == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
start_match < end_subject &&
- *start_match == CHAR_NL)
+ RAWUCHARTEST(start_match) == CHAR_NL)
start_match++;
}
}
@@ -6677,7 +6706,7 @@
{
while (start_match < end_subject)
{
- register unsigned int c = *start_match;
+ register pcre_uint32 c = RAWUCHARTEST(start_match);
#ifndef COMPILE_PCRE8
if (c > 255) c = 255;
#endif
@@ -6745,7 +6774,7 @@
{
while (p < end_subject)
{
- register pcre_uint32 pp = *p++;
+ register pcre_uint32 pp = RAWUCHARINCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }
}
}
@@ -6753,7 +6782,7 @@
{
while (p < end_subject)
{
- if (*p++ == req_char) { p--; break; }
+ if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
}
}
Modified: code/trunk/pcre_internal.h
===================================================================
--- code/trunk/pcre_internal.h 2012-10-16 15:56:22 UTC (rev 1099)
+++ code/trunk/pcre_internal.h 2012-10-16 15:56:26 UTC (rev 1100)
@@ -304,8 +304,8 @@
&(NLBLOCK->nllen), utf)) \
: \
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
- (p)[0] == NLBLOCK->nl[0] && \
- (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \
+ RAWUCHARTEST(p) == NLBLOCK->nl[0] && \
+ (NLBLOCK->nllen == 1 || RAWUCHARTEST(p+1) == NLBLOCK->nl[1]) \
) \
)
@@ -318,8 +318,8 @@
&(NLBLOCK->nllen), utf)) \
: \
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
- (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
- (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \
+ RAWUCHARTEST(p - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
+ (NLBLOCK->nllen == 1 || RAWUCHARTEST(p - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
) \
)
@@ -579,6 +579,10 @@
#define GETCHARINC(c, eptr) c = *eptr++;
#define GETCHARINCTEST(c, eptr) c = *eptr++;
#define GETCHARLEN(c, eptr, len) c = *eptr;
+#define RAWUCHAR(eptr) (*(eptr))
+#define RAWUCHARINC(eptr) (*(eptr)++)
+#define RAWUCHARTEST(eptr) (*(eptr))
+#define RAWUCHARINCTEST(eptr) (*(eptr)++)
/* #define GETCHARLENTEST(c, eptr, len) */
/* #define BACKCHAR(eptr) */
/* #define FORWARDCHAR(eptr) */
@@ -751,6 +755,30 @@
c = *eptr; \
if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
+/* Returns the next uchar, not advancing the pointer. This is called when
+we know we are in UTF mode. */
+
+#define RAWUCHAR(eptr) \
+ (*(eptr))
+
+/* Returns the next uchar, advancing the pointer. This is called when
+we know we are in UTF mode. */
+
+#define RAWUCHARINC(eptr) \
+ (*(eptr)++)
+
+/* Returns the next uchar, testing for UTF mode, and not advancing the
+pointer. */
+
+#define RAWUCHARTEST(eptr) \
+ (*(eptr))
+
+/* Returns the next uchar, testing for UTF mode, and advancing the
+pointer. */
+
+#define RAWUCHARINCTEST(eptr) \
+ (*(eptr)++)
+
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-8 mode - we don't put a test within the macro
because almost all calls are already within a block of UTF-8 only code. */
@@ -846,6 +874,30 @@
c = *eptr; \
if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
+/* Returns the next uchar, not advancing the pointer. This is called when
+we know we are in UTF mode. */
+
+#define RAWUCHAR(eptr) \
+ (*(eptr))
+
+/* Returns the next uchar, advancing the pointer. This is called when
+we know we are in UTF mode. */
+
+#define RAWUCHARINC(eptr) \
+ (*(eptr)++)
+
+/* Returns the next uchar, testing for UTF mode, and not advancing the
+pointer. */
+
+#define RAWUCHARTEST(eptr) \
+ (*(eptr))
+
+/* Returns the next uchar, testing for UTF mode, and advancing the
+pointer. */
+
+#define RAWUCHARINCTEST(eptr) \
+ (*(eptr)++)
+
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-16 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-16 only
@@ -910,6 +962,30 @@
#define GETCHARLENTEST(c, eptr, len) \
GETCHARTEST(c, eptr)
+/* Returns the next uchar, not advancing the pointer. This is called when
+we know we are in UTF mode. */
+
+#define RAWUCHAR(eptr) \
+ (*(eptr) & UTF32_MASK)
+
+/* Returns the next uchar, advancing the pointer. This is called when
+we know we are in UTF mode. */
+
+#define RAWUCHARINC(eptr) \
+ (*(eptr)++ & UTF32_MASK)
+
+/* Returns the next uchar, testing for UTF mode, and not advancing the
+pointer. */
+
+#define RAWUCHARTEST(eptr) \
+ (utf ? (*(eptr) & UTF32_MASK) : *(eptr))
+
+/* Returns the next uchar, testing for UTF mode, and advancing the
+pointer. */
+
+#define RAWUCHARINCTEST(eptr) \
+ (utf ? (*(eptr)++ & UTF32_MASK) : *(eptr)++)
+
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-32 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-32 only
@@ -2581,6 +2657,25 @@
#endif /* COMPILE_PCRE[8|16|32] */
+#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
+
+#define STRCMP_UC_UC_TEST(str1, str2) STRCMP_UC_UC(str1, str2)
+#define STRCMP_UC_C8_TEST(str1, str2) STRCMP_UC_C8(str1, str2)
+
+#elif defined COMPILE_PCRE32
+
+extern int PRIV(strcmp_uc_uc_utf)(const pcre_uchar *,
+ const pcre_uchar *);
+extern int PRIV(strcmp_uc_c8_utf)(const pcre_uchar *,
+ const char *);
+
+#define STRCMP_UC_UC_TEST(str1, str2) \
+ (utf ? PRIV(strcmp_uc_uc_utf)((str1), (str2)) : PRIV(strcmp_uc_uc)((str1), (str2)))
+#define STRCMP_UC_C8_TEST(str1, str2) \
+ (utf ? PRIV(strcmp_uc_c8_utf)((str1), (str2)) : PRIV(strcmp_uc_c8)((str1), (str2)))
+
+#endif /* COMPILE_PCRE[8|16|32] */
+
extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
int *, BOOL);
Modified: code/trunk/pcre_string_utils.c
===================================================================
--- code/trunk/pcre_string_utils.c 2012-10-16 15:56:22 UTC (rev 1099)
+++ code/trunk/pcre_string_utils.c 2012-10-16 15:56:26 UTC (rev 1100)
@@ -81,7 +81,28 @@
return 0;
}
+#ifdef COMPILE_PCRE32
+
int
+PRIV(strcmp_uc_uc_utf)(const pcre_uchar *str1, const pcre_uchar *str2)
+{
+pcre_uchar c1;
+pcre_uchar c2;
+
+while (*str1 != '\0' || *str2 != '\0')
+ {
+ c1 = RAWUCHARINC(str1);
+ c2 = RAWUCHARINC(str2);
+ if (c1 != c2)
+ return ((c1 > c2) << 1) - 1;
+ }
+/* Both length and characters must be equal. */
+return 0;
+}
+
+#endif /* COMPILE_PCRE32 */
+
+int
PRIV(strcmp_uc_c8)(const pcre_uchar *str1, const char *str2)
{
const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
@@ -99,6 +120,28 @@
return 0;
}
+#ifdef COMPILE_PCRE32
+
+int
+PRIV(strcmp_uc_c8_utf)(const pcre_uchar *str1, const char *str2)
+{
+const pcre_uint8 *ustr2 = (pcre_uint8 *)str2;
+pcre_uchar c1;
+pcre_uchar c2;
+
+while (*str1 != '\0' || *ustr2 != '\0')
+ {
+ c1 = RAWUCHARINC(str1);
+ c2 = (pcre_uchar)*ustr2++;
+ if (c1 != c2)
+ return ((c1 > c2) << 1) - 1;
+ }
+/* Both length and characters must be equal. */
+return 0;
+}
+
+#endif /* COMPILE_PCRE32 */
+
/* The following two functions compares two, fixed length
strings. Basically an strncmp for non 8 bit characters.
@@ -163,6 +206,6 @@
return len;
}
-#endif /* COMPILE_PCRE8 */
+#endif /* !COMPILE_PCRE8 */
/* End of pcre_string_utils.c */