Revision: 858
http://www.exim.org/viewvc/pcre2?view=rev&revision=858
Author: ph10
Date: 2017-09-12 17:28:42 +0100 (Tue, 12 Sep 2017)
Log Message:
-----------
Replace multiple copies of extended grapheme sequence code with a single
subroutine.
Modified Paths:
--------------
code/trunk/CMakeLists.txt
code/trunk/ChangeLog
code/trunk/Makefile.am
code/trunk/NON-AUTOTOOLS-BUILD
code/trunk/PrepareRelease
code/trunk/README
code/trunk/src/pcre2_dfa_match.c
code/trunk/src/pcre2_internal.h
code/trunk/src/pcre2_match.c
Added Paths:
-----------
code/trunk/src/pcre2_extuni.c
Modified: code/trunk/CMakeLists.txt
===================================================================
--- code/trunk/CMakeLists.txt 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/CMakeLists.txt 2017-09-12 16:28:42 UTC (rev 858)
@@ -432,6 +432,7 @@
src/pcre2_convert.c
src/pcre2_dfa_match.c
src/pcre2_error.c
+ src/pcre2_extuni.c
src/pcre2_find_bracket.c
src/pcre2_jit_compile.c
src/pcre2_maketables.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/ChangeLog 2017-09-12 16:28:42 UTC (rev 858)
@@ -5,9 +5,13 @@
Version 10.31 xx-xxx-201x
-------------------------
-1. Fix typo (missing ]) in VMS code in pcre2test.c.
+1. Fix typo (missing ]) in VMS code in pcre2test.c.
+2. Replace the replicated code for matching extended Unicode grapheme sequences
+(which got a lot more complicated by change 10.30/49) by a single subroutine
+that is called by both pcre2_match() and pcre2_dfa_match().
+
Version 10.30 14-August-2017
----------------------------
Modified: code/trunk/Makefile.am
===================================================================
--- code/trunk/Makefile.am 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/Makefile.am 2017-09-12 16:28:42 UTC (rev 858)
@@ -351,6 +351,7 @@
src/pcre2_convert.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
+ src/pcre2_extuni.c \
src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
Modified: code/trunk/NON-AUTOTOOLS-BUILD
===================================================================
--- code/trunk/NON-AUTOTOOLS-BUILD 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/NON-AUTOTOOLS-BUILD 2017-09-12 16:28:42 UTC (rev 858)
@@ -91,8 +91,10 @@
pcre2_compile.c
pcre2_config.c
pcre2_context.c
+ pcre2_convert.c
pcre2_dfa_match.c
pcre2_error.c
+ pcre2_extuni.c
pcre2_find_bracket.c
pcre2_jit_compile.c
pcre2_maketables.c
@@ -377,4 +379,4 @@
recommended download site.
=============================
-Last Updated: 17 March 2017
+Last Updated: 12 September 2017
Modified: code/trunk/PrepareRelease
===================================================================
--- code/trunk/PrepareRelease 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/PrepareRelease 2017-09-12 16:28:42 UTC (rev 858)
@@ -196,8 +196,10 @@
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
+ src/pcre2_convert.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
+ src/pcre2_extuni.c \
src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
Modified: code/trunk/README
===================================================================
--- code/trunk/README 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/README 2017-09-12 16:28:42 UTC (rev 858)
@@ -773,6 +773,7 @@
src/pcre2_convert.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
+ src/pcre2_extuni.c )
src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
@@ -882,4 +883,4 @@
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 18 July 2017
+Last updated: 12 September 2017
Modified: code/trunk/src/pcre2_dfa_match.c
===================================================================
--- code/trunk/src/pcre2_dfa_match.c 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/src/pcre2_dfa_match.c 2017-09-12 16:28:42 UTC (rev 858)
@@ -1364,8 +1364,6 @@
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
int ncount = 0;
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
{
@@ -1372,55 +1370,8 @@
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
+ &ncount);
count++;
ADD_NEW_DATA(-state_offset, count, ncount);
}
@@ -1663,8 +1614,6 @@
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
@@ -1672,55 +1621,8 @@
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
+ &ncount);
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
}
break;
@@ -1973,8 +1875,7 @@
count = current_state->count; /* Number already matched */
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
+ PCRE2_SPTR nptr;
int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
{
@@ -1981,55 +1882,8 @@
active_count--; /* Remove non-match possibility */
next_active_state--;
}
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
+ &ncount);
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= (int)GET2(code, 1))
@@ -2206,58 +2060,9 @@
case OP_EXTUNI:
if (clen > 0)
{
- uint32_t lgb, rgb;
- PCRE2_SPTR nptr = ptr + clen;
int ncount = 0;
- lgb = UCD_GRAPHBREAK(c);
- while (nptr < end_subject)
- {
- dlen = 1;
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
- rgb = UCD_GRAPHBREAK(d);
- if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(d, bptr);
- }
- else
-#endif
- d = *bptr;
- if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- ncount++;
- nptr += dlen;
- }
+ PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
+ end_subject, utf, &ncount);
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
Added: code/trunk/src/pcre2_extuni.c
===================================================================
--- code/trunk/src/pcre2_extuni.c (rev 0)
+++ code/trunk/src/pcre2_extuni.c 2017-09-12 16:28:42 UTC (rev 858)
@@ -0,0 +1,129 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Original API code Copyright (c) 1997-2012 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains an internal function that is used to match a Unicode
+extended grapheme sequence. It is used by both pcre2_match() and
+pcre2_dfa_match(). */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "pcre2_internal.h"
+
+/*************************************************
+* Match an extended grapheme sequence *
+*************************************************/
+
+/* Scan forward over the rest of one extended grapheme sequence.
+Arguments:
+ c the first character (already read by the caller)
+ eptr pointer to next character; the forward scan starts here
+ start_subject pointer to start of subject; limit of the backward RI scan
+ end_subject pointer to end of subject; limit of the forward scan
+ utf TRUE if in UTF mode
+ xcount pointer to count of additional characters (incremented once per
+ accepted character, never reset here), or NULL if count not needed
+
+Returns: pointer after the end of the sequence
+*/
+
+PCRE2_SPTR
+PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
+ PCRE2_SPTR end_subject, BOOL utf, int *xcount)
+{
+int lgb = UCD_GRAPHBREAK(c); /* lgb/rgb: values for the left/right character of a pair */
+
+while (eptr < end_subject)
+ {
+ int rgb;
+ int len = 1; /* amount eptr advances; GETCHARLEN may change it in UTF mode */
+ if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+ rgb = UCD_GRAPHBREAK(c);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break; /* bit rgb of row lgb clear: stop */
+
+ /* Not breaking between Regional Indicators is allowed only if there
+ are an even number of preceding RIs, so count them by scanning backwards. */
+
+ if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = eptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+
+ /* bptr is pointing to the left-hand character; ricount does not include it */
+
+ while (bptr > start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(c, bptr); /* c is scratch here; it is reloaded at the top of the outer loop */
+ }
+ else
+#endif
+ c = *bptr;
+ if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
+ eptr += len; /* the right-hand character joins the sequence */
+ if (xcount != NULL) *xcount += 1;
+ }
+
+return eptr;
+}
+
+/* End of pcre2_extuni.c */
Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/src/pcre2_internal.h 2017-09-12 16:28:42 UTC (rev 858)
@@ -1926,6 +1926,7 @@
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
+#define _pcre2_extuni PCRE2_SUFFIX(_pcre2_extuni_)
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
@@ -1949,6 +1950,8 @@
const compile_block *);
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
int *, uint32_t, BOOL, compile_block *);
+extern PCRE2_SPTR _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
+ BOOL, int *);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
uint32_t *, BOOL);
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/src/pcre2_match.c 2017-09-12 16:28:42 UTC (rev 858)
@@ -2440,55 +2440,9 @@
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if there
- are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
+ NULL);
}
CHECK_PARTIAL();
Fecode++;
@@ -2785,61 +2739,13 @@
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
+ mb->end_subject, utf, NULL);
}
CHECK_PARTIAL();
}
}
-
else
#endif /* SUPPORT_UNICODE */
@@ -3593,56 +3499,9 @@
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
+ utf, NULL);
}
CHECK_PARTIAL();
}
@@ -4167,56 +4026,9 @@
}
else
{
- int lgb, rgb;
GETCHARINCTEST(fc, Feptr);
- lgb = UCD_GRAPHBREAK(fc);
- while (Feptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
- rgb = UCD_GRAPHBREAK(fc);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
- /* Not breaking between Regional Indicators is allowed only if
- there are an even number of preceding RIs. */
-
- if (lgb == ucp_gbRegionalIndicator &&
- rgb == ucp_gbRegionalIndicator)
- {
- int ricount = 0;
- PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(bptr);
-#endif
- /* bptr is pointing to the left-hand character */
-
- while (bptr > mb->start_subject)
- {
- bptr--;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- BACKCHAR(bptr);
- GETCHAR(fc, bptr);
- }
- else
-#endif
- fc = *bptr;
- if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
- ricount++;
- }
- if ((ricount & 1) != 0) break; /* Grapheme break required */
- }
-
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
-
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
- lgb = rgb;
-
- Feptr += len;
- }
+ Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
+ utf, NULL);
}
CHECK_PARTIAL();
}