[Pcre-svn] [858] code/trunk: Replace multiple copies of exte…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [858] code/trunk: Replace multiple copies of extended grapheme sequence code with a single
Revision: 858
          http://www.exim.org/viewvc/pcre2?view=rev&revision=858
Author:   ph10
Date:     2017-09-12 17:28:42 +0100 (Tue, 12 Sep 2017)
Log Message:
-----------
Replace multiple copies of extended grapheme sequence code with a single 
subroutine.


Modified Paths:
--------------
    code/trunk/CMakeLists.txt
    code/trunk/ChangeLog
    code/trunk/Makefile.am
    code/trunk/NON-AUTOTOOLS-BUILD
    code/trunk/PrepareRelease
    code/trunk/README
    code/trunk/src/pcre2_dfa_match.c
    code/trunk/src/pcre2_internal.h
    code/trunk/src/pcre2_match.c


Added Paths:
-----------
    code/trunk/src/pcre2_extuni.c


Modified: code/trunk/CMakeLists.txt
===================================================================
--- code/trunk/CMakeLists.txt    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/CMakeLists.txt    2017-09-12 16:28:42 UTC (rev 858)
@@ -432,6 +432,7 @@
   src/pcre2_convert.c
   src/pcre2_dfa_match.c
   src/pcre2_error.c
+  src/pcre2_extuni.c 
   src/pcre2_find_bracket.c
   src/pcre2_jit_compile.c
   src/pcre2_maketables.c


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/ChangeLog    2017-09-12 16:28:42 UTC (rev 858)
@@ -5,9 +5,13 @@
 Version 10.31 xx-xxx-201x
 -------------------------


-1. Fix typo (missing ]) in VMS code in pcre2test.c.
+1. Fix typo (missing ]) in VMS code in pcre2test.c.

+2. Replace the replicated code for matching extended Unicode grapheme sequences
+(which got a lot more complicated by change 10.30/49) by a single subroutine
+that is called by both pcre2_match() and pcre2_dfa_match().

+
Version 10.30 14-August-2017
----------------------------


Modified: code/trunk/Makefile.am
===================================================================
--- code/trunk/Makefile.am    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/Makefile.am    2017-09-12 16:28:42 UTC (rev 858)
@@ -351,6 +351,7 @@
   src/pcre2_convert.c \
   src/pcre2_dfa_match.c \
   src/pcre2_error.c \
+  src/pcre2_extuni.c \
   src/pcre2_find_bracket.c \
   src/pcre2_internal.h \
   src/pcre2_intmodedep.h \


Modified: code/trunk/NON-AUTOTOOLS-BUILD
===================================================================
--- code/trunk/NON-AUTOTOOLS-BUILD    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/NON-AUTOTOOLS-BUILD    2017-09-12 16:28:42 UTC (rev 858)
@@ -91,8 +91,10 @@
        pcre2_compile.c
        pcre2_config.c
        pcre2_context.c
+       pcre2_convert.c 
        pcre2_dfa_match.c
        pcre2_error.c
+       pcre2_extuni.c 
        pcre2_find_bracket.c
        pcre2_jit_compile.c
        pcre2_maketables.c
@@ -377,4 +379,4 @@
 recommended download site.


=============================
-Last Updated: 17 March 2017
+Last Updated: 12 September 2017

Modified: code/trunk/PrepareRelease
===================================================================
--- code/trunk/PrepareRelease    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/PrepareRelease    2017-09-12 16:28:42 UTC (rev 858)
@@ -196,8 +196,10 @@
   src/pcre2_compile.c \
   src/pcre2_config.c \
   src/pcre2_context.c \
+  src/pcre2_convert.c \
   src/pcre2_dfa_match.c \
   src/pcre2_error.c \
+  src/pcre2_extuni.c \
   src/pcre2_find_bracket.c \
   src/pcre2_internal.h \
   src/pcre2_intmodedep.h \


Modified: code/trunk/README
===================================================================
--- code/trunk/README    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/README    2017-09-12 16:28:42 UTC (rev 858)
@@ -773,6 +773,7 @@
   src/pcre2_convert.c      )
   src/pcre2_dfa_match.c    )
   src/pcre2_error.c        )
+  src/pcre2_extuni.c       ) 
   src/pcre2_find_bracket.c )
   src/pcre2_jit_compile.c  )
   src/pcre2_jit_match.c    ) sources for the functions in the library,
@@ -882,4 +883,4 @@
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 18 July 2017
+Last updated: 12 September 2017


Modified: code/trunk/src/pcre2_dfa_match.c
===================================================================
--- code/trunk/src/pcre2_dfa_match.c    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/src/pcre2_dfa_match.c    2017-09-12 16:28:42 UTC (rev 858)
@@ -1364,8 +1364,6 @@
       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
       if (clen > 0)
         {
-        uint32_t lgb, rgb;
-        PCRE2_SPTR nptr = ptr + clen;
         int ncount = 0;
         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
           {
@@ -1372,55 +1370,8 @@
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
-        lgb = UCD_GRAPHBREAK(c);
-        while (nptr < end_subject)
-          {
-          dlen = 1;
-          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
-          rgb = UCD_GRAPHBREAK(d);
-          if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
-          /* Not breaking between Regional Indicators is allowed only if
-          there are an even number of preceding RIs. */
-
-          if (lgb == ucp_gbRegionalIndicator &&
-              rgb == ucp_gbRegionalIndicator)
-            {
-            int ricount = 0;
-            PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
-            if (utf) BACKCHAR(bptr);
-#endif
-            /* bptr is pointing to the left-hand character */
-
-            while (bptr > mb->start_subject)
-              {
-              bptr--;
-#ifdef SUPPORT_UNICODE
-              if (utf)
-                {
-                BACKCHAR(bptr);
-                GETCHAR(d, bptr);
-                }
-              else
-#endif
-              d = *bptr;
-              if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
-              ricount++;
-              }
-            if ((ricount & 1) != 0) break;  /* Grapheme break required */
-            }
-
-          /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-          any number of Extend before a following E_Modifier. */
-
-          if (rgb != ucp_gbExtend ||
-              (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-            lgb = rgb;
-
-          ncount++;
-          nptr += dlen;
-          }
+        (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, 
+          &ncount);
         count++;
         ADD_NEW_DATA(-state_offset, count, ncount);
         }
@@ -1663,8 +1614,6 @@
       ADD_ACTIVE(state_offset + 2, 0);
       if (clen > 0)
         {
-        uint32_t lgb, rgb;
-        PCRE2_SPTR nptr = ptr + clen;
         int ncount = 0;
         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
@@ -1672,55 +1621,8 @@
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
-        lgb = UCD_GRAPHBREAK(c);
-        while (nptr < end_subject)
-          {
-          dlen = 1;
-          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
-          rgb = UCD_GRAPHBREAK(d);
-          if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
-          /* Not breaking between Regional Indicators is allowed only if
-          there are an even number of preceding RIs. */
-
-          if (lgb == ucp_gbRegionalIndicator &&
-              rgb == ucp_gbRegionalIndicator)
-            {
-            int ricount = 0;
-            PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
-            if (utf) BACKCHAR(bptr);
-#endif
-            /* bptr is pointing to the left-hand character */
-
-            while (bptr > mb->start_subject)
-              {
-              bptr--;
-#ifdef SUPPORT_UNICODE
-              if (utf)
-                {
-                BACKCHAR(bptr);
-                GETCHAR(d, bptr);
-                }
-              else
-#endif
-              d = *bptr;
-              if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
-              ricount++;
-              }
-            if ((ricount & 1) != 0) break;  /* Grapheme break required */
-            }
-
-          /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-          any number of Extend before a following E_Modifier. */
-
-          if (rgb != ucp_gbExtend ||
-              (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-            lgb = rgb;
-
-          ncount++;
-          nptr += dlen;
-          }
+        (void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, 
+          &ncount);
         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
         }
       break;
@@ -1973,8 +1875,7 @@
       count = current_state->count;  /* Number already matched */
       if (clen > 0)
         {
-        uint32_t lgb, rgb;
-        PCRE2_SPTR nptr = ptr + clen;
+        PCRE2_SPTR nptr;
         int ncount = 0;
         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
           {
@@ -1981,55 +1882,8 @@
           active_count--;           /* Remove non-match possibility */
           next_active_state--;
           }
-        lgb = UCD_GRAPHBREAK(c);
-        while (nptr < end_subject)
-          {
-          dlen = 1;
-          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
-          rgb = UCD_GRAPHBREAK(d);
-          if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
-          /* Not breaking between Regional Indicators is allowed only if
-          there are an even number of preceding RIs. */
-
-          if (lgb == ucp_gbRegionalIndicator &&
-              rgb == ucp_gbRegionalIndicator)
-            {
-            int ricount = 0;
-            PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
-            if (utf) BACKCHAR(bptr);
-#endif
-            /* bptr is pointing to the left-hand character */
-
-            while (bptr > mb->start_subject)
-              {
-              bptr--;
-#ifdef SUPPORT_UNICODE
-              if (utf)
-                {
-                BACKCHAR(bptr);
-                GETCHAR(d, bptr);
-                }
-              else
-#endif
-              d = *bptr;
-              if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
-              ricount++;
-              }
-            if ((ricount & 1) != 0) break;  /* Grapheme break required */
-            }
-
-          /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-          any number of Extend before a following E_Modifier. */
-
-          if (rgb != ucp_gbExtend ||
-              (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-            lgb = rgb;
-
-          ncount++;
-          nptr += dlen;
-          }
+        nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf, 
+          &ncount);
         if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
             reset_could_continue = TRUE;
         if (++count >= (int)GET2(code, 1))
@@ -2206,58 +2060,9 @@
       case OP_EXTUNI:
       if (clen > 0)
         {
-        uint32_t lgb, rgb;
-        PCRE2_SPTR nptr = ptr + clen;
         int ncount = 0;
-        lgb = UCD_GRAPHBREAK(c);
-        while (nptr < end_subject)
-          {
-          dlen = 1;
-          if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
-          rgb = UCD_GRAPHBREAK(d);
-          if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
-
-          /* Not breaking between Regional Indicators is allowed only if
-          there are an even number of preceding RIs. */
-
-          if (lgb == ucp_gbRegionalIndicator &&
-              rgb == ucp_gbRegionalIndicator)
-            {
-            int ricount = 0;
-            PCRE2_SPTR bptr = nptr - 1;
-#ifdef SUPPORT_UNICODE
-            if (utf) BACKCHAR(bptr);
-#endif
-            /* bptr is pointing to the left-hand character */
-
-            while (bptr > mb->start_subject)
-              {
-              bptr--;
-#ifdef SUPPORT_UNICODE
-              if (utf)
-                {
-                BACKCHAR(bptr);
-                GETCHAR(d, bptr);
-                }
-              else
-#endif
-              d = *bptr;
-              if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
-              ricount++;
-              }
-            if ((ricount & 1) != 0) break;  /* Grapheme break required */
-            }
-
-          /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-          any number of Extend before a following E_Modifier. */
-
-          if (rgb != ucp_gbExtend ||
-              (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-            lgb = rgb;
-
-          ncount++;
-          nptr += dlen;
-          }
+        PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, 
+          end_subject, utf, &ncount);
         if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
             reset_could_continue = TRUE;
         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);


Added: code/trunk/src/pcre2_extuni.c
===================================================================
--- code/trunk/src/pcre2_extuni.c                            (rev 0)
+++ code/trunk/src/pcre2_extuni.c    2017-09-12 16:28:42 UTC (rev 858)
@@ -0,0 +1,129 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2017 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains an internal function that is used to match a Unicode
+extended grapheme sequence. It is used by both pcre2_match() and
+pcre2_dfa_match(). */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "pcre2_internal.h"
+
+/*************************************************
+*      Match an extended grapheme sequence       *
+*************************************************/
+
+/* Scan forward from eptr while the break table forbids a grapheme break.
+Arguments:
+  c              the first character of the sequence; reused as scratch below
+  eptr           pointer to next character, where the forward scan starts
+  start_subject  pointer to start of subject; bounds the backward RI scan
+  end_subject    pointer to end of subject; the forward scan stops here
+  utf            TRUE if in UTF mode (characters are read with GETCHARLEN)
+  xcount         pointer to count of additional characters, incremented once
+                   per character consumed, or NULL if count not needed
+
+Returns:         pointer after the end of the sequence
+*/
+
+PCRE2_SPTR
+PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
+  PCRE2_SPTR end_subject, BOOL utf, int *xcount)
+{
+int lgb = UCD_GRAPHBREAK(c);
+
+while (eptr < end_subject)
+  {
+  int rgb; 
+  int len = 1;
+  if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+  rgb = UCD_GRAPHBREAK(c);
+  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+
+  /* Two adjacent Regional Indicators stay together only if the number of
+  RIs preceding the left-hand one is even; an odd count forces a break. */
+
+  if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+    {
+    int ricount = 0;
+    PCRE2_SPTR bptr = eptr - 1;
+#ifdef SUPPORT_UNICODE
+    if (utf) BACKCHAR(bptr);
+#endif
+
+    /* bptr points to the left-hand RI; count the RIs that come before it */
+
+    while (bptr > start_subject)
+      {
+      bptr--;
+#ifdef SUPPORT_UNICODE
+      if (utf)
+        {
+        BACKCHAR(bptr);
+        GETCHAR(c, bptr);
+        }
+      else
+#endif
+      c = *bptr;
+      if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+      ricount++;
+      }
+    if ((ricount & 1) != 0) break;  /* Odd count: grapheme break required */
+    }
+
+  /* If Extend follows E_Base[_GAZ] leave lgb unchanged, so that any number
+  of Extend characters may come before a following E_Modifier. */
+
+  if (rgb != ucp_gbExtend ||
+      (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+    lgb = rgb;
+
+  eptr += len;
+  if (xcount != NULL) *xcount += 1; 
+  }
+
+return eptr;
+}
+
+/* End of pcre2_extuni.c */


Modified: code/trunk/src/pcre2_internal.h
===================================================================
--- code/trunk/src/pcre2_internal.h    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/src/pcre2_internal.h    2017-09-12 16:28:42 UTC (rev 858)
@@ -1926,6 +1926,7 @@


 #define _pcre2_auto_possessify       PCRE2_SUFFIX(_pcre2_auto_possessify_)
 #define _pcre2_check_escape          PCRE2_SUFFIX(_pcre2_check_escape_)
+#define _pcre2_extuni                PCRE2_SUFFIX(_pcre2_extuni_)
 #define _pcre2_find_bracket          PCRE2_SUFFIX(_pcre2_find_bracket_)
 #define _pcre2_is_newline            PCRE2_SUFFIX(_pcre2_is_newline_)
 #define _pcre2_jit_free_rodata       PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
@@ -1949,6 +1950,8 @@
                       const compile_block *);
 extern int          _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
                       int *, uint32_t, BOOL, compile_block *);
+extern PCRE2_SPTR   _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
+                      BOOL, int *);
 extern PCRE2_SPTR   _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
 extern BOOL         _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
                       uint32_t *, BOOL);


Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c    2017-09-12 11:41:31 UTC (rev 857)
+++ code/trunk/src/pcre2_match.c    2017-09-12 16:28:42 UTC (rev 858)
@@ -2440,55 +2440,9 @@
       }
     else
       {
-      int lgb, rgb;
       GETCHARINCTEST(fc, Feptr);
-      lgb = UCD_GRAPHBREAK(fc);
-      while (Feptr < mb->end_subject)
-        {
-        int len = 1;
-        if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
-        rgb = UCD_GRAPHBREAK(fc);
-        if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
-        /* Not breaking between Regional Indicators is allowed only if there
-        are an even number of preceding RIs. */
-
-        if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
-          {
-          int ricount = 0;
-          PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
-          if (utf) BACKCHAR(bptr);
-#endif
-          /* bptr is pointing to the left-hand character */
-
-          while (bptr > mb->start_subject)
-            {
-            bptr--;
-#ifdef SUPPORT_UNICODE
-            if (utf)
-              {
-              BACKCHAR(bptr);
-              GETCHAR(fc, bptr);
-              }
-            else
-#endif
-            fc = *bptr;
-            if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
-            ricount++;
-            }
-          if ((ricount & 1) != 0) break;  /* Grapheme break required */
-          }
-
-        /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-        any number of Extend before a following E_Modifier. */
-
-        if (rgb != ucp_gbExtend ||
-            (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-          lgb = rgb;
-
-        Feptr += len;
-        }
+      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf, 
+        NULL);
       }
     CHECK_PARTIAL();
     Fecode++;
@@ -2785,61 +2739,13 @@
             }
           else
             {
-            int lgb, rgb;
             GETCHARINCTEST(fc, Feptr);
-            lgb = UCD_GRAPHBREAK(fc);
-            while (Feptr < mb->end_subject)
-              {
-              int len = 1;
-              if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
-              rgb = UCD_GRAPHBREAK(fc);
-              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
-              /* Not breaking between Regional Indicators is allowed only if
-              there are an even number of preceding RIs. */
-
-              if (lgb == ucp_gbRegionalIndicator &&
-                  rgb == ucp_gbRegionalIndicator)
-                {
-                int ricount = 0;
-                PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
-                if (utf) BACKCHAR(bptr);
-#endif
-                /* bptr is pointing to the left-hand character */
-
-                while (bptr > mb->start_subject)
-                  {
-                  bptr--;
-#ifdef SUPPORT_UNICODE
-                  if (utf)
-                    {
-                    BACKCHAR(bptr);
-                    GETCHAR(fc, bptr);
-                    }
-                  else
-#endif
-                  fc = *bptr;
-                  if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
-                  ricount++;
-                  }
-                if ((ricount & 1) != 0) break;  /* Grapheme break required */
-                }
-
-              /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-              any number of Extend before a following E_Modifier. */
-
-              if (rgb != ucp_gbExtend ||
-                  (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-                lgb = rgb;
-
-              Feptr += len;
-              }
+            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, 
+              mb->end_subject, utf, NULL);
             }
           CHECK_PARTIAL();
           }
         }
-
       else
 #endif     /* SUPPORT_UNICODE */


@@ -3593,56 +3499,9 @@
             }
           else
             {
-            int lgb, rgb;
             GETCHARINCTEST(fc, Feptr);
-            lgb = UCD_GRAPHBREAK(fc);
-            while (Feptr < mb->end_subject)
-              {
-              int len = 1;
-              if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
-              rgb = UCD_GRAPHBREAK(fc);
-              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
-              /* Not breaking between Regional Indicators is allowed only if
-              there are an even number of preceding RIs. */
-
-              if (lgb == ucp_gbRegionalIndicator &&
-                  rgb == ucp_gbRegionalIndicator)
-                {
-                int ricount = 0;
-                PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
-                if (utf) BACKCHAR(bptr);
-#endif
-                /* bptr is pointing to the left-hand character */
-
-                while (bptr > mb->start_subject)
-                  {
-                  bptr--;
-#ifdef SUPPORT_UNICODE
-                  if (utf)
-                    {
-                    BACKCHAR(bptr);
-                    GETCHAR(fc, bptr);
-                    }
-                  else
-#endif
-                  fc = *bptr;
-                  if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
-                  ricount++;
-                  }
-                if ((ricount & 1) != 0) break;  /* Grapheme break required */
-                }
-
-              /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-              any number of Extend before a following E_Modifier. */
-
-              if (rgb != ucp_gbExtend ||
-                  (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-                lgb = rgb;
-
-              Feptr += len;
-              }
+            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
+              utf, NULL);
             }
           CHECK_PARTIAL();
           }
@@ -4167,56 +4026,9 @@
             }
           else
             {
-            int lgb, rgb;
             GETCHARINCTEST(fc, Feptr);
-            lgb = UCD_GRAPHBREAK(fc);
-            while (Feptr < mb->end_subject)
-              {
-              int len = 1;
-              if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
-              rgb = UCD_GRAPHBREAK(fc);
-              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
-
-              /* Not breaking between Regional Indicators is allowed only if
-              there are an even number of preceding RIs. */
-
-              if (lgb == ucp_gbRegionalIndicator &&
-                  rgb == ucp_gbRegionalIndicator)
-                {
-                int ricount = 0;
-                PCRE2_SPTR bptr = Feptr - 1;
-#ifdef SUPPORT_UNICODE
-                if (utf) BACKCHAR(bptr);
-#endif
-                /* bptr is pointing to the left-hand character */
-
-                while (bptr > mb->start_subject)
-                  {
-                  bptr--;
-#ifdef SUPPORT_UNICODE
-                  if (utf)
-                    {
-                    BACKCHAR(bptr);
-                    GETCHAR(fc, bptr);
-                    }
-                  else
-#endif
-                  fc = *bptr;
-                  if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
-                  ricount++;
-                  }
-                if ((ricount & 1) != 0) break;  /* Grapheme break required */
-                }
-
-              /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-              any number of Extend before a following E_Modifier. */
-
-              if (rgb != ucp_gbExtend ||
-                  (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
-                lgb = rgb;
-
-              Feptr += len;
-              }
+            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
+              utf, NULL);
             }
           CHECK_PARTIAL();
           }