[Pcre-svn] [1093] code/trunk/src: Rework word boundary in JI…

Page principale
Supprimer ce message
Auteur: Subversion repository
Date:  
À: pcre-svn
Sujet: [Pcre-svn] [1093] code/trunk/src: Rework word boundary in JIT.
Revision: 1093
          http://www.exim.org/viewvc/pcre2?view=rev&revision=1093
Author:   zherczeg
Date:     2019-05-23 08:46:10 +0100 (Thu, 23 May 2019)
Log Message:
-----------
Rework word boundary in JIT.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/pcre2_jit_test.c


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2019-05-13 16:38:18 UTC (rev 1092)
+++ code/trunk/src/pcre2_jit_compile.c    2019-05-23 07:46:10 UTC (rev 1093)
@@ -3898,9 +3898,9 @@


static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
-/* Goes one character back. TMP2 must contain the start of
-the subject buffer. Affects STR_PTR and TMP1. Does not modify
-STR_PTR for invalid character sequences. */
+/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
+TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
+and it is destroyed. Does not modify STR_PTR for invalid character sequences. */
DEFINE_COMPILER;

SLJIT_UNUSED_ARG(backtracks);
@@ -4440,7 +4440,7 @@

static void do_utfpeakcharback(compiler_common *common)
{
-/* Peak a character back. */
+/* Peak a character back. Does not modify STR_PTR. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

@@ -4477,7 +4477,7 @@

static void do_utfpeakcharback_invalid(compiler_common *common)
{
-/* Peak a character back. */
+/* Peak a character back. Does not modify STR_PTR. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
@@ -4705,7 +4705,7 @@

static void do_utfpeakcharback_invalid(compiler_common *common)
{
-/* Peak a character back. */
+/* Peak a character back. Does not modify STR_PTR. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];
@@ -4819,18 +4819,12 @@
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

-// PH hacking
-//fprintf(stderr, "~~A\n");
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
-
+/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
-
-// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

@@ -6770,7 +6764,11 @@
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
-jump_list *invalid_utf = NULL;
+#ifdef SUPPORT_UNICODE
+struct sljit_label *valid_utf;
+jump_list *invalid_utf1 = NULL;
+#endif /* SUPPORT_UNICODE */
+jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
@@ -6784,14 +6782,30 @@
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

-if (common->mode == PCRE2_JIT_COMPLETE)
-  peek_char_back(common, READ_CHAR_MAX, &invalid_utf);
+#ifdef SUPPORT_UNICODE
+if (common->invalid_utf)
+  {
+  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
+
+  if (common->mode != PCRE2_JIT_COMPLETE)
+    {
+    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+    move_back(common, NULL, TRUE);
+    check_start_used_ptr(common);
+    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
+    }
+  }
 else
+#endif /* SUPPORT_UNICODE */
   {
-  move_back(common, &invalid_utf, FALSE);
-  check_start_used_ptr(common);
-  /* No need precise read since match fails anyway. */
-  read_char(common, 0, READ_CHAR_MAX, &invalid_utf, READ_CHAR_UPDATE_STR_PTR);
+  if (common->mode == PCRE2_JIT_COMPLETE)
+    peek_char_back(common, READ_CHAR_MAX, NULL);
+  else
+    {
+    move_back(common, NULL, TRUE);
+    check_start_used_ptr(common);
+    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
+    }
   }


/* Testing char type. */
@@ -6835,10 +6849,13 @@

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
-peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf);
+peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE
+
+valid_utf = LABEL();
+
if (common->use_ucp)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
@@ -6884,13 +6901,19 @@
#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
{
- SLJIT_ASSERT(invalid_utf != NULL);
+ set_jumps(invalid_utf1, LABEL());

-  set_jumps(invalid_utf, LABEL());
+  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
+  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
+
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
   sljit_emit_fast_return(compiler, TMP1, 0);
-  return;
+
+  set_jumps(invalid_utf2, LABEL());
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
+  sljit_emit_fast_return(compiler, TMP1, 0);
   }
 #endif /* SUPPORT_UNICODE */
 }
@@ -8224,9 +8247,7 @@
 #ifdef SUPPORT_UNICODE
   if (common->invalid_utf)
     {
-    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
-    add_jump(compiler, backtracks, JUMP(SLJIT_SIG_LESS));
-    add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
     return cc;
     }
 #endif /* SUPPORT_UNICODE */


Modified: code/trunk/src/pcre2_jit_test.c
===================================================================
--- code/trunk/src/pcre2_jit_test.c    2019-05-13 16:38:18 UTC (rev 1092)
+++ code/trunk/src/pcre2_jit_test.c    2019-05-23 07:46:10 UTC (rev 1093)
@@ -1864,53 +1864,52 @@
     { UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },


     { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" },
-    { UDA, CPI, 4, 1, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
+    { UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
     { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
     { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
     { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
     { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" },
-    { UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
+    { UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
+    { UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
     { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
     { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
     { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
     { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0#" },
-    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
-    { UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
+    { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },


     { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
     { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
-    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" },
-    { UDA, CPI, 3, 1, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" },
-    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" },
-    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" },
-    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" },
-    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" },
+    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
+    { UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
+    { UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
+    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
+    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
+    { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },


     { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
     { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
-    { UDA, CPI, 2, 1, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xbf#" },
-    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf#" },
-    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80#" },
-    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff#" },
-    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf#" },
+    { UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
+    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
+    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
+    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
+    { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },


     { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
     { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
-    { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },
-    { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },
+    { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
+    { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },


     { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
     { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
@@ -2107,10 +2106,10 @@
 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
-static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, '#', 0 };
-static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, '#', 0 };
-static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, '#', 0 };
-static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, '#', 0 };
+static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
+static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
+static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
+static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
@@ -2125,10 +2124,10 @@
     { UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
     { UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
     { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
-    { UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_2 },
+    { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
     { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
     { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
-    { UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_3 },
+    { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
     { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },


     { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
@@ -2137,8 +2136,8 @@
     { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
     { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
     { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
-    { UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
-    { UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
+    { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
+    { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
     { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
     { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },


@@ -2313,8 +2312,8 @@
 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
-static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x10ffff, 0 };
-static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0 };
+static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
+static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
 static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
 static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
@@ -2329,11 +2328,10 @@
     { UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },


     { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
-    { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
     { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
     { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
-    { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
-    { UDA, CPI, 4, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
+    { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
+    { UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },


     { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
     { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },