[Pcre-svn] [905] code/trunk: Support the new EXTUNI in JIT.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [905] code/trunk: Support the new EXTUNI in JIT.
Revision: 905
          http://www.exim.org/viewvc/pcre2?view=rev&revision=905
Author:   zherczeg
Date:     2018-01-06 08:48:11 +0000 (Sat, 06 Jan 2018)
Log Message:
-----------
Support the new EXTUNI in JIT.


Modified Paths:
--------------
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/testdata/testinput5
    code/trunk/testdata/testoutput5


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2018-01-05 09:30:45 UTC (rev 904)
+++ code/trunk/src/pcre2_jit_compile.c    2018-01-06 08:48:11 UTC (rev 905)
@@ -7207,6 +7207,122 @@
 return cc;
 }


+#ifdef SUPPORT_UNICODE
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(PCRE2_SPTR cc, jit_arguments *args)
+{ /* UTF-mode helper called from JIT code for OP_EXTUNI: consumes the character at cc plus every following character that may not be split from it, and returns the pointer just past the last consumed one. */
+PCRE2_SPTR start_subject = args->begin;
+PCRE2_SPTR end_subject = args->end;
+int lgb, rgb, len, ricount;  /* lgb/rgb: grapheme-break property of the left/right character; len: code units in the right character */
+PCRE2_SPTR prevcc, bptr;
+uint32_t c;
+
+prevcc = cc;  /* start of the current left-hand character */
+GETCHARINC(c, cc);  /* no bounds check here; presumably the caller guarantees cc < end_subject -- confirm */
+lgb = UCD_GRAPHBREAK(c);
+
+while (cc < end_subject)
+  {
+  len = 1;
+  GETCHARLEN(c, cc, len);  /* peek at the right-hand character without advancing cc */
+  rgb = UCD_GRAPHBREAK(c);
+
+  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;  /* table bit clear: stop before the right-hand character */
+
+  /* Not breaking between Regional Indicators is allowed only if there
+  are an even number of preceding RIs. */
+
+  if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+    {
+    ricount = 0;
+    bptr = prevcc;
+
+    /* bptr is pointing to the left-hand character */
+    while (bptr > start_subject)  /* count consecutive RIs before the left-hand character, scanning back to the subject start */
+      {
+      bptr--;
+      BACKCHAR(bptr);  /* presumably rewinds bptr to the first code unit of that character -- confirm against the macro */
+      GETCHAR(c, bptr);
+
+      if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+
+      ricount++;
+      }
+
+    if ((ricount & 1) != 0) break;  /* Grapheme break required */
+    }
+
+  /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+  any number of Extend before a following E_Modifier. */
+
+  if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+    lgb = rgb;
+
+  prevcc = cc;  /* the right-hand character becomes the new left-hand one */
+  cc += len;
+  }
+
+return cc;
+}
+
+#endif
+
+static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(PCRE2_SPTR cc, jit_arguments *args)
+{ /* Non-UTF helper called from JIT code for OP_EXTUNI: every code unit is one character. Consumes the unit at cc plus every following unit that may not be split from it, and returns the pointer just past the last consumed one. */
+PCRE2_SPTR start_subject = args->begin;
+PCRE2_SPTR end_subject = args->end;
+int lgb, rgb, ricount;  /* lgb/rgb: grapheme-break property of the left/right character */
+PCRE2_SPTR bptr;
+uint32_t c;
+
+c = *cc++;  /* plain code-unit read: GETCHARINC would decode a UTF sequence here, which is wrong in non-UTF mode and can advance cc too far */
+lgb = UCD_GRAPHBREAK(c);
+
+while (cc < end_subject)
+  {
+  c = *cc;  /* peek at the right-hand character without advancing cc */
+  rgb = UCD_GRAPHBREAK(c);
+
+  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;  /* table bit clear: stop before the right-hand character */
+
+  /* Not breaking between Regional Indicators is allowed only if there
+  are an even number of preceding RIs. */
+
+  if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+    {
+    ricount = 0;
+    bptr = cc - 1;
+
+    /* bptr is pointing to the left-hand character */
+    while (bptr > start_subject)  /* count consecutive RIs before the left-hand character, scanning back to the subject start */
+      {
+      bptr--;
+      c = *bptr;
+
+      if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+
+      ricount++;
+      }
+
+    if ((ricount & 1) != 0) break;  /* Grapheme break required */
+    }
+
+  /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+  any number of Extend before a following E_Modifier. */
+
+  if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+    lgb = rgb;
+
+  cc++;
+  }
+
+return cc;
+}
+
+#endif
+
 static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
 {
 DEFINE_COMPILER;
@@ -7216,7 +7332,6 @@
 struct sljit_jump *jump[3];
 jump_list *end_list;
 #ifdef SUPPORT_UNICODE
-struct sljit_label *label;
 PCRE2_UCHAR propdata[5];
 #endif /* SUPPORT_UNICODE */


@@ -7383,35 +7498,24 @@
   case OP_EXTUNI:
   if (check_str_ptr)
     detect_partial_match(common, backtracks);
-  read_char(common);
-  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
-  /* Optimize register allocation: use a real register. */
+
+  OP1(SLJIT_MOV, SLJIT_R0, 0, STR_PTR, 0);
   OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
-  OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);


-  label = LABEL();
-  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
-  read_char(common);
-  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
-  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+#if PCRE2_CODE_UNIT_WIDTH != 32
+  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
+      common->utf ? SLJIT_FUNC_OFFSET(do_extuni_utf) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
+#else
+  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf));
+#endif


-  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
-  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
-  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
-  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
-  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
-  JUMPTO(SLJIT_NOT_ZERO, label);
-
-  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
-  JUMPHERE(jump[0]);
   OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

   if (common->mode == PCRE2_JIT_PARTIAL_HARD)
     {
-    jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
+    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
     /* Since we successfully read a char above, partial matching must occure. */
     check_partial(common, TRUE);
     JUMPHERE(jump[0]);


Modified: code/trunk/testdata/testinput5
===================================================================
--- code/trunk/testdata/testinput5    2018-01-05 09:30:45 UTC (rev 904)
+++ code/trunk/testdata/testinput5    2018-01-06 08:48:11 UTC (rev 905)
@@ -2053,13 +2053,13 @@
     \x{200d}\x{1F3A4}B             ZWJ Glue_After_ZWJ
     \x{200d}\x{1F469}B             ZWJ E_Base_GAZ  
     \x{1F1E6}\x{1F1E7}B            RegionalIndicator RegionalIndicator 
-    \x{261D}\x{E0100}\x{1F3FB}B\=no_jit    E_Base Extend E_Modifier
-    
+    \x{261D}\x{E0100}\x{1F3FB}B    E_Base Extend E_Modifier
+
 # Regional indicators


 /^(\X)(\X)/utf,aftertext
-    \x{1F1E6}\x{1F1E7}\x{1F1E7}B\=no_jit
-    \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B\=no_jit
+    \x{1F1E6}\x{1F1E7}\x{1F1E7}B
+    \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B



# End of testinput5

Modified: code/trunk/testdata/testoutput5
===================================================================
--- code/trunk/testdata/testoutput5    2018-01-05 09:30:45 UTC (rev 904)
+++ code/trunk/testdata/testoutput5    2018-01-06 08:48:11 UTC (rev 905)
@@ -4685,18 +4685,18 @@
  0: \x{200d}\x{1f469}
     \x{1F1E6}\x{1F1E7}B            RegionalIndicator RegionalIndicator 
  0: \x{1f1e6}\x{1f1e7}
-    \x{261D}\x{E0100}\x{1F3FB}B\=no_jit    E_Base Extend E_Modifier
-** /n is not valid here
-    
+    \x{261D}\x{E0100}\x{1F3FB}B    E_Base Extend E_Modifier
+ 0: \x{261d}\x{e0100}\x{1f3fb}
+
 # Regional indicators


 /^(\X)(\X)/utf,aftertext
-    \x{1F1E6}\x{1F1E7}\x{1F1E7}B\=no_jit
+    \x{1F1E6}\x{1F1E7}\x{1F1E7}B
  0: \x{1f1e6}\x{1f1e7}\x{1f1e7}
  0+ B
  1: \x{1f1e6}\x{1f1e7}
  2: \x{1f1e7}
-    \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B\=no_jit
+    \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
  0: \x{1f1e6}\x{1f1e7}\x{1f1e7}\x{1f1e6}
  0+ B
  1: \x{1f1e6}\x{1f1e7}