[Pcre-svn] [225] code/trunk: Add string offset within the pa…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [225] code/trunk: Add string offset within the pattern to the data passed to a callout with a string argument.
Revision: 225
          http://www.exim.org/viewvc/pcre2?view=rev&revision=225
Author:   ph10
Date:     2015-03-14 12:20:18 +0000 (Sat, 14 Mar 2015)


Log Message:
-----------
Add string offset within the pattern to the data passed to a callout with a
string argument.

Modified Paths:
--------------
    code/trunk/src/pcre2.h.in
    code/trunk/src/pcre2_compile.c
    code/trunk/src/pcre2_dfa_match.c
    code/trunk/src/pcre2_jit_compile.c
    code/trunk/src/pcre2_match.c
    code/trunk/src/pcre2_printint.c
    code/trunk/src/pcre2test.c
    code/trunk/testdata/testoutput2
    code/trunk/testdata/testoutput6


Modified: code/trunk/src/pcre2.h.in
===================================================================
--- code/trunk/src/pcre2.h.in    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2.h.in    2015-03-14 12:20:18 UTC (rev 225)
@@ -338,6 +338,7 @@
   PCRE2_SIZE    pattern_position;  /* Offset to next item in the pattern */ \
   PCRE2_SIZE    next_item_length;  /* Length of next item in the pattern */ \
   /* ------------------- Added for Version 1 -------------------------- */ \
+  PCRE2_SIZE    callout_string_offset; /* Offset to string within pattern */ \
   PCRE2_SPTR    callout_string;    /* String compiled into pattern */ \
   uint32_t      callout_string_length; /* Length of string compiled into pattern */ \
   /* ------------------------------------------------------------------ */ \


Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_compile.c    2015-03-14 12:20:18 UTC (rev 225)
@@ -5652,17 +5652,19 @@
             for the terminating zero. Any doubled delimiters within the string
             make this an overestimate, but it is not worth bothering about. */


-            (*lengthptr) += (ptr - start) + 2 + (1 + 3*LINK_SIZE);
+            (*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE);
             }


           /* In the real compile we can copy the string, knowing that it is
           syntactically OK. The starting delimiter is included so that the
-          client can discover it if they want. */
+          client can discover it if they want. We also pass the start offset to 
+          help a script language give better error messages. */


           else
             {
-            PCRE2_UCHAR *callout_string = code + (1 + 3*LINK_SIZE);
+            PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
             *callout_string++ = *ptr++;
+            PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */
             for(;;)
               {
               if (*ptr == delimiter)
@@ -7302,7 +7304,7 @@
      scode += 1 + LINK_SIZE;


      if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
-     else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
+       else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);


      switch (*scode)
        {


Modified: code/trunk/src/pcre2_dfa_match.c
===================================================================
--- code/trunk/src/pcre2_dfa_match.c    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_dfa_match.c    2015-03-14 12:20:18 UTC (rev 225)
@@ -2631,15 +2631,17 @@
             if (code[LINK_SIZE + 1] == OP_CALLOUT)
               {
               cb.callout_number = code[2 + 3*LINK_SIZE];
+              cb.callout_string_offset = 0; 
               cb.callout_string = NULL;
               cb.callout_string_length = 0;
               }
             else
               {
               cb.callout_number = 0;
-              cb.callout_string = code + (2 + 4*LINK_SIZE) + 1;
+              cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE); 
+              cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
               cb.callout_string_length =
-                callout_length - (1 + 3*LINK_SIZE) - 2;
+                callout_length - (1 + 4*LINK_SIZE) - 2;
               }


             if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
@@ -2997,15 +2999,17 @@
           if (*code == OP_CALLOUT)
             {
             cb.callout_number = code[1 + 2*LINK_SIZE];
+            cb.callout_string_offset = 0; 
             cb.callout_string = NULL;
             cb.callout_string_length = 0;
             }
           else
             {
             cb.callout_number = 0;
-            cb.callout_string = code + (1 + 3*LINK_SIZE) + 1;
+            cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE); 
+            cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
             cb.callout_string_length =
-              callout_length - (1 + 3*LINK_SIZE) - 2;
+              callout_length - (1 + 4*LINK_SIZE) - 2;
             }


           if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)


Modified: code/trunk/src/pcre2_jit_compile.c
===================================================================
--- code/trunk/src/pcre2_jit_compile.c    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_jit_compile.c    2015-03-14 12:20:18 UTC (rev 225)
@@ -6346,6 +6346,7 @@
     ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
 sljit_sw value1;
 sljit_sw value2;
+sljit_sw value3;


PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

@@ -6373,15 +6374,18 @@
{
value1 = 0;
value2 = 0;
+ value3 = 0;
}
else
{
- value1 = (sljit_sw) (cc + (1 + 3*LINK_SIZE) + 1);
- value2 = (callout_length - (1 + 3*LINK_SIZE + 2));
+ value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
+ value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
+ value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
}

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
+OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

/* Needed to save important temporary registers. */

Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_match.c    2015-03-14 12:20:18 UTC (rev 225)
@@ -1333,15 +1333,17 @@
         if (*ecode == OP_CALLOUT)
           {
           cb.callout_number = ecode[1 + 2*LINK_SIZE];
+          cb.callout_string_offset = 0; 
           cb.callout_string = NULL;
           cb.callout_string_length = 0;
           }
         else
           {
           cb.callout_number = 0;
-          cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
+          cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE); 
+          cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
           cb.callout_string_length =
-            callout_length - (1 + 3*LINK_SIZE) - 2;
+            callout_length - (1 + 4*LINK_SIZE) - 2;
           }


         if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
@@ -1757,15 +1759,17 @@
         if (*ecode == OP_CALLOUT)
           {
           cb.callout_number = ecode[1 + 2*LINK_SIZE];
+          cb.callout_string_offset = 0; 
           cb.callout_string = NULL;
           cb.callout_string_length = 0;
           }
         else
           {
           cb.callout_number = 0;
-          cb.callout_string = ecode + (1 + 3*LINK_SIZE) + 1;
+          cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE); 
+          cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
           cb.callout_string_length =
-            callout_length - (1 + 3*LINK_SIZE) - 2;
+            callout_length - (1 + 4*LINK_SIZE) - 2;
           }


         if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)


Modified: code/trunk/src/pcre2_printint.c
===================================================================
--- code/trunk/src/pcre2_printint.c    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2_printint.c    2015-03-14 12:20:18 UTC (rev 225)
@@ -600,18 +600,18 @@
     break;


     case OP_CALLOUT_STR:
-    c = code[1 + 3*LINK_SIZE]; 
+    c = code[1 + 4*LINK_SIZE]; 
     fprintf(f, "    %s %c", OP_names[*code], c);
     extra = GET(code, 1 + 2*LINK_SIZE);
-    print_custring(f, code + 2 + 3*LINK_SIZE);
-    
+    print_custring(f, code + 2 + 4*LINK_SIZE);
     for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
       if (c == PRIV(callout_start_delims)[i])
         { 
         c = PRIV(callout_end_delims)[i]; 
         break;
         }  
-    fprintf(f, "%c %d %d", c, GET(code, 1), GET(code, 1 + LINK_SIZE));
+    fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1), 
+      GET(code, 1 + LINK_SIZE));
     break;


     case OP_PROP:


Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/src/pcre2test.c    2015-03-14 12:20:18 UTC (rev 225)
@@ -4546,7 +4546,8 @@
 if (cb->callout_string != NULL)
   {
   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); 
-  fprintf(outfile, "Callout: %c", delimiter);
+  fprintf(outfile, "Callout (%lu): %c", 
+    (unsigned long int)cb->callout_string_offset, delimiter);
   PCHARSV(cb->callout_string, 0,
     cb->callout_string_length, utf, outfile);
   for (i = 0; callout_start_delims[i] != 0; i++)


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/testdata/testoutput2    2015-03-14 12:20:18 UTC (rev 225)
@@ -13987,7 +13987,7 @@
 ------------------------------------------------------------------
         Bra
         a
-        CalloutStr "a)b"c" 13 0
+        CalloutStr "a)b"c" 5 13 0
         Ket
         End
 ------------------------------------------------------------------
@@ -13996,18 +13996,18 @@
 ------------------------------------------------------------------
         Bra
         ab
-        CalloutStr " any text with spaces " 30 1
+        CalloutStr " any text with spaces " 6 30 1
         cde
         Ket
         End
 ------------------------------------------------------------------
     abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
 --->abcde
     ^ ^       c
  0: abcde
     12abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
 --->12abcde
       ^ ^       c
  0: abcde
@@ -14021,7 +14021,7 @@


 /^a(b)c(?C"AB")def/
       abcdef
-Callout: "AB"
+Callout (10): "AB"
 --->abcdef
     ^  ^       d
  0: abcdef
@@ -14046,13 +14046,13 @@
         b
         Ket
         c
-        CalloutStr {AB} 14 1
+        CalloutStr {AB} 10 14 1
         def
         Ket
         End
 ------------------------------------------------------------------
       abcdef\=callout_capture
-Callout: {AB} last capture = 1
+Callout (10): {AB} last capture = 1
  0: <unset>
  1: b
 --->abcdef
@@ -14063,14 +14063,14 @@
 /(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B
 ------------------------------------------------------------------
         Bra
-        CalloutStr `a`b` 10 0
-        CalloutStr 'a'b' 20 0
-        CalloutStr "a"b" 30 0
-        CalloutStr ^a^b^ 40 0
-        CalloutStr %a%b% 50 0
-        CalloutStr #a#b# 60 0
-        CalloutStr $a$b$ 70 0
-        CalloutStr {a}b} 80 0
+        CalloutStr `a`b` 4 10 0
+        CalloutStr 'a'b' 14 20 0
+        CalloutStr "a"b" 24 30 0
+        CalloutStr ^a^b^ 34 40 0
+        CalloutStr %a%b% 44 50 0
+        CalloutStr #a#b# 54 60 0
+        CalloutStr $a$b$ 64 70 0
+        CalloutStr {a}b} 74 80 0
         Ket
         End
 ------------------------------------------------------------------
@@ -14080,15 +14080,15 @@
         Bra
         Bra
         a
-        CalloutStr `code` 14 0
+        CalloutStr `code` 8 14 0
         Ket
         Bra
         a
-        CalloutStr `code` 14 0
+        CalloutStr `code` 8 14 0
         Ket
         Bra
         a
-        CalloutStr `code` 14 0
+        CalloutStr `code` 8 14 0
         Ket
         Ket
         End
@@ -14124,7 +14124,7 @@
         Bra
         ^
         Cond
-        CalloutStr $abc$ 12 7
+        CalloutStr $abc$ 7 12 7
         Assert
         abc
         Ket
@@ -14136,35 +14136,35 @@
         End
 ------------------------------------------------------------------
     abcdefg
-Callout: $abc$
+Callout (7): $abc$
 --->abcdefg
     ^           (?=abc)
  0: abcd
     xyz123 
-Callout: $abc$
+Callout (7): $abc$
 --->xyz123
     ^          (?=abc)
  0: xyz


 /^ab(?C'first')cd(?C"second")ef/
     abcdefg
-Callout: 'first'
+Callout (7): 'first'
 --->abcdefg
     ^ ^         c
-Callout: "second"
+Callout (20): "second"
 --->abcdefg
     ^   ^       e
  0: abcdef


 /(?:a(?C`code`)){3}X/
     aaaXY
-Callout: `code`
+Callout (8): `code`
 --->aaaXY
     ^^        )
-Callout: `code`
+Callout (8): `code`
 --->aaaXY
     ^ ^       )
-Callout: `code`
+Callout (8): `code`
 --->aaaXY
     ^  ^      )
  0: aaaX


Modified: code/trunk/testdata/testoutput6
===================================================================
--- code/trunk/testdata/testoutput6    2015-03-14 10:47:43 UTC (rev 224)
+++ code/trunk/testdata/testoutput6    2015-03-14 12:20:18 UTC (rev 225)
@@ -7777,18 +7777,18 @@
 ------------------------------------------------------------------
         Bra
         ab
-        CalloutStr " any text with spaces " 30 1
+        CalloutStr " any text with spaces " 6 30 1
         cde
         Ket
         End
 ------------------------------------------------------------------
     abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
 --->abcde
     ^ ^       c
  0: abcde
     12abcde
-Callout: " any text with spaces "
+Callout (6): " any text with spaces "
 --->12abcde
       ^ ^       c
  0: abcde
@@ -7801,7 +7801,7 @@


 /^a(b)c(?C"AB")def/
       abcdef
-Callout: "AB"
+Callout (10): "AB"
 --->abcdef
     ^  ^       d
  0: abcdef
@@ -7823,13 +7823,13 @@
         b
         Ket
         c
-        CalloutStr {AB} 14 1
+        CalloutStr {AB} 10 14 1
         def
         Ket
         End
 ------------------------------------------------------------------
       abcdef\=callout_capture
-Callout: {AB} last capture = 0
+Callout (10): {AB} last capture = 0
  0: 
 --->abcdef
     ^  ^       d
@@ -7865,7 +7865,7 @@
         Bra
         ^
         Cond
-        CalloutStr $abc$ 12 7
+        CalloutStr $abc$ 7 12 7
         Assert
         abc
         Ket
@@ -7877,35 +7877,35 @@
         End
 ------------------------------------------------------------------
     abcdefg
-Callout: $abc$
+Callout (7): $abc$
 --->abcdefg
     ^           (?=abc)
  0: abcd
     xyz123 
-Callout: $abc$
+Callout (7): $abc$
 --->xyz123
     ^          (?=abc)
  0: xyz


 /^ab(?C'first')cd(?C"second")ef/
     abcdefg
-Callout: 'first'
+Callout (7): 'first'
 --->abcdefg
     ^ ^         c
-Callout: "second"
+Callout (20): "second"
 --->abcdefg
     ^   ^       e
  0: abcdef


 /(?:a(?C`code`)){3}X/
     aaaXY
-Callout: `code`
+Callout (8): `code`
 --->aaaXY
     ^^        )
-Callout: `code`
+Callout (8): `code`
 --->aaaXY
     ^ ^       )
-Callout: `code`
+Callout (8): `code`
 --->aaaXY
     ^  ^      )
  0: aaaX