[exim-cvs] Expansions: extract methods for JSON objects and …

Top Page
Delete this message
Reply to this message
Author: Exim Git Commits Mailing List
Date:  
To: exim-cvs
Subject: [exim-cvs] Expansions: extract methods for JSON objects and arrays. Bug 2282
Gitweb: https://git.exim.org/exim.git/commitdiff/8fdf20fd84ec88d8f8a250f56d2b4d29ba946392
Commit:     8fdf20fd84ec88d8f8a250f56d2b4d29ba946392
Parent:     c092711614b9b12df56869a95bc9609f913e7be3
Author:     Jeremy Harris <jgh146exb@???>
AuthorDate: Sun Sep 30 01:08:51 2018 +0100
Committer:  Jeremy Harris <jgh146exb@???>
CommitDate: Sun Sep 30 01:08:51 2018 +0100


    Expansions: extract methods for JSON objects and arrays.  Bug 2282
---
 doc/doc-docbook/spec.xfpt    |  34 +++++++
 doc/doc-txt/NewStuff         |   2 +
 src/src/expand.c             | 208 +++++++++++++++++++++++++++++++++++++------
 test/scripts/0000-Basic/0002 |  21 +++++
 test/stdout/0002             |   9 ++
 5 files changed, 248 insertions(+), 26 deletions(-)


diff --git a/doc/doc-docbook/spec.xfpt b/doc/doc-docbook/spec.xfpt
index 8fde639..1dfa552 100644
--- a/doc/doc-docbook/spec.xfpt
+++ b/doc/doc-docbook/spec.xfpt
@@ -9374,6 +9374,27 @@ ${extract{Z}{A=... B=...}{$value} fail }
This forces an expansion failure (see section &<<SECTforexpfai>>&);
{<&'string2'&>} must be present for &"fail"& to be recognized.

+.new
+.vitem "&*${extract json{*&<&'key'&>&*}{*&<&'string1'&>&*}{*&<&'string2'&>&*}&&&
+       {*&<&'string3'&>&*}}*&"
+.cindex "expansion" "extracting from JSON object"
+.cindex JSON expansions
+The key and <&'string1'&> are first expanded separately. Leading and trailing
+white space is removed from the key (but not from any of the strings). The key
+must not be empty and must not consist entirely of digits.
+The expanded <&'string1'&> must be of the form:
+.display
+{ <&'"key1"'&> : <&'value1'&> ,  <&'"key2"'&> : <&'value2'&> ... }
+.endd
+.vindex "&$value$&"
+The braces, commas and colons, and the quoting of the member name are required;
+the spaces are optional.
+Matching of the key against the member names is done case-sensitively.
+. XXX should be a UTF-8 compare
+
+The results of matching are handled as above.
+.wen
+


 .vitem "&*${extract{*&<&'number'&>&*}{*&<&'separators'&>&*}&&&
         {*&<&'string1'&>&*}{*&<&'string2'&>&*}{*&<&'string3'&>&*}}*&"
@@ -9406,6 +9427,19 @@ yields &"99"&. Two successive separators mean that the field between them is
 empty (for example, the fifth field above).



+.new
+.vitem "&*${extract json{*&<&'number'&>&*}&&&
+        {*&<&'string1'&>&*}{*&<&'string2'&>&*}{*&<&'string3'&>&*}}*&"
+.cindex "expansion" "extracting from JSON array"
+.cindex JSON expansions
+The <&'number'&> argument must consist entirely of decimal digits,
+apart from leading and trailing white space, which is ignored.
+
+Field selection and result handling is as above;
+there is no choice of field separator.
+.wen
+
+
 .vitem &*${filter{*&<&'string'&>&*}{*&<&'condition'&>&*}}*&
 .cindex "list" "selecting by condition"
 .cindex "expansion" "selecting from list by condition"
diff --git a/doc/doc-txt/NewStuff b/doc/doc-txt/NewStuff
index 8d2bf22..3f25720 100644
--- a/doc/doc-txt/NewStuff
+++ b/doc/doc-txt/NewStuff
@@ -25,6 +25,8 @@ Version 4.92


6. Builtin macros for supported log_selector and openssl_options values.

+ 7. JSON variants of the ${extract } expansion item.
+
Version 4.91
--------------

diff --git a/src/src/expand.c b/src/src/expand.c
index cd753ef..43f572e 100644
--- a/src/src/expand.c
+++ b/src/src/expand.c
@@ -1133,20 +1133,20 @@ Returns:    NULL if the subfield was not found, or
 */


static uschar *
-expand_getkeyed(uschar *key, const uschar *s)
+expand_getkeyed(uschar * key, const uschar * s)
{
int length = Ustrlen(key);
while (isspace(*s)) s++;

/* Loop to search for the key */

-while (*s != 0)
+while (*s)
{
int dkeylength;
- uschar *data;
- const uschar *dkey = s;
+ uschar * data;
+ const uschar * dkey = s;

- while (*s != 0 && *s != '=' && !isspace(*s)) s++;
+ while (*s && *s != '=' && !isspace(*s)) s++;
dkeylength = s - dkey;
while (isspace(*s)) s++;
if (*s == '=') while (isspace((*(++s))));
@@ -1257,17 +1257,17 @@ return fieldtext;
static uschar *
expand_getlistele(int field, const uschar * list)
{
-const uschar * tlist= list;
-int sep= 0;
+const uschar * tlist = list;
+int sep = 0;
uschar dummy;

-if(field<0)
+if (field < 0)
{
- for(field++; string_nextinlist(&tlist, &sep, &dummy, 1); ) field++;
- sep= 0;
+ for (field++; string_nextinlist(&tlist, &sep, &dummy, 1); ) field++;
+ sep = 0;
}
-if(field==0) return NULL;
-while(--field>0 && (string_nextinlist(&list, &sep, &dummy, 1))) ;
+if (field == 0) return NULL;
+while (--field > 0 && (string_nextinlist(&list, &sep, &dummy, 1))) ;
return string_nextinlist(&list, &sep, NULL, 0);
}

@@ -3849,6 +3849,79 @@ return x;



+/* Return pointer to dewrapped string, with enclosing specified chars removed.
+The given string is modified on return.  Leading whitespace is skipped while
+looking for the opening wrap character, then the rest is scanned for the trailing
+(non-escaped) wrap character.  A backslash escapes the following character; a
+backslash that ends the string escapes nothing (the scan stops at the nul).
+A nul is written over the trailing wrap, and a pointer to the char after the
+leading wrap is returned.
+
+Arguments:
+  s    String for de-wrapping
+  wrap  Two-char string, the first being the opener, second the closer wrapping
+        character
+Return:
+  Pointer to de-wrapped string, or NULL on error (with expand_string_message set).
+*/
+
+static uschar *
+dewrap(uschar * s, const uschar * wrap)
+{
+uschar * p = s;
+
+while (isspace(*p)) p++;
+
+if (*p == *wrap)                        /* opener found */
+  {
+  s = ++p;                              /* result starts just after the opener */
+  wrap++;                               /* from here on, *wrap is the closer */
+  while (*p)
+    {
+    if (*p == '\\' && p[1]) p++;        /* skip the escaped char, never the nul */
+    else if (*p == *wrap)
+      {
+      *p = '\0';
+      return s;
+      }
+    p++;
+    }
+  }
+expand_string_message = string_sprintf("missing '%c'", *wrap);  /* opener if absent, else closer */
+return NULL;
+}
+
+
+/* Pull off the leading array or object element, returning a copy made with
+string_copyn().  *list is advanced past the element and its trailing comma.
+
+The element may itself be an object or array; commas nested inside [] or {}
+do not end it.  Quoted strings are not tracked, so a comma inside one does. */
+
+uschar *
+json_nextinlist(const uschar ** list)
+{
+unsigned array_depth = 0, object_depth = 0;
+const uschar * s = *list, * item;
+
+while (isspace(*s)) s++;                /* leading space is not part of the copy */
+
+for (item = s;
+     *s && (*s != ',' || array_depth != 0 || object_depth != 0);
+     s++)
+  switch (*s)
+    {
+    case '[': array_depth++; break;
+    case ']': array_depth--; break;     /* unsigned: an unmatched ']' wraps round */
+    case '{': object_depth++; break;
+    case '}': object_depth--; break;    /* unsigned: an unmatched '}' wraps round */
+    }
+*list = *s ? s+1 : s;                   /* step over the comma; stay on the nul */
+return string_copyn(item, s - item);    /* zero-length copy once list is used up */
+}
+
+
+
 /*************************************************
 *                 Expand string                  *
 *************************************************/
@@ -5554,6 +5627,16 @@ while (*s != 0)
       uschar *sub[3];
       int save_expand_nmax =
         save_expand_strings(save_expand_nstring, save_expand_nlength);
+      enum {extract_basic, extract_json} fmt = extract_basic;
+
+      while (isspace(*s)) s++;
+
+      /* Check for a format-variant specifier */
+
+      if (*s != '{')                    /*}*/
+    {
+    if (Ustrncmp(s, "json", 4) == 0) {fmt = extract_json; s += 4;}
+    }


       /* While skipping we cannot rely on the data for expansions being
       available (eg. $item) hence cannot decide on numeric vs. keyed.
@@ -5561,11 +5644,10 @@ while (*s != 0)


       if (skipping)
     {
-        while (isspace(*s)) s++;
-        for (j = 5; j > 0 && *s == '{'; j--)
+        for (j = 5; j > 0 && *s == '{'; j--)            /*'}'*/
       {
           if (!expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok))
-        goto EXPAND_FAILED;                    /*{*/
+        goto EXPAND_FAILED;                    /*'{'*/
           if (*s++ != '}')
         {
         expand_string_message = US"missing '{' for arg of extract";
@@ -5573,13 +5655,13 @@ while (*s != 0)
         }
       while (isspace(*s)) s++;
       }
-    if (  Ustrncmp(s, "fail", 4) == 0
+    if (  Ustrncmp(s, "fail", 4) == 0            /*'{'*/
        && (s[4] == '}' || s[4] == ' ' || s[4] == '\t' || !s[4])
        )
       {
       s += 4;
       while (isspace(*s)) s++;
-      }
+      }                            /*'{'*/
     if (*s != '}')
       {
       expand_string_message = US"missing '}' closing extract";
@@ -5589,11 +5671,11 @@ while (*s != 0)


       else for (i = 0, j = 2; i < j; i++) /* Read the proper number of arguments */
         {
-        while (isspace(*s)) s++;
-        if (*s == '{')                         /*}*/
+    while (isspace(*s)) s++;
+        if (*s == '{')                         /*'}'*/
           {
           sub[i] = expand_string_internal(s+1, TRUE, &s, skipping, TRUE, &resetok);
-          if (sub[i] == NULL) goto EXPAND_FAILED;        /*{*/
+          if (sub[i] == NULL) goto EXPAND_FAILED;        /*'{'*/
           if (*s++ != '}')
         {
         expand_string_message = string_sprintf(
@@ -5604,7 +5686,7 @@ while (*s != 0)
           /* After removal of leading and trailing white space, the first
           argument must not be empty; if it consists entirely of digits
           (optionally preceded by a minus sign), this is a numerical
-          extraction, and we expect 3 arguments. */
+          extraction, and we expect 3 arguments (normal) or 2 (json). */


           if (i == 0)
             {
@@ -5635,7 +5717,7 @@ while (*s != 0)
         if (*p == 0)
           {
           field_number *= x;
-          j = 3;               /* Need 3 args */
+          if (fmt != extract_json) j = 3;               /* Need 3 args */
           field_number_set = TRUE;
           }
             }
@@ -5651,9 +5733,83 @@ while (*s != 0)
       /* Extract either the numbered or the keyed substring into $value. If
       skipping, just pretend the extraction failed. */


-      lookup_value = skipping? NULL : field_number_set?
-        expand_gettokened(field_number, sub[1], sub[2]) :
-        expand_getkeyed(sub[0], sub[1]);
+      if (skipping)
+    lookup_value = NULL;
+      else switch (fmt)
+    {
+    case extract_basic:
+      lookup_value = field_number_set
+        ? expand_gettokened(field_number, sub[1], sub[2])
+        : expand_getkeyed(sub[0], sub[1]);
+      break;
+
+    case extract_json:
+      {
+      uschar * s, * item;
+      const uschar * list;
+
+      /* Array: Bracket-enclosed and comma-separated.
+      Object: Brace-enclosed, comma-sep list of name:value pairs */
+
+      if (!(s = dewrap(sub[1], field_number_set ? US"[]" : US"{}")))
+        {
+        expand_string_message =
+          string_sprintf("%s wrapping %s for extract json",
+        expand_string_message,
+        field_number_set ? "array" : "object");
+        goto EXPAND_FAILED_CURLY;
+        }
+
+      list = s;
+      if (field_number_set)
+        {
+        if (field_number <= 0)
+          {
+          expand_string_message = US"first argument of \"extract\" must "
+        "be greater than zero";
+          goto EXPAND_FAILED;
+          }
+        while (field_number > 0 && (item = json_nextinlist(&list)))
+          field_number--;
+        s = item;
+        lookup_value = s;
+        while (*s) s++;
+        while (--s >= lookup_value && isspace(*s)) *s = '\0';
+        }
+      else
+        {
+        lookup_value = NULL;
+        while ((item = json_nextinlist(&list)))
+          {
+          /* Item is:  string name-sep value.  string is quoted.
+          Dequote the string and compare with the search key. */
+
+          if (!(item = dewrap(item, US"\"\"")))
+        {
+        expand_string_message =
+          string_sprintf("%s wrapping string key for extract json",
+            expand_string_message);
+        goto EXPAND_FAILED_CURLY;
+        }
+          if (Ustrcmp(item, sub[0]) == 0)    /*XXX should be a UTF8-compare */
+        {
+        s = item + strlen(item) + 1;
+        while (isspace(*s)) s++;
+        if (*s != ':')
+          {
+          expand_string_message = string_sprintf(
+            "missing object value-separator for extract json");
+          goto EXPAND_FAILED_CURLY;
+          }
+        s++;
+        while (isspace(*s)) s++;
+        lookup_value = s;
+        break;
+        }
+          }
+        }
+      }
+    }


       /* If no string follows, $value gets substituted; otherwise there can
       be yes/no strings, as for lookup or if. */
@@ -5753,7 +5909,7 @@ while (*s != 0)
       /* Extract the numbered element into $value. If
       skipping, just pretend the extraction failed. */


-      lookup_value = skipping? NULL : expand_getlistele(field_number, sub[1]);
+      lookup_value = skipping ? NULL : expand_getlistele(field_number, sub[1]);


       /* If no string follows, $value gets substituted; otherwise there can
       be yes/no strings, as for lookup or if. */
diff --git a/test/scripts/0000-Basic/0002 b/test/scripts/0000-Basic/0002
index 27bf708..65ce40b 100644
--- a/test/scripts/0000-Basic/0002
+++ b/test/scripts/0000-Basic/0002
@@ -874,6 +874,27 @@ ${if ={1}{1} {true}{${if ={1}{1} {true}{${if ={1}{1}{true}fail}}}}}
 ${env {USER}}
 ${env {NO_SUCH_VARIABLE} {oops, success} {correct}}


+# JSON
+
+${extract json {Url} \
+  {   \{ \"Url\":    \"http://www.example.com/image/481989943\",\
+     \"Height\": 125,\
+     \"Width\":  100\
+      \} \
+  } \
+ }
+${extract json {Width} \
+  {   \{ \"Url\":    \"http://www.example.com/image/481989943\",\
+     \"Height\": 125,\
+     \"Width\":  100\
+      \} \
+  } \
+ }
+${extract json {2} {[116, 943, 234, 38793]} }
+${extract json {2} {${extract json{IDs} {\{"other":"foo", "IDs": [116, 943, 234]\} }}} }
+
+${extract json {seconds} { \{"hours":0, "mins":0, "seconds":59\} }}
+${extract json {seconds} {${extract json {2} { ["irrelevant", \{"hours":0, "mins":0, "seconds":59\}] }}}}
 ****
 # Test "escape" with print_topbitchars
 exim -be -DPTBC=print_topbitchars
diff --git a/test/stdout/0002 b/test/stdout/0002
index 20f4183..a8d1361 100644
--- a/test/stdout/0002
+++ b/test/stdout/0002
@@ -829,6 +829,15 @@ xyz

> CALLER
> correct
>

+> # JSON
+>
+> "http://www.example.com/image/481989943"
+> 100
+> 943
+> 943
+>
+> 59
+> 59
>
> escape: B7?F2?
>