From 873b39dd14b074bf0779f5d06f5c5bfe3bcb416b Mon Sep 17 00:00:00 2001 From: cpq Date: Mon, 5 Jun 2023 10:10:55 +0100 Subject: [PATCH] Add json routines --- README.md | 145 +++++++++++++++++++++++++++++++- str.h | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++- test/main.c | 13 +-- 3 files changed, 385 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f715bf4..8e69405 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,10 @@ following routines: hex, base64 - `xdtoa()` - convert `double` to string - `xatod()` - convert string to `double` +- `json_get()` - find element in a JSON string +- `json_get_num()` - fetch numeric value from JSON string +- `json_get_bool()` - fetch boolean value from JSON string +- `json_get_str()` - fetch string value from JSON string ## Usage example @@ -98,7 +102,144 @@ Print formatted string into a fixed-size buffer. Parameters: Return value: number of bytes printed. The result is guaranteed to be NUL terminated. -### Pre-defined `%M`, `%m` format functions +### json\_get() +```c +int json_get(const char *buf, int len, const char *path, int *size); +``` + +Parse JSON string `buf`, `len` and return the offset of the element +specified by the JSON `path`. The length of the element is stored in `size`. + +Parameters: +- `buf` - a pointer to a JSON string +- `len` - a length of a JSON string +- `path` - a JSON path. Must start with `$`, e.g. `$.user`, `$[12]`, `$`, etc +- `size` - a pointer that receives element's length. Can be NULL + +Return value: offset of the element, or negative value on error. + +Usage example: + +```c +// JSON string buf, len contains { "a": 1, "b": [2, 3] } +int size, ofs; + +// Lookup "$", which is the whole JSON. Can be used for validation +ofs = json_get(buf, len, "$", &size); // ofs == 0, size == 23 + +// Lookup attribute "a". Point to value "1" +ofs = json_get(buf, len, "$.a", &size); // ofs = 7, size = 1 + +// Lookup attribute "b". 
Point to array [2, 3] +ofs = json_get(buf, len, "$.b", &size); // ofs = 15, size = 6 + +// Lookup attribute "b[1]". Point to value "3" +ofs = json_get(buf, len, "$.b[1]", &size); // ofs = 19, size = 1 +``` + +### json\_get\_num() + +```c +int json_get_num(const char *buf, int len, const char *path, double *val); +``` + +Fetch numeric (double) value from the json string `buf`, `len` at JSON path +`path` into a placeholder `val`. Return true if successful. + +Parameters: +- `buf` - a pointer to a JSON string +- `len` - a length of a JSON string +- `path` - a JSON path. Must start with `$` +- `val` - a placeholder for value + +Return value: 1 on success, 0 on error + +Usage example: + +```c +double d = 0.0; +json_get_num("[1,2,3]", 7, "$[1]", &d); // d == 2 +json_get_num("{\"a\":1.23}", 10, "$.a", &d); // d == 1.23 +``` + +### json\_get\_bool() + +```c +int json_get_bool(const char *buf, int len, const char *path, int *val); +``` + +Fetch boolean (bool) value from the json string `buf`, `len` at JSON path +`path` into a placeholder `val`. Return true if successful. + +Parameters: +- `buf` - a pointer to a JSON string +- `len` - a length of a JSON string +- `path` - a JSON path. Must start with `$` +- `val` - a placeholder for value + +Return value: 1 on success, 0 on error + +Usage example: + +```c +int b = 0; +json_get_bool("[123]", 5, "$[0]", &b); // Error. b == 0 +json_get_bool("[true]", 6, "$[0]", &b); // b == 1 +``` + +### json\_get\_long() + +```c +long json_get_long(const char *buf, int len, const char *path, long default_val); +``` + +Fetch integer numeric (long) value from the json string `buf`, `len` at JSON path +`path`. Return it if found, or `default_val` if not found. + +Parameters: +- `buf` - a pointer to a JSON string +- `len` - a length of a JSON string +- `path` - a JSON path. 
Must start with `$` +- `default_val` - a default value for the failure case + +Return value: found value, or `default_val` value + +Usage example: + +```c +long a = json_get_long("[123]", 5, "$a", -1); // a == -1 +long b = json_get_long("[123]", 5, "$[0]", -1); // b == 123 +``` + +### json\_get\_str() + +```c +int json_get_str(const char *buf, int len, const char *path, char *dst, size_t dstlen); +``` + +Fetch string value from the json string `buf`, `len` at JSON path +`path`. If found, the string is un-escaped and copied into the +caller-supplied buffer `dst` of size `dstlen`. On success the result is +NUL-terminated. No memory is allocated. + +Parameters: +- `buf` - a pointer to a JSON string +- `len` - a length of a JSON string +- `path` - a JSON path. Must start with `$` +- `dst` - a pointer to a buffer that holds the result +- `dstlen` - a length of a result buffer + +Return value: length of a decoded string. >= 0 on success, < 0 on error + +Usage example: + +```c +char dst[100]; +json_get_str("[1,2,\"hi\"]", 10, "$[2]", dst, sizeof(dst)); // dst contains "hi" +``` + + +## Pre-defined `%M`, `%m` format functions ```c size_t fmt_*(void (*out)(char, void *), void *arg, va_list *ap); @@ -130,7 +271,7 @@ const char *data = "xyz"; // Print base64 data: xsnprintf(buf, sizeof(buf), "%M", fmt_b64, 3, data); // eHl6 ``` -### Custom `%M`, `%m` format functions +## Custom `%M`, `%m` format functions It is easy to create your own format functions to format data that is specific to your application. 
For example, if you want to print your diff --git a/str.h b/str.h index 1b01f6f..0148210 100644 --- a/str.h +++ b/str.h @@ -44,7 +44,13 @@ XAPI size_t fmt_mac(void (*out)(char, void *), void *arg, va_list *ap); XAPI size_t fmt_b64(void (*out)(char, void *), void *arg, va_list *ap); XAPI size_t fmt_esc(void (*out)(char, void *), void *arg, va_list *ap); -XAPI int json_parse(const char *buf, size_t len, int *tokens); +// JSON parsing API +XAPI int json_get(const char *buf, int len, const char *path, int *size); +XAPI int json_get_num(const char *buf, int len, const char *path, double *val); +XAPI int json_get_bool(const char *buf, int len, const char *path, int *val); +XAPI int json_get_str(const char *buf, int len, const char *path, char *dst, + size_t dlen); +XAPI long json_get_long(const char *buf, int len, const char *path, long dflt); #if !defined(STR_API_ONLY) typedef void (*xout_t)(char, void *); // Output function @@ -421,6 +427,234 @@ XAPI size_t xvprintf(xout_t out, void *param, const char *fmt, va_list *ap) { return n; } +XAPI char json_esc(int c, int esc) { + const char *p, *e[] = {"\b\f\n\r\t\\\"", "bfnrt\\\""}; + const char *esc1 = esc ? e[0] : e[1], *esc2 = esc ? 
e[1] : e[0]; + for (p = esc1; *p != '\0'; p++) { + if (*p == c) return esc2[p - esc1]; + } + return 0; +} + +XAPI int json_pass_string(const char *s, int len) { + int i; + for (i = 0; i < len; i++) { + if (s[i] == '\\' && i + 1 < len && json_esc(s[i + 1], 1)) { + i++; + } else if (s[i] == '\0') { + return -1; + } else if (s[i] == '"') { + return i; + } + } + return -1; +} + +XAPI int json_get(const char *s, int len, const char *path, int *toklen) { + enum { S_VALUE, S_KEY, S_COLON, S_COMMA_OR_EOO } expecting = S_VALUE; + unsigned char nesting[20]; + int i = 0; // Current offset in `s` + int j = 0; // Offset in `s` we're looking for (return value) + int depth = 0; // Current depth (nesting level) + int ed = 0; // Expected depth + int pos = 1; // Current position in `path` + int ci = -1, ei = -1; // Current and expected index in array + + if (toklen) *toklen = 0; + if (path[0] != '$') return -1; + +#define MG_CHECKRET(x) \ + do { \ + if (depth == ed && path[pos] == '\0' && ci == ei) { \ + if (toklen) *toklen = i - j + 1; \ + return j; \ + } \ + } while (0) + +// In the ascii table, the distance between `[` and `]` is 2. +// Ditto for `{` and `}`. Hence +2 in the code below. +#define MG_EOO(x) \ + do { \ + if (depth == ed && ci != ei) return -2; \ + if (c != nesting[depth - 1] + 2) return -1; \ + depth--; \ + MG_CHECKRET(x); \ + } while (0) + + for (i = 0; i < len; i++) { + unsigned char c = ((unsigned char *) s)[i]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; + switch (expecting) { + case S_VALUE: + // p("V %s [%.*s] %d %d %d %d\n", path, pos, path, depth, ed, ci, ei); + if (depth == ed) j = i; + if (c == '{') { + if (depth >= (int) sizeof(nesting)) return -3; + if (depth == ed && path[pos] == '.' 
&& ci == ei) { + // If we start the object, reset array indices + ed++, pos++, ci = ei = -1; + } + nesting[depth++] = c; + expecting = S_KEY; + break; + } else if (c == '[') { + if (depth >= (int) sizeof(nesting)) return -3; + if (depth == ed && path[pos] == '[' && ei == ci) { + ed++, pos++, ci = 0; + for (ei = 0; path[pos] != ']' && path[pos] != '\0'; pos++) { + ei *= 10; + ei += path[pos] - '0'; + } + if (path[pos] != 0) pos++; + } + nesting[depth++] = c; + break; + } else if (c == ']' && depth > 0) { // Empty array + MG_EOO(']'); + } else if (c == 't' && i + 3 < len && memcmp(&s[i], "true", 4) == 0) { + i += 3; + } else if (c == 'n' && i + 3 < len && memcmp(&s[i], "null", 4) == 0) { + i += 3; + } else if (c == 'f' && i + 4 < len && memcmp(&s[i], "false", 5) == 0) { + i += 4; + } else if (c == '-' || ((c >= '0' && c <= '9'))) { + int numlen = 0; + xatod(&s[i], len - i, &numlen); + i += numlen - 1; + } else if (c == '"') { + int n = json_pass_string(&s[i + 1], len - i - 1); + if (n < 0) return n; + i += n + 1; + } else { + return -1; + } + MG_CHECKRET('V'); + if (depth == ed && ei >= 0) ci++; + expecting = S_COMMA_OR_EOO; + break; + + case S_KEY: + if (c == '"') { + int n = json_pass_string(&s[i + 1], len - i - 1); + if (n < 0) return n; + if (i + 1 + n >= len) return -2; + if (depth < ed) return -2; + if (depth == ed && path[pos - 1] != '.') return -2; + // printf("K %s [%.*s] [%.*s] %d %d %d\n", path, pos, path, n, + // &s[i + 1], n, depth, ed); + // NOTE(cpq): in the check sequence below is important. + // strncmp() must go first: it fails fast if the remaining length of + // the path is smaller than `n`. + if (depth == ed && path[pos - 1] == '.' && + strncmp(&s[i + 1], &path[pos], (size_t) n) == 0 && + (path[pos + n] == '\0' || path[pos + n] == '.' 
|| + path[pos + n] == '[')) { + pos += n; + } + i += n + 1; + expecting = S_COLON; + } else if (c == '}') { // Empty object + MG_EOO('}'); + expecting = S_COMMA_OR_EOO; + } else { + return -1; + } + break; + + case S_COLON: + if (c == ':') { + expecting = S_VALUE; + } else { + return -1; + } + break; + + case S_COMMA_OR_EOO: + if (depth <= 0) { + return -1; + } else if (c == ',') { + expecting = (nesting[depth - 1] == '{') ? S_KEY : S_VALUE; + } else if (c == ']' || c == '}') { + MG_EOO('O'); + if (depth == ed && ei >= 0) ci++; + } else { + return -1; + } + break; + } + } + return -2; +} + +XAPI unsigned char xnimble(unsigned char c) { + return (c >= '0' && c <= '9') ? (unsigned char) (c - '0') + : (c >= 'A' && c <= 'F') ? (unsigned char) (c - '7') + : (unsigned char) (c - 'W'); +} + +XAPI unsigned long xunhexn(const char *s, size_t len) { + unsigned long i = 0, v = 0; + for (i = 0; i < len; i++) v <<= 4, v |= xnimble(((uint8_t *) s)[i]); + return v; +} + +XAPI int json_unescape(const char *buf, size_t len, char *to, size_t n) { + size_t i, j; + for (i = 0, j = 0; i < len && j < n; i++, j++) { + if (buf[i] == '\\' && i + 5 < len && buf[i + 1] == 'u') { + // \uXXXX escape. We could process a simple one-byte chars + // \u00xx from the ASCII range. 
More complex chars would require + // dragging in a UTF8 library, which is too much for us + if (buf[i + 2] != '0' || buf[i + 3] != '0') return -1; // Give up + ((unsigned char *) to)[j] = (unsigned char) xunhexn(buf + i + 4, 2); + i += 5; + } else if (buf[i] == '\\' && i + 1 < len) { + char c = json_esc(buf[i + 1], 0); + if (c == 0) return -1; + to[j] = c; + i++; + } else { + to[j] = buf[i]; + } + } + if (j >= n) return -1; + if (n > 0) to[j] = '\0'; + return (int) j; +} + +XAPI int json_get_num(const char *buf, int len, const char *path, double *v) { + int found = 0, n = 0, off = json_get(buf, len, path, &n); + if (off >= 0 && (buf[off] == '-' || (buf[off] >= '0' && buf[off] <= '9'))) { + if (v != NULL) *v = xatod(buf + off, n, NULL); // Parse at element start + found = 1; + } + return found; +} + +XAPI int json_get_bool(const char *buf, int len, const char *path, int *v) { + int found = 0, off = json_get(buf, len, path, NULL); + if (off >= 0 && (buf[off] == 't' || buf[off] == 'f')) { + if (v != NULL) *v = buf[off] == 't'; + found = 1; + } + return found; +} + +XAPI int json_get_str(const char *buf, int len, const char *path, char *dst, + size_t dlen) { + int result = -1, n = 0, off = json_get(buf, len, path, &n); + if (off >= 0 && n > 1 && buf[off] == '"') { + result = json_unescape(buf + off + 1, (size_t) (n - 2), dst, dlen); + } + return result; +} + +XAPI long json_get_long(const char *buf, int len, const char *path, long dflt) { + double v; + if (json_get_num(buf, len, path, &v)) dflt = (long) v; + return dflt; +} + #endif // STR_API_ONLY #ifdef __cplusplus diff --git a/test/main.c b/test/main.c index f69c015..af0d21e 100644 --- a/test/main.c +++ b/test/main.c @@ -152,7 +152,7 @@ static void test_float(void) { #endif } -static void xputchar(char ch, void *arg) { +static void out(char ch, void *arg) { putchar(ch); (void) arg; } @@ -171,11 +171,12 @@ static void test_m(void) { assert(sf(quo, "_%m_%d", fmt_ip4, &ip4, 123)); assert(sf(quo, "_%m_%d", ESC("127.0.0.1"), 123)); - 
xprintf(xputchar, NULL, "%s: %g\n", "dbl", 1.234); - xprintf(xputchar, NULL, "%.*s\n", 3, "foobar"); - xprintf(xputchar, NULL, "%#04x\n", 11); - xprintf(xputchar, NULL, "%d %5s\n", 7, "pad"); - xprintf(xputchar, NULL, "JSON: {%m: %g}\n", ESC("value"), 1.234); + xprintf(out, NULL, "%s: %g\n", "dbl", 1.234); // dbl: 1.234 + xprintf(out, NULL, "%.*s\n", 3, "foobar"); // foo + xprintf(out, NULL, "%#04x\n", 11); // 0x0b + xprintf(out, NULL, "%d %5s\n", 7, "pad"); // 7 pad + // JSON: {"value": 1.234} + xprintf(out, NULL, "JSON: {%m: %g}\n", ESC("value"), 1.234); } int main(void) {