summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2014-12-15 22:26:56 +0100
committer Lennart Poettering <lennart@poettering.net> 2014-12-15 22:27:15 +0100
commit e7eebcfc42f00aa481ef31abc8e7e243c16f5b2c (patch)
tree b2d7e393fc5252877c15f46eab95c0450f02d52a /src
parent c532d8a00cacacc6775effb7aadca680b1d39ccd (diff)
shared: add minimal JSON tokenizer
Diffstat (limited to 'src')
-rw-r--r-- src/shared/json.c 409
-rw-r--r-- src/shared/json.h 50
-rw-r--r-- src/shared/utf8.c 46
-rw-r--r-- src/shared/utf8.h 1
-rw-r--r-- src/shared/xml.h 2
-rw-r--r-- src/test/test-json.c 101
6 files changed, 584 insertions, 25 deletions
diff --git a/src/shared/json.c b/src/shared/json.c
new file mode 100644
index 0000000000..f1495e99c8
--- /dev/null
+++ b/src/shared/json.c
@@ -0,0 +1,409 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+#include <math.h>
+
+#include "macro.h"
+#include "log.h"
+#include "util.h"
+#include "utf8.h"
+#include "json.h"
+
+/* Tokenizer states. json_tokenize() keeps the current one in the caller's
+ * opaque "state" pointer between calls. */
+enum {
+ /* Initial state: json_tokenize() resets the line counter to 1 and carries on as STATE_VALUE */
+ STATE_NULL,
+ /* A value, or an opening/closing brace or bracket, is accepted next */
+ STATE_VALUE,
+ /* Entered after a value or a closing brace/bracket: only ':', ',', '}' or ']' is accepted next */
+ STATE_VALUE_POST,
+};
+
+/* Adds the number of newline characters found in the first n bytes of s to
+ * *line. Does nothing when the caller supplied no line counter. */
+static void inc_lines(unsigned *line, const char *s, size_t n) {
+        size_t i;
+
+        if (!line)
+                return;
+
+        for (i = 0; i < n; i++)
+                if (s[i] == '\n')
+                        (*line)++;
+}
+
+/* Parses a double-quoted JSON string starting at *p.
+ *
+ * On success a newly allocated, NUL-terminated copy of the decoded contents
+ * is stored in *ret, *p is moved past the closing quote and JSON_STRING is
+ * returned.
+ *
+ * Returns -EINVAL for a missing opening quote, an unterminated string, a raw
+ * control character or 0x7f, an unknown or malformed escape, or a 'u' escape
+ * that decodes to 0; -ENOMEM when an allocation fails; or the negative value
+ * reported by utf8_encoded_valid_unichar() for input it rejects. */
+static int json_parse_string(const char **p, char **ret) {
+ _cleanup_free_ char *s = NULL;
+ size_t n = 0, allocated = 0;
+ const char *c;
+
+ assert(p);
+ assert(*p);
+ assert(ret);
+
+ c = *p;
+
+ if (*c != '"')
+ return -EINVAL;
+
+ c++;
+
+ for (;;) {
+ int len;
+
+ /* Check for EOF */
+ if (*c == 0)
+ return -EINVAL;
+
+ /* Check for control characters 0x01..0x1f (0x00 was handled as EOF above) */
+ if (*c > 0 && *c < ' ')
+ return -EINVAL;
+
+ /* Check for control character 0x7f */
+ if (*c == 0x7f)
+ return -EINVAL;
+
+ /* Closing quote: terminate the buffer and hand it to the caller */
+ if (*c == '"') {
+ if (!s) {
+ /* Nothing was collected, so no buffer exists yet: return an empty string */
+ s = strdup("");
+ if (!s)
+ return -ENOMEM;
+ } else
+ s[n] = 0;
+
+ *p = c + 1;
+
+ *ret = s;
+ /* Presumably keeps the _cleanup_free_ handler from releasing the returned buffer */
+ s = NULL;
+ return JSON_STRING;
+ }
+
+ /* Backslash escape */
+ if (*c == '\\') {
+ char ch = 0;
+ c++;
+
+ if (*c == 0)
+ return -EINVAL;
+
+ if (IN_SET(*c, '"', '\\', '/'))
+ ch = *c;
+ else if (*c == 'b')
+ ch = '\b';
+ else if (*c == 'f')
+ ch = '\f';
+ else if (*c == 'n')
+ ch = '\n';
+ else if (*c == 'r')
+ ch = '\r';
+ else if (*c == 't')
+ ch = '\t';
+ else if (*c == 'u') {
+ /* 'u' followed by four hex digits, most significant first */
+ int aa, bb, cc, dd;
+ uint16_t x;
+
+ aa = unhexchar(c[1]);
+ if (aa < 0)
+ return -EINVAL;
+
+ bb = unhexchar(c[2]);
+ if (bb < 0)
+ return -EINVAL;
+
+ cc = unhexchar(c[3]);
+ if (cc < 0)
+ return -EINVAL;
+
+ dd = unhexchar(c[4]);
+ if (dd < 0)
+ return -EINVAL;
+
+
+ x = ((uint16_t) aa << 12) |
+ ((uint16_t) bb << 8) |
+ ((uint16_t) cc << 4) |
+ ((uint16_t) dd);
+
+ /* x is unsigned, so this only rejects an escaped NUL */
+ if (x <= 0)
+ return -EINVAL;
+
+ /* Presumably ensures room for n + 4 bytes: up to three encoded bytes plus the final NUL */
+ if (!GREEDY_REALLOC(s, allocated, n + 4))
+ return -ENOMEM;
+
+ /* NOTE(review): each escape is encoded on its own; a surrogate pair is not
+ * combined into one code point here - confirm whether that is intended */
+ n += utf8_encode_unichar(x, s + n);
+ /* Skip the 'u' and its four hex digits */
+ c += 5;
+ continue;
+ } else
+ return -EINVAL;
+
+ /* Single-character escape: one output byte plus room for the final NUL */
+ if (!GREEDY_REALLOC(s, allocated, n + 2))
+ return -ENOMEM;
+
+ s[n++] = ch;
+ c ++;
+ continue;
+ }
+
+ /* Ordinary character: copy the bytes utf8_encoded_valid_unichar() reports for it */
+ len = utf8_encoded_valid_unichar(c);
+ if (len < 0)
+ return len;
+
+ if (!GREEDY_REALLOC(s, allocated, n + len + 1))
+ return -ENOMEM;
+
+ memcpy(s + n, c, len);
+ n += len;
+ c += len;
+ }
+}
+
+/* Parses a JSON number starting at *p.
+ *
+ * Accepted form: an optional '-', then either a single '0' or a non-zero
+ * digit followed by further digits, an optional fraction ('.' plus at least
+ * one digit) and an optional exponent ('e' or 'E', an optional sign and at
+ * least one digit).
+ *
+ * On success *p is moved to the first character after the number; whatever
+ * follows is left for the caller to judge. Returns JSON_INTEGER with
+ * ret->integer set when the literal has neither fraction nor exponent and its
+ * magnitude fits into intmax_t, JSON_REAL with ret->real set otherwise, and
+ * -EINVAL when the text at *p is not a well-formed number. */
+static int json_parse_number(const char **p, union json_value *ret) {
+        bool negative = false, exponent_negative = false, is_double = false;
+        double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
+        intmax_t i = 0;
+        const char *c;
+
+        assert(p);
+        assert(*p);
+        assert(ret);
+
+        c = *p;
+
+        if (*c == '-') {
+                negative = true;
+                c++;
+        }
+
+        if (*c == '0')
+                c++;
+        else {
+                if (*c < '1' || *c > '9')
+                        return -EINVAL;
+
+                do {
+                        int digit = *c - '0';
+
+                        if (!is_double) {
+                                /* Check before multiplying: signed overflow is undefined
+                                 * behaviour and cannot be detected reliably afterwards. */
+                                if (i > (INTMAX_MAX - digit) / 10)
+                                        is_double = true; /* too large for intmax_t, report as real */
+                                else
+                                        i = 10 * i + digit;
+                        }
+
+                        /* The integer part is always tracked as a double too, for the real result */
+                        x = 10.0 * x + digit;
+                        c++;
+                } while (*c >= '0' && *c <= '9');
+        }
+
+        if (*c == '.') {
+                is_double = true;
+                c++;
+
+                if (*c < '0' || *c > '9')
+                        return -EINVAL;
+
+                /* Collect the fraction digits as an integer y and remember the power of ten to divide by */
+                do {
+                        y = 10.0 * y + (*c - '0');
+                        shift = 10.0 * shift;
+                        c++;
+                } while (*c >= '0' && *c <= '9');
+        }
+
+        if (*c == 'e' || *c == 'E') {
+                is_double = true;
+                c++;
+
+                if (*c == '-') {
+                        exponent_negative = true;
+                        c++;
+                } else if (*c == '+')
+                        c++;
+
+                if (*c < '0' || *c > '9')
+                        return -EINVAL;
+
+                do {
+                        exponent = 10.0 * exponent + (*c - '0');
+                        c++;
+                } while (*c >= '0' && *c <= '9');
+        }
+
+        /* The number may be followed by further tokens (',', ']', '}', whitespace),
+         * so stopping short of the end of the input is not an error here. */
+        *p = c;
+
+        if (is_double) {
+                ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10((exponent_negative ? -1.0 : 1.0) * exponent);
+                return JSON_REAL;
+        }
+
+        ret->integer = negative ? -i : i;
+        return JSON_INTEGER;
+}
+
+/* Fills in the outputs for a token that carries no payload: clears the string
+ * and value results, moves *p to "next", stores the follow-up state and hands
+ * back the token type. */
+static int json_return_plain(
+                const char **p,
+                const char *next,
+                char **ret_string,
+                union json_value *ret_value,
+                void **state,
+                int next_state,
+                int token) {
+
+        *ret_string = NULL;
+        *ret_value = JSON_VALUE_NULL;
+        *p = next;
+        *state = INT_TO_PTR(next_state);
+        return token;
+}
+
+/* Extracts the next token from the NUL-terminated JSON text at *p.
+ *
+ * p:          cursor into the text; moved past the token on success.
+ * ret_string: receives a newly allocated string for JSON_STRING tokens, NULL
+ *             for every other token.
+ * ret_value:  receives the payload of JSON_BOOLEAN, JSON_REAL and
+ *             JSON_INTEGER tokens.
+ * state:      tokenizer state kept between calls; start with *state == NULL.
+ * line:       optional line counter; set to 1 on the first call and bumped
+ *             for newlines in the whitespace skipped ahead of a token.
+ *
+ * Returns the token type (JSON_xxx), JSON_END once only whitespace is left
+ * (*p and *state are not modified in that case), or a negative errno-style
+ * value on malformed input, allocation failure or an unknown *state. */
+int json_tokenize(
+                const char **p,
+                char **ret_string,
+                union json_value *ret_value,
+                void **state,
+                unsigned *line) {
+
+        const char *c, *b;
+        int t, r;
+
+        assert(p);
+        assert(*p);
+        assert(ret_string);
+        assert(ret_value);
+        assert(state);
+
+        t = PTR_TO_INT(*state);
+        c = *p;
+
+        if (t == STATE_NULL) {
+                if (line)
+                        *line = 1;
+                t = STATE_VALUE;
+        }
+
+        b = c + strspn(c, WHITESPACE);
+        if (*b == 0)
+                return JSON_END;
+
+        inc_lines(line, c, b - c);
+        c = b;
+
+        switch (t) {
+
+        case STATE_VALUE:
+
+                if (*c == '{')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_OBJECT_OPEN);
+
+                if (*c == '}')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_OBJECT_CLOSE);
+
+                if (*c == '[')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_ARRAY_OPEN);
+
+                if (*c == ']')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_ARRAY_CLOSE);
+
+                if (*c == '"') {
+                        r = json_parse_string(&c, ret_string);
+                        if (r < 0)
+                                return r;
+
+                        *ret_value = JSON_VALUE_NULL;
+                        *p = c;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return r;
+                }
+
+                /* *c cannot be NUL here, so strchr() only matches a real sign or digit */
+                if (strchr("-0123456789", *c)) {
+                        r = json_parse_number(&c, ret_value);
+                        if (r < 0)
+                                return r;
+
+                        *ret_string = NULL;
+                        *p = c;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return r;
+                }
+
+                if (startswith(c, "true")) {
+                        *ret_string = NULL;
+                        ret_value->boolean = true;
+                        *p = c + 4;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return JSON_BOOLEAN;
+                }
+
+                if (startswith(c, "false")) {
+                        *ret_string = NULL;
+                        ret_value->boolean = false;
+                        *p = c + 5;
+                        *state = INT_TO_PTR(STATE_VALUE_POST);
+                        return JSON_BOOLEAN;
+                }
+
+                if (startswith(c, "null"))
+                        return json_return_plain(p, c + 4, ret_string, ret_value, state, STATE_VALUE_POST, JSON_NULL);
+
+                return -EINVAL;
+
+        case STATE_VALUE_POST:
+
+                if (*c == ':')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_COLON);
+
+                if (*c == ',')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE, JSON_COMMA);
+
+                if (*c == '}')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_OBJECT_CLOSE);
+
+                if (*c == ']')
+                        return json_return_plain(p, c + 1, ret_string, ret_value, state, STATE_VALUE_POST, JSON_ARRAY_CLOSE);
+
+                return -EINVAL;
+
+        default:
+                /* A state value this tokenizer never stores: refuse it instead of looping forever */
+                return -EINVAL;
+        }
+}
diff --git a/src/shared/json.h b/src/shared/json.h
new file mode 100644
index 0000000000..a8457132e7
--- /dev/null
+++ b/src/shared/json.h
@@ -0,0 +1,50 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+/* Token types returned by json_tokenize(). Negative return values signal errors instead. */
+enum {
+ /* Only whitespace is left in the input */
+ JSON_END,
+ JSON_COLON,
+ JSON_COMMA,
+ JSON_OBJECT_OPEN,
+ JSON_OBJECT_CLOSE,
+ JSON_ARRAY_OPEN,
+ JSON_ARRAY_CLOSE,
+ /* Decoded text is returned through ret_string */
+ JSON_STRING,
+ /* The following three return their payload through union json_value */
+ JSON_REAL,
+ JSON_INTEGER,
+ JSON_BOOLEAN,
+ JSON_NULL,
+};
+
+/* Payload of JSON_BOOLEAN, JSON_REAL and JSON_INTEGER tokens; json_tokenize()
+ * fills in the member that matches the token type it returns. */
+union json_value {
+ bool boolean;
+ double real;
+ intmax_t integer;
+};
+
+/* Empty-initialized value, stored by json_tokenize() for tokens without a payload */
+#define JSON_VALUE_NULL ((union json_value) {})
+
+/* Reads the next token from the NUL-terminated text at *p and moves *p past it.
+ * Returns one of the JSON_xxx token types above or a negative errno-style value.
+ * *ret_string receives a newly allocated string for JSON_STRING tokens (NULL otherwise),
+ * *ret_value the payload of boolean, real and integer tokens. *state must be NULL on the
+ * first call and is updated on every call that returns a token other than JSON_END.
+ * line may be NULL; otherwise it is set to 1 on the first call and incremented for
+ * newlines in skipped whitespace. */
+int json_tokenize(const char **p, char **ret_string, union json_value *ret_value, void **state, unsigned *line);
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 4469a73751..67f6285eec 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -263,39 +263,37 @@ char *ascii_is_valid(const char *str) {
return (char*) str;
}
+/* Encodes the 16-bit value c as UTF-8 into p, which must have room for up to
+ * three bytes. No terminating NUL is written. Returns the number of bytes
+ * stored: 1 for values below 0x80, 2 for values below 0x800, 3 otherwise. */
+int utf8_encode_unichar(uint16_t c, char *p) {
+        uint8_t *t = (uint8_t*) p;
+
+        if (c < 0x80) {
+                t[0] = (uint8_t) c;
+                return 1;
+        }
+
+        if (c < 0x800) {
+                t[0] = (uint8_t) (0xc0 | (c >> 6));
+                t[1] = (uint8_t) (0x80 | (c & 0x3f));
+                return 2;
+        }
+
+        t[0] = (uint8_t) (0xe0 | (c >> 12));
+        t[1] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+        t[2] = (uint8_t) (0x80 | (c & 0x3f));
+        return 3;
+}
+
char *utf16_to_utf8(const void *s, size_t length) {
- char *r;
const uint8_t *f;
- uint8_t *t;
+ char *r, *t;
r = new(char, (length*3+1)/2 + 1);
if (!r)
return NULL;
- t = (uint8_t*) r;
-
- for (f = s; f < (const uint8_t*) s + length; f += 2) {
- uint16_t c;
-
- c = (f[1] << 8) | f[0];
-
- if (c == 0) {
- *t = 0;
- return r;
- } else if (c < 0x80) {
- *(t++) = (uint8_t) c;
- } else if (c < 0x800) {
- *(t++) = (uint8_t) (0xc0 | (c >> 6));
- *(t++) = (uint8_t) (0x80 | (c & 0x3f));
- } else {
- *(t++) = (uint8_t) (0xe0 | (c >> 12));
- *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
- *(t++) = (uint8_t) (0x80 | (c & 0x3f));
- }
- }
+ for (f = s, t = r; f < (const uint8_t*) s + length; f += 2)
+ t += utf8_encode_unichar((f[1] << 8) | f[0], t);
*t = 0;
-
return r;
}
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index 59abee50ac..dcf8588d32 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -36,6 +36,7 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pu
char *utf8_escape_invalid(const char *s);
char *utf8_escape_non_printable(const char *str);
+int utf8_encode_unichar(uint16_t c, char *p);
char *utf16_to_utf8(const void *s, size_t length);
int utf8_encoded_valid_unichar(const char *str);
diff --git a/src/shared/xml.h b/src/shared/xml.h
index af71709c33..b256b0ba10 100644
--- a/src/shared/xml.h
+++ b/src/shared/xml.h
@@ -28,7 +28,7 @@ enum {
XML_TAG_CLOSE,
XML_TAG_CLOSE_EMPTY,
XML_ATTRIBUTE_NAME,
- XML_ATTRIBUTE_VALUE
+ XML_ATTRIBUTE_VALUE,
};
int xml_tokenize(const char **p, char **name, void **state, unsigned *line);
diff --git a/src/test/test-json.c b/src/test/test-json.c
new file mode 100644
index 0000000000..8777cf7a40
--- /dev/null
+++ b/src/test/test-json.c
@@ -0,0 +1,101 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2014 Lennart Poettering
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <math.h>
+
+#include "log.h"
+#include "util.h"
+#include "json.h"
+
+/* Tokenizes "data" and compares the token stream with the expectations given
+ * as variadic arguments: for every token its type (int), directly followed by
+ * the expected payload for JSON_STRING (const char *), JSON_REAL (double),
+ * JSON_INTEGER (intmax_t) and JSON_BOOLEAN (int). The list must end with
+ * JSON_END or with the negative error code the tokenizer is expected to
+ * return. */
+static void test_one(const char *data, ...) {
+        void *state = NULL;
+        va_list ap;
+
+        va_start(ap, data);
+
+        for (;;) {
+                _cleanup_free_ char *str = NULL;
+                union json_value v = {};
+                int t, tt;
+
+                t = json_tokenize(&data, &str, &v, &state, NULL);
+                tt = va_arg(ap, int);
+
+                assert_se(t == tt);
+
+                if (t == JSON_END || t < 0)
+                        break;
+
+                else if (t == JSON_STRING) {
+                        const char *nn;
+
+                        nn = va_arg(ap, const char *);
+                        assert_se(streq_ptr(nn, str));
+
+                } else if (t == JSON_REAL) {
+                        double d;
+
+                        d = va_arg(ap, double);
+                        /* fabs(), not abs(): the integer variant would truncate the
+                         * difference and make the tolerance check meaningless */
+                        assert_se(fabs(d - v.real) < 0.001);
+
+                } else if (t == JSON_INTEGER) {
+                        intmax_t i;
+
+                        i = va_arg(ap, intmax_t);
+                        assert_se(i == v.integer);
+
+                } else if (t == JSON_BOOLEAN) {
+                        bool b;
+
+                        /* bool is promoted to int when passed through "..." */
+                        b = va_arg(ap, int);
+                        assert_se(b == v.boolean);
+                }
+        }
+
+        va_end(ap);
+}
+
+/* Runs the tokenizer over a fixed set of inputs; every test_one() call lists
+ * the token sequence (and payloads) expected for its input. */
+int main(int argc, char *argv[]) {
+
+ /* Invalid and empty input */
+ test_one("x", -EINVAL);
+ test_one("", JSON_END);
+ test_one(" ", JSON_END);
+ /* Numbers */
+ test_one("0", JSON_INTEGER, (intmax_t) 0, JSON_END);
+ test_one("1234", JSON_INTEGER, (intmax_t) 1234, JSON_END);
+ test_one("3.141", JSON_REAL, 3.141, JSON_END);
+ test_one("0.0", JSON_REAL, 0.0, JSON_END);
+ test_one("7e3", JSON_REAL, 7e3, JSON_END);
+ test_one("-7e-3", JSON_REAL, -7e-3, JSON_END);
+ /* Literals */
+ test_one("true", JSON_BOOLEAN, true, JSON_END);
+ test_one("false", JSON_BOOLEAN, false, JSON_END);
+ test_one("null", JSON_NULL, JSON_END);
+ /* Empty objects and arrays, with and without surrounding whitespace */
+ test_one("{}", JSON_OBJECT_OPEN, JSON_OBJECT_CLOSE, JSON_END);
+ test_one("\t {\n} \n", JSON_OBJECT_OPEN, JSON_OBJECT_CLOSE, JSON_END);
+ test_one("[]", JSON_ARRAY_OPEN, JSON_ARRAY_CLOSE, JSON_END);
+ test_one("\t [] \n\n", JSON_ARRAY_OPEN, JSON_ARRAY_CLOSE, JSON_END);
+ /* Strings, including an escaped newline */
+ test_one("\"\"", JSON_STRING, "", JSON_END);
+ test_one("\"foo\"", JSON_STRING, "foo", JSON_END);
+ test_one("\"foo\\nfoo\"", JSON_STRING, "foo\nfoo", JSON_END);
+ /* Nested structures */
+ test_one("{\"foo\" : \"bar\"}", JSON_OBJECT_OPEN, JSON_STRING, "foo", JSON_COLON, JSON_STRING, "bar", JSON_OBJECT_CLOSE, JSON_END);
+ test_one("{\"foo\" : [true, false]}", JSON_OBJECT_OPEN, JSON_STRING, "foo", JSON_COLON, JSON_ARRAY_OPEN, JSON_BOOLEAN, true, JSON_COMMA, JSON_BOOLEAN, false, JSON_ARRAY_CLOSE, JSON_OBJECT_CLOSE, JSON_END);
+ /* U+FFFD given as raw UTF-8 bytes and as a hex escape must decode to the same bytes */
+ test_one("\"\xef\xbf\xbd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
+ test_one("\"\\ufffd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
+
+ return 0;
+}