summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Gundersen <teg@jklm.no>2014-12-22 14:53:40 +0100
committerTom Gundersen <teg@jklm.no>2014-12-22 20:27:20 +0100
commit9bae67d49b861b1f142f1a1e27753fe08e63ade7 (patch)
tree8e3b1d2d0c34558b17344526923697f5ea382c8d
parent04166cb7dd90918385835f246c43d8ec22af0d68 (diff)
shared: json - support escaping utf16 surrogate pairs
We originally only supported escaping UCS-2 encoded characters (as \uxxxx), which only covers the BMP. Also support escaping UTF-16 surrogate pairs (of the form \uxxxx\uyyyy) to cover all of Unicode.
-rw-r--r--src/shared/json.c81
-rw-r--r--src/test/test-json.c3
2 files changed, 62 insertions, 22 deletions
diff --git a/src/shared/json.c b/src/shared/json.c
index 47f801c858..bb3d26f0e5 100644
--- a/src/shared/json.c
+++ b/src/shared/json.c
@@ -53,6 +53,42 @@ static void inc_lines(unsigned *line, const char *s, size_t n) {
}
}
+static int unhex_ucs2(const char *c, uint16_t *ret) {
+ int aa, bb, cc, dd;
+ uint16_t x;
+
+ assert(c);
+ assert(ret);
+
+ aa = unhexchar(c[0]);
+ if (aa < 0)
+ return -EINVAL;
+
+ bb = unhexchar(c[1]);
+ if (bb < 0)
+ return -EINVAL;
+
+ cc = unhexchar(c[2]);
+ if (cc < 0)
+ return -EINVAL;
+
+ dd = unhexchar(c[3]);
+ if (dd < 0)
+ return -EINVAL;
+
+ x = ((uint16_t) aa << 12) |
+ ((uint16_t) bb << 8) |
+ ((uint16_t) cc << 4) |
+ ((uint16_t) dd);
+
+ if (x <= 0)
+ return -EINVAL;
+
+ *ret = x;
+
+ return 0;
+}
+
static int json_parse_string(const char **p, char **ret) {
_cleanup_free_ char *s = NULL;
size_t n = 0, allocated = 0;
@@ -119,39 +155,40 @@ static int json_parse_string(const char **p, char **ret) {
else if (*c == 't')
ch = '\t';
else if (*c == 'u') {
- int aa, bb, cc, dd;
uint16_t x;
+ int r;
- aa = unhexchar(c[1]);
- if (aa < 0)
- return -EINVAL;
+ r = unhex_ucs2(c + 1, &x);
+ if (r < 0)
+ return r;
- bb = unhexchar(c[2]);
- if (bb < 0)
- return -EINVAL;
+ c += 5;
- cc = unhexchar(c[3]);
- if (cc < 0)
- return -EINVAL;
+ if (!GREEDY_REALLOC(s, allocated, n + 4))
+ return -ENOMEM;
- dd = unhexchar(c[4]);
- if (dd < 0)
+ if (!utf16_is_surrogate(x))
+ n += utf8_encode_unichar(s + n, x);
+ else if (utf16_is_trailing_surrogate(x))
return -EINVAL;
+ else {
+ uint16_t y;
+ if (c[0] != '\\' || c[1] != 'u')
+ return -EINVAL;
- x = ((uint16_t) aa << 12) |
- ((uint16_t) bb << 8) |
- ((uint16_t) cc << 4) |
- ((uint16_t) dd);
+ r = unhex_ucs2(c + 2, &y);
+ if (r < 0)
+ return r;
- if (x <= 0)
- return -EINVAL;
+ c += 6;
- if (!GREEDY_REALLOC(s, allocated, n + 4))
- return -ENOMEM;
+ if (!utf16_is_trailing_surrogate(y))
+ return -EINVAL;
+
+ n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y));
+ }
- n += utf8_encode_unichar(s + n, x);
- c += 5;
continue;
} else
return -EINVAL;
diff --git a/src/test/test-json.c b/src/test/test-json.c
index e53e8ed50f..b09131891c 100644
--- a/src/test/test-json.c
+++ b/src/test/test-json.c
@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) {
test_one("\"\xef\xbf\xbd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
test_one("\"\\ufffd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END);
test_one("\"\\uf\"", -EINVAL);
+ test_one("\"\\ud800a\"", -EINVAL);
+ test_one("\"\\udc00\\udc00\"", -EINVAL);
+ test_one("\"\\ud801\\udc37\"", JSON_STRING, "\xf0\x90\x90\xb7", JSON_END);
return 0;
}