diff options
author | Lennart Poettering <lennart@poettering.net> | 2014-12-20 09:19:11 -0500 |
---|---|---|
committer | Anthony G. Basile <blueness@gentoo.org> | 2014-12-20 09:19:11 -0500 |
commit | 3f006fb44de6f2308da94ea26b133773e18cb188 (patch) | |
tree | 9d6f22e4707b3483742b0640e24522b00f5f2cc4 /src/shared/utf8.c | |
parent | 87c9d285d27770daf9764e5bc274d9d5516a9e34 (diff) |
src/shared/utf8.c: follow upstream adding utf8_encode_unichar()
This is part of upstream commit e7eebcfc42f00aa481ef31abc8e7e243c16f5b2c,
in which a minimal JSON tokenizer was added. We don't add the tokenizer, but
we do take the UTF-8 changes from that commit to follow upstream more closely.
Signed-off-by: Anthony G. Basile <blueness@gentoo.org>
Diffstat (limited to 'src/shared/utf8.c')
-rw-r--r-- | src/shared/utf8.c | 46 |
1 files changed, 22 insertions, 24 deletions
```diff
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 75ed7caee3..4420a45b36 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -177,39 +177,37 @@ char *ascii_is_valid(const char *str) {
         return (char*) str;
 }
 
+int utf8_encode_unichar(uint16_t c, char *p) {
+        uint8_t *t = (uint8_t*) p;
+        int d;
+
+        if (c < 0x80) {
+                t[0] = (uint8_t) c;
+                return 1;
+        } else if (c < 0x800) {
+                t[0] = (uint8_t) (0xc0 | (c >> 6));
+                t[1] = (uint8_t) (0x80 | (c & 0x3f));
+                return 2;
+        } else {
+                t[0] = (uint8_t) (0xe0 | (c >> 12));
+                t[1] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+                t[2] = (uint8_t) (0x80 | (c & 0x3f));
+                return 3;
+        }
+}
+
 char *utf16_to_utf8(const void *s, size_t length) {
-        char *r;
         const uint8_t *f;
-        uint8_t *t;
+        char *r, *t;
 
         r = new(char, (length*3+1)/2 + 1);
         if (!r)
                 return NULL;
 
-        t = (uint8_t*) r;
-
-        for (f = s; f < (const uint8_t*) s + length; f += 2) {
-                uint16_t c;
-
-                c = (f[1] << 8) | f[0];
-
-                if (c == 0) {
-                        *t = 0;
-                        return r;
-                } else if (c < 0x80) {
-                        *(t++) = (uint8_t) c;
-                } else if (c < 0x800) {
-                        *(t++) = (uint8_t) (0xc0 | (c >> 6));
-                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
-                } else {
-                        *(t++) = (uint8_t) (0xe0 | (c >> 12));
-                        *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
-                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
-                }
-        }
+        for (f = s, t = r; f < (const uint8_t*) s + length; f += 2)
+                t += utf8_encode_unichar((f[1] << 8) | f[0], t);
 
         *t = 0;
-
         return r;
 }
 
```