summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2014-12-20 09:19:11 -0500
committer Anthony G. Basile <blueness@gentoo.org> 2014-12-20 09:19:11 -0500
commit 3f006fb44de6f2308da94ea26b133773e18cb188 (patch)
tree 9d6f22e4707b3483742b0640e24522b00f5f2cc4
parent 87c9d285d27770daf9764e5bc274d9d5516a9e34 (diff)
src/shared/utf8.c: follow upstream adding utf8_encode_unichar()
This is part of upstream's e7eebcfc42f00aa481ef31abc8e7e243c16f5b2c in which they add a minimal JSON tokenizer. We don't add that but update utf8 stuff from that commit to follow upstream more closely.

Signed-off-by: Anthony G. Basile <blueness@gentoo.org>
-rw-r--r-- src/shared/utf8.c 46
-rw-r--r-- src/shared/utf8.h 1
2 files changed, 23 insertions, 24 deletions
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 75ed7caee3..4420a45b36 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -177,39 +177,37 @@ char *ascii_is_valid(const char *str) {
return (char*) str;
}
+int utf8_encode_unichar(uint16_t c, char *p) {
+ uint8_t *t = (uint8_t*) p;
+ int d;
+
+ if (c < 0x80) {
+ t[0] = (uint8_t) c;
+ return 1;
+ } else if (c < 0x800) {
+ t[0] = (uint8_t) (0xc0 | (c >> 6));
+ t[1] = (uint8_t) (0x80 | (c & 0x3f));
+ return 2;
+ } else {
+ t[0] = (uint8_t) (0xe0 | (c >> 12));
+ t[1] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+ t[2] = (uint8_t) (0x80 | (c & 0x3f));
+ return 3;
+ }
+}
+
char *utf16_to_utf8(const void *s, size_t length) {
- char *r;
const uint8_t *f;
- uint8_t *t;
+ char *r, *t;
r = new(char, (length*3+1)/2 + 1);
if (!r)
return NULL;
- t = (uint8_t*) r;
-
- for (f = s; f < (const uint8_t*) s + length; f += 2) {
- uint16_t c;
-
- c = (f[1] << 8) | f[0];
-
- if (c == 0) {
- *t = 0;
- return r;
- } else if (c < 0x80) {
- *(t++) = (uint8_t) c;
- } else if (c < 0x800) {
- *(t++) = (uint8_t) (0xc0 | (c >> 6));
- *(t++) = (uint8_t) (0x80 | (c & 0x3f));
- } else {
- *(t++) = (uint8_t) (0xe0 | (c >> 12));
- *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
- *(t++) = (uint8_t) (0x80 | (c & 0x3f));
- }
- }
+ for (f = s, t = r; f < (const uint8_t*) s + length; f += 2)
+ t += utf8_encode_unichar((f[1] << 8) | f[0], t);
*t = 0;
-
return r;
}
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index 24232247f5..6112332a4e 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -30,6 +30,7 @@ _pure_ static inline bool utf8_is_printable(const char* str, size_t length) {
return utf8_is_printable_newline(str, length, true);
}
+int utf8_encode_unichar(uint16_t c, char *p);
char *utf16_to_utf8(const void *s, size_t length);
int utf8_encoded_valid_unichar(const char *str);