diff options
-rw-r--r-- | src/basic/escape.c | 51 | ||||
-rw-r--r-- | src/basic/escape.h | 2 | ||||
-rw-r--r-- | src/basic/extract-word.c | 9 |
3 files changed, 24 insertions, 38 deletions
diff --git a/src/basic/escape.c b/src/basic/escape.c index ab282efa3c..5661f36813 100644 --- a/src/basic/escape.c +++ b/src/basic/escape.c @@ -119,16 +119,18 @@ char *cescape(const char *s) { return cescape_length(s, strlen(s)); } -int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) { +int cunescape_one(const char *p, size_t length, uint32_t *ret, bool *eight_bit) { int r = 1; assert(p); assert(*p); assert(ret); - /* Unescapes C style. Returns the unescaped character in ret, - * unless we encountered a \u sequence in which case the full - * unicode character is returned in ret_unicode, instead. */ + /* Unescapes C style. Returns the unescaped character in ret. + * Sets *eight_bit to true if the escaped sequence either fits in + * one byte in UTF-8 or is a non-unicode literal byte and should + * instead be copied directly. + */ if (length != (size_t) -1 && length < 1) return -EINVAL; @@ -190,7 +192,8 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode if (a == 0 && b == 0) return -EINVAL; - *ret = (char) ((a << 4U) | b); + *ret = (a << 4U) | b; + *eight_bit = true; r = 3; break; } @@ -217,16 +220,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode if (c == 0) return -EINVAL; - if (c < 128) - *ret = c; - else { - if (!ret_unicode) - return -EINVAL; - - *ret = 0; - *ret_unicode = c; - } - + *ret = c; r = 5; break; } @@ -258,16 +252,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode if (!unichar_is_valid(c)) return -EINVAL; - if (c < 128) - *ret = c; - else { - if (!ret_unicode) - return -EINVAL; - - *ret = 0; - *ret_unicode = c; - } - + *ret = c; r = 9; break; } @@ -309,6 +294,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode return -EINVAL; *ret = m; + *eight_bit = true; r = 3; break; } @@ -342,7 +328,7 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi for (f = s, t = r + pl; f < s + length; f++) { size_t remaining; uint32_t u; - char c; + bool eight_bit = false; int k; remaining = s + length - f; @@ -365,7 +351,7 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi return -EINVAL; } - k = cunescape_one(f + 1, remaining - 1, &c, &u); + k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit); if (k < 0) { if (flags & UNESCAPE_RELAX) { /* Invalid escape code, let's take it literal then */ @@ -377,14 +363,13 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi return k; } - if (c != 0) - /* Non-Unicode? Let's encode this directly */ - *(t++) = c; + f += k; + if (eight_bit) + /* One byte? Set directly as specified */ + *(t++) = u; else - /* Unicode? Then let's encode this in UTF-8 */ + /* Otherwise encode as multi-byte UTF-8 */ t += utf8_encode_unichar(t, u); - - f += k; } *t = 0; diff --git a/src/basic/escape.h b/src/basic/escape.h index c710f01743..d943aa71f5 100644 --- a/src/basic/escape.h +++ b/src/basic/escape.h @@ -45,7 +45,7 @@ size_t cescape_char(char c, char *buf); int cunescape(const char *s, UnescapeFlags flags, char **ret); int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret); int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret); -int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode); +int cunescape_one(const char *p, size_t length, uint32_t *ret, bool *eight_bit); char *xescape(const char *s, const char *bad); diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c index 7cc2a1de13..090d2a7884 100644 --- a/src/basic/extract-word.c +++ b/src/basic/extract-word.c @@ -108,8 +108,9 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra if (flags & EXTRACT_CUNESCAPE) { uint32_t u; + bool eight_bit = false; - r = cunescape_one(*p, (size_t) -1, &c, &u); + r = cunescape_one(*p, (size_t) -1, &u, &eight_bit); if (r < 0) { if (flags & EXTRACT_CUNESCAPE_RELAX) { s[sz++] = '\\'; @@ -119,10 +120,10 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra } else { (*p) += r - 1; - if (c != 0) - s[sz++] = c; /* normal explicit char */ + if (eight_bit) + s[sz++] = u; else - sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */ + sz += utf8_encode_unichar(s + sz, u); } } else s[sz++] = c; |