summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/basic/escape.c | 51
-rw-r--r-- src/basic/escape.h | 2
-rw-r--r-- src/basic/extract-word.c | 9
3 files changed, 24 insertions, 38 deletions
diff --git a/src/basic/escape.c b/src/basic/escape.c
index ab282efa3c..5661f36813 100644
--- a/src/basic/escape.c
+++ b/src/basic/escape.c
@@ -119,16 +119,18 @@ char *cescape(const char *s) {
return cescape_length(s, strlen(s));
}
-int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
+int cunescape_one(const char *p, size_t length, uint32_t *ret, bool *eight_bit) {
int r = 1;
assert(p);
assert(*p);
assert(ret);
- /* Unescapes C style. Returns the unescaped character in ret,
- * unless we encountered a \u sequence in which case the full
- * unicode character is returned in ret_unicode, instead. */
+ /* Unescapes C style. Returns the unescaped character in ret.
+ * Sets *eight_bit to true if the escape sequence names a single
+ * literal byte (e.g. a two-digit hex escape) that the caller should
+ * copy through as-is instead of encoding it as multi-byte UTF-8.
+ */
if (length != (size_t) -1 && length < 1)
return -EINVAL;
@@ -190,7 +192,8 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
if (a == 0 && b == 0)
return -EINVAL;
- *ret = (char) ((a << 4U) | b);
+ *ret = (a << 4U) | b;
+ *eight_bit = true;
r = 3;
break;
}
@@ -217,16 +220,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
if (c == 0)
return -EINVAL;
- if (c < 128)
- *ret = c;
- else {
- if (!ret_unicode)
- return -EINVAL;
-
- *ret = 0;
- *ret_unicode = c;
- }
-
+ *ret = c;
r = 5;
break;
}
@@ -258,16 +252,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
if (!unichar_is_valid(c))
return -EINVAL;
- if (c < 128)
- *ret = c;
- else {
- if (!ret_unicode)
- return -EINVAL;
-
- *ret = 0;
- *ret_unicode = c;
- }
-
+ *ret = c;
r = 9;
break;
}
@@ -309,6 +294,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
return -EINVAL;
*ret = m;
+ *eight_bit = true;
r = 3;
break;
}
@@ -342,7 +328,7 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi
for (f = s, t = r + pl; f < s + length; f++) {
size_t remaining;
uint32_t u;
- char c;
+ bool eight_bit = false;
int k;
remaining = s + length - f;
@@ -365,7 +351,7 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi
return -EINVAL;
}
- k = cunescape_one(f + 1, remaining - 1, &c, &u);
+ k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
if (k < 0) {
if (flags & UNESCAPE_RELAX) {
/* Invalid escape code, let's take it literal then */
@@ -377,14 +363,13 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi
return k;
}
- if (c != 0)
- /* Non-Unicode? Let's encode this directly */
- *(t++) = c;
+ f += k;
+ if (eight_bit)
+ /* One byte? Set directly as specified */
+ *(t++) = u;
else
- /* Unicode? Then let's encode this in UTF-8 */
+ /* Otherwise encode as multi-byte UTF-8 */
t += utf8_encode_unichar(t, u);
-
- f += k;
}
*t = 0;
diff --git a/src/basic/escape.h b/src/basic/escape.h
index c710f01743..d943aa71f5 100644
--- a/src/basic/escape.h
+++ b/src/basic/escape.h
@@ -45,7 +45,7 @@ size_t cescape_char(char c, char *buf);
int cunescape(const char *s, UnescapeFlags flags, char **ret);
int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret);
int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret);
-int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode);
+int cunescape_one(const char *p, size_t length, uint32_t *ret, bool *eight_bit);
char *xescape(const char *s, const char *bad);
diff --git a/src/basic/extract-word.c b/src/basic/extract-word.c
index 7cc2a1de13..090d2a7884 100644
--- a/src/basic/extract-word.c
+++ b/src/basic/extract-word.c
@@ -108,8 +108,9 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra
if (flags & EXTRACT_CUNESCAPE) {
uint32_t u;
+ bool eight_bit = false;
- r = cunescape_one(*p, (size_t) -1, &c, &u);
+ r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
if (r < 0) {
if (flags & EXTRACT_CUNESCAPE_RELAX) {
s[sz++] = '\\';
@@ -119,10 +120,10 @@ int extract_first_word(const char **p, char **ret, const char *separators, Extra
} else {
(*p) += r - 1;
- if (c != 0)
- s[sz++] = c; /* normal explicit char */
+ if (eight_bit)
+ s[sz++] = u;
else
- sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
+ sz += utf8_encode_unichar(s + sz, u);
}
} else
s[sz++] = c;