summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/shared/utf8.c39
-rw-r--r--src/shared/utf8.h1
-rw-r--r--src/test/test-utf8.c25
3 files changed, 65 insertions, 0 deletions
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 8702ceb1b2..0b6c38ebbf 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -212,6 +212,45 @@ char *utf8_escape_invalid(const char *str) {
return p;
}
+char *utf8_escape_non_printable(const char *str) {
+ char *p, *s;
+
+ assert(str);
+
+ p = s = malloc(strlen(str) * 4 + 1);
+ if (!p)
+ return NULL;
+
+ while (*str) {
+ int len;
+
+ len = utf8_encoded_valid_unichar(str);
+ if (len > 0) {
+ if (utf8_is_printable(str, len)) {
+ s = mempcpy(s, str, len);
+ str += len;
+ } else {
+ if ((*str < ' ') || (*str >= 127)) {
+ *(s++) = '\\';
+ *(s++) = 'x';
+ *(s++) = hexchar((int) *str >> 4);
+ *(s++) = hexchar((int) *str);
+ } else
+ *(s++) = *str;
+
+ str += 1;
+ }
+ } else {
+ s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
+ str += 1;
+ }
+ }
+
+ *s = '\0';
+
+ return p;
+}
+
char *ascii_is_valid(const char *str) {
const char *p;
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index c087995930..1fe1a350d5 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -30,6 +30,7 @@
const char *utf8_is_valid(const char *s) _pure_;
char *ascii_is_valid(const char *s) _pure_;
char *utf8_escape_invalid(const char *s);
+char *utf8_escape_non_printable(const char *str);
bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_;
_pure_ static inline bool utf8_is_printable(const char* str, size_t length) {
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index b7d988f22d..6dde63cdff 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -66,12 +66,37 @@ static void test_utf8_escaping(void) {
assert_se(utf8_is_valid(p3));
}
+static void test_utf8_escaping_printable(void) {
+ _cleanup_free_ char *p1, *p2, *p3, *p4, *p5;
+
+ p1 = utf8_escape_non_printable("goo goo goo");
+ puts(p1);
+ assert_se(utf8_is_valid(p1));
+
+ p2 = utf8_escape_non_printable("\341\204\341\204");
+ puts(p2);
+ assert_se(utf8_is_valid(p2));
+
+ p3 = utf8_escape_non_printable("\341\204");
+ puts(p3);
+ assert_se(utf8_is_valid(p3));
+
+ p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a");
+ puts(p4);
+ assert_se(utf8_is_valid(p4));
+
+ p5 = utf8_escape_non_printable("\001 \019\20\a");
+ puts(p5);
+ assert_se(utf8_is_valid(p5));
+}
+
int main(int argc, char *argv[]) {
test_utf8_is_valid();
test_utf8_is_printable();
test_ascii_is_valid();
test_utf8_encoded_valid_unichar();
test_utf8_escaping();
+ test_utf8_escaping_printable();
return 0;
}