diff options
Diffstat (limited to 'src/shared')
-rw-r--r-- | src/shared/utf8.c | 39 | ||||
-rw-r--r-- | src/shared/utf8.h | 1 |
2 files changed, 40 insertions, 0 deletions
diff --git a/src/shared/utf8.c b/src/shared/utf8.c index 8702ceb1b2..0b6c38ebbf 100644 --- a/src/shared/utf8.c +++ b/src/shared/utf8.c @@ -212,6 +212,45 @@ char *utf8_escape_invalid(const char *str) { return p; } +char *utf8_escape_non_printable(const char *str) { + char *p, *s; + + assert(str); + + p = s = malloc(strlen(str) * 4 + 1); + if (!p) + return NULL; + + while (*str) { + int len; + + len = utf8_encoded_valid_unichar(str); + if (len > 0) { + if (utf8_is_printable(str, len)) { + s = mempcpy(s, str, len); + str += len; + } else { + if ((*str < ' ') || (*str >= 127)) { + *(s++) = '\\'; + *(s++) = 'x'; + *(s++) = hexchar((int) *str >> 4); + *(s++) = hexchar((int) *str); + } else + *(s++) = *str; + + str += 1; + } + } else { + s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER)); + str += 1; + } + } + + *s = '\0'; + + return p; +} + char *ascii_is_valid(const char *str) { const char *p; diff --git a/src/shared/utf8.h b/src/shared/utf8.h index c087995930..1fe1a350d5 100644 --- a/src/shared/utf8.h +++ b/src/shared/utf8.h @@ -30,6 +30,7 @@ const char *utf8_is_valid(const char *s) _pure_; char *ascii_is_valid(const char *s) _pure_; char *utf8_escape_invalid(const char *s); +char *utf8_escape_non_printable(const char *str); bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_; _pure_ static inline bool utf8_is_printable(const char* str, size_t length) { |