1 files changed, 69 insertions, 53 deletions
diff --git a/src/libudev/utf8.c b/src/libudev/utf8.c
index c9e84b804c..1044fb6489 100644
--- a/src/libudev/utf8.c
+++ b/src/libudev/utf8.c
@@ -63,6 +63,19 @@ static inline bool is_unicode_valid(uint32_t ch) {
 
         return true;
 }
+
+static bool is_unicode_control(uint32_t ch) {
+
+        /*
+          Control characters: 0 to ' '-1 is the C0 range, DEL=0x7F, and
+          DEL+1 to 0x9F is the C1 range.
+          '\t' and '\n' are in the C0 range, but more or less harmless and
+          commonly used, so they are not treated as control characters here.
+        */
+        return (ch < ' ' && ch != '\t' && ch != '\n') ||
+                (0x7F <= ch && ch <= 0x9F);
+}
+
 /* count of characters used to encode one unicode char */
 static int utf8_encoded_expected_len(const char *str) {
         unsigned char c = (unsigned char)str[0];
@@ -121,24 +134,73 @@ int utf8_encoded_to_unichar(const char *str) {
         return unichar;
 }
 
-const char *utf8_is_valid(const char *str) {
+bool utf8_is_printable(const char* str, size_t length) {
         const uint8_t *p;
 
         assert(str);
 
-        for (p = (const uint8_t*) str; *p; ) {
-                int len;
+        for (p = (const uint8_t*) str; length;) {
+                int encoded_len = utf8_encoded_valid_unichar((const char *)p);
+                int val = utf8_encoded_to_unichar((const char*)p);
 
-                len = utf8_encoded_valid_unichar((const char *)p);
+                if (encoded_len < 0 || (size_t) encoded_len > length ||
+                    val < 0 || is_unicode_control(val))
+                        return false; /* invalid, runs past length, or a control character */
+                length -= encoded_len; /* cannot wrap: encoded_len <= length was checked above */
+                p += encoded_len;
+        }
+        /* All 'length' bytes decoded as non-control characters (also true for length == 0). */
+        return true;
+}
 
-                if (len < 0)
+char *ascii_is_valid(const char *str) {
+        const char *p;
+
+        assert(str);
+        /* Returns str itself if every byte before the terminating NUL is < 128, NULL otherwise. */
+        for (p = str; *p; p++)
+                if ((unsigned char) *p >= 128)
                         return NULL;
 
-                p += len;
+        return (char*) str; /* const is cast away to match the char* return type */
+}
+
+char *utf16_to_utf8(const void *s, size_t length) {
+        char *r;
+        const uint8_t *f;
+        uint8_t *t;
+        /* Converts 'length' bytes of little-endian UTF-16 at s into a NUL-terminated UTF-8 string. */
+        r = new(char, (length*3+1)/2 + 1); /* at most 3 output bytes per 2-byte unit, plus NUL */
+        if (!r)
+                return NULL;
+
+        t = (uint8_t*) r;
+        /* Stop at the last whole 16-bit unit so f[1] never reads past an odd length. */
+        for (f = s; f < (const uint8_t*) s + (length / 2) * 2; f += 2) {
+                uint16_t c;
+
+                c = (f[1] << 8) | f[0]; /* low byte first */
+                /* Each unit is encoded on its own; surrogate pairs are not combined. */
+                if (c == 0) { /* an embedded NUL unit ends the conversion early */
+                        *t = 0;
+                        return r;
+                } else if (c < 0x80) {
+                        *(t++) = (uint8_t) c;
+                } else if (c < 0x800) {
+                        *(t++) = (uint8_t) (0xc0 | (c >> 6));
+                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
+                } else {
+                        *(t++) = (uint8_t) (0xe0 | (c >> 12));
+                        *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
+                }
         }
 
-        return str;
+        *t = 0;
+
+        return r;
 }
+
 /* expected size used to encode one unicode char */
 static int utf8_unichar_to_encoded_len(int unichar) {
         if (unichar < 0x80)
@@ -185,49 +247,3 @@ int utf8_encoded_valid_unichar(const char *str) {
 
         return len;
 }
-
-int is_utf8_encoding_whitelisted(char c, const char *white) {
-        if ((c >= '0' && c <= '9') ||
-            (c >= 'A' && c <= 'Z') ||
-            (c >= 'a' && c <= 'z') ||
-            strchr("#+-.:=@_", c) != NULL ||
-            (white != NULL && strchr(white, c) != NULL))
-                return 1;
-        return 0;
-}
-
-int udev_encode_string(const char *str, char *str_enc, size_t len) {
-        size_t i, j;
-
-        if (str == NULL || str_enc == NULL)
-                return -1;
-
-        for (i = 0, j = 0; str[i] != '\0'; i++) {
-                int seqlen;
-
-                seqlen = utf8_encoded_valid_unichar(&str[i]);
-                if (seqlen > 1) {
-                        if (len-j < (size_t)seqlen)
-                                goto err;
-                        memcpy(&str_enc[j], &str[i], seqlen);
-                        j += seqlen;
-                        i += (seqlen-1);
-                } else if (str[i] == '\\' || !is_utf8_encoding_whitelisted(str[i], NULL)) {
-                        if (len-j < 4)
-                                goto err;
-                        sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
-                        j += 4;
-                } else {
-                        if (len-j < 1)
-                                goto err;
-                        str_enc[j] = str[i];
-                        j++;
-                }
-        }
-        if (len-j < 1)
-                goto err;
-        str_enc[j] = '\0';
-        return 0;
-err:
-        return -1;
-}