diff options
author | Dave Reisner <dreisner@archlinux.org> | 2014-01-09 12:20:25 -0500 |
---|---|---|
committer | Anthony G. Basile <blueness@gentoo.org> | 2014-01-09 12:21:20 -0500 |
commit | 7ed87c74dfb81761cbcefc10cd4f79394a1d36a3 (patch) | |
tree | d42454fd7146d18041770ace67e7d623bdc77830 /src/libudev/libudev-util.c | |
parent | 0a6fb50746ab00d650145cf17fc1a15f109bc386 (diff) |
move utf8 functions from libudev-private.h to utf8.h
There's now some more obvious overlap amongst the two
utf8 validation functions, but no more than there already
was previously. This also adds some menial tests for anyone
who wants to do more merging of these two in the future.
Adopted for eudev: Anthony G. Basile <blueness@gentoo.org>
Signed-off-by: Anthony G. Basile <blueness@gentoo.org>
Diffstat (limited to 'src/libudev/libudev-util.c')
-rw-r--r-- | src/libudev/libudev-util.c | 171 |
1 files changed, 3 insertions, 168 deletions
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c index ae5e285ec0..b4452f60d4 100644 --- a/src/libudev/libudev-util.c +++ b/src/libudev/libudev-util.c @@ -37,6 +37,7 @@ #include "libudev.h" #include "libudev-private.h" +#include "utf8.h" /** * SECTION:libudev-util @@ -314,129 +315,6 @@ void util_remove_trailing_chars(char *path, char c) path[--len] = '\0'; } -/* count of characters used to encode one unicode char */ -static int utf8_encoded_expected_len(const char *str) -{ - unsigned char c = (unsigned char)str[0]; - - if (c < 0x80) - return 1; - if ((c & 0xe0) == 0xc0) - return 2; - if ((c & 0xf0) == 0xe0) - return 3; - if ((c & 0xf8) == 0xf0) - return 4; - if ((c & 0xfc) == 0xf8) - return 5; - if ((c & 0xfe) == 0xfc) - return 6; - return 0; -} - -/* decode one unicode char */ -static int utf8_encoded_to_unichar(const char *str) -{ - int unichar; - int len; - int i; - - len = utf8_encoded_expected_len(str); - switch (len) { - case 1: - return (int)str[0]; - case 2: - unichar = str[0] & 0x1f; - break; - case 3: - unichar = (int)str[0] & 0x0f; - break; - case 4: - unichar = (int)str[0] & 0x07; - break; - case 5: - unichar = (int)str[0] & 0x03; - break; - case 6: - unichar = (int)str[0] & 0x01; - break; - default: - return -1; - } - - for (i = 1; i < len; i++) { - if (((int)str[i] & 0xc0) != 0x80) - return -1; - unichar <<= 6; - unichar |= (int)str[i] & 0x3f; - } - - return unichar; -} - -/* expected size used to encode one unicode char */ -static int utf8_unichar_to_encoded_len(int unichar) -{ - if (unichar < 0x80) - return 1; - if (unichar < 0x800) - return 2; - if (unichar < 0x10000) - return 3; - if (unichar < 0x200000) - return 4; - if (unichar < 0x4000000) - return 5; - return 6; -} - -/* check if unicode char has a valid numeric range */ -static int utf8_unichar_valid_range(int unichar) -{ - if (unichar > 0x10ffff) - return 0; - if ((unichar & 0xfffff800) == 0xd800) - return 0; - if ((unichar > 0xfdcf) && (unichar < 0xfdf0)) - return 0; - if ((unichar & 0xffff) == 0xffff) - return 0; - return 1; -} - -/* validate one encoded unicode char and return its length */ -static int utf8_encoded_valid_unichar(const char *str) -{ - int len; - int unichar; - int i; - - len = utf8_encoded_expected_len(str); - if (len == 0) - return -1; - - /* ascii is valid */ - if (len == 1) - return 1; - - /* check if expected encoded chars are available */ - for (i = 0; i < len; i++) - if ((str[i] & 0x80) != 0x80) - return -1; - - unichar = utf8_encoded_to_unichar(str); - - /* check if encoded length matches encoded value */ - if (utf8_unichar_to_encoded_len(unichar) != len) - return -1; - - /* check if value has valid range */ - if (!utf8_unichar_valid_range(unichar)) - return -1; - - return len; -} - int util_replace_whitespace(const char *str, char *to, size_t len) { size_t i, j; @@ -465,17 +343,6 @@ int util_replace_whitespace(const char *str, char *to, size_t len) return 0; } -static int is_whitelisted(char c, const char *white) -{ - if ((c >= '0' && c <= '9') || - (c >= 'A' && c <= 'Z') || - (c >= 'a' && c <= 'z') || - strchr("#+-.:=@_", c) != NULL || - (white != NULL && strchr(white, c) != NULL)) - return 1; - return 0; -} - /* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */ int util_replace_chars(char *str, const char *white) { @@ -485,7 +352,7 @@ int util_replace_chars(char *str, const char *white) while (str[i] != '\0') { int len; - if (is_whitelisted(str[i], white)) { + if (is_utf8_encoding_whitelisted(str[i], white)) { i++; continue; } @@ -533,39 +400,7 @@ int util_replace_chars(char *str, const char *white) **/ _public_ int udev_util_encode_string(const char *str, char *str_enc, size_t len) { - size_t i, j; - - if (str == NULL || str_enc == NULL) - return -1; - - for (i = 0, j = 0; str[i] != '\0'; i++) { - int seqlen; - - seqlen = utf8_encoded_valid_unichar(&str[i]); - if (seqlen > 1) { - if (len-j < (size_t)seqlen) - goto err; - memcpy(&str_enc[j], &str[i], seqlen); - j += seqlen; - i += (seqlen-1); - } else if (str[i] == '\\' || !is_whitelisted(str[i], NULL)) { - if (len-j < 4) - goto err; - sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]); - j += 4; - } else { - if (len-j < 1) - goto err; - str_enc[j] = str[i]; - j++; - } - } - if (len-j < 1) - goto err; - str_enc[j] = '\0'; - return 0; -err: - return -1; + return udev_encode_string(str, str_enc, len); } /* |