device-nodes: move device node specific code to own file

In the process, rename udev_encode_string, which is poorly named for what it does. It deals specifically with encoding names that udev creates and has its own rules: UTF-8 is valid but some ASCII is not (e.g. path separators), and everything else is simply escaped. Rename it to encode_devnode_name. Adopted for eudev: Anthony G. Basile <blueness@gentoo.org> Signed-off-by: Anthony G. Basile <blueness@gentoo.org>
author: Dave Reisner <dreisner@archlinux.org> 2014-01-09 14:02:56 -0500
committer: Anthony G. Basile <blueness@gentoo.org> 2014-01-09 14:02:56 -0500
commit: 855ce449eba82c417c005d17aa680aba2048ed8d (patch)
tree: 9c9c5c9cdd30ed2dfbbdef820118124e891b6662
parent: 7ed87c74dfb81761cbcefc10cd4f79394a1d36a3 (diff)
10 files changed, 266 insertions, 84 deletions
diff --git a/.gitignore b/.gitignore
index 3f8ef48036..afd8e6f6fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,4 +61,6 @@ src/udev/keyboard-keys.txt
 test/test-libudev
 test/test-udev
 test/test
+test/test-device_nodes
+test/test-utf8
 test-driver
diff --git a/src/libudev/Makefile.am b/src/libudev/Makefile.am
index 568c4884b4..5211550857 100644
--- a/src/libudev/Makefile.am
+++ b/src/libudev/Makefile.am
@@ -35,6 +35,7 @@ libudev_la_SOURCES =\
 	libudev-hwdb.c \
 	cgroup-util.c \
 	conf-files.c \
+	device-nodes.c \
 	exit-status.c \
 	hashmap.c \
 	log.c \
@@ -52,6 +53,7 @@ noinst_HEADERS = \
 	cgroup-util.h \
 	conf-files.h \
 	def.h \
+	device-nodes.h \
 	exit-status.h \
 	hashmap.h \
 	ioprio.h \
diff --git a/src/libudev/device-nodes.c b/src/libudev/device-nodes.c
new file mode 100644
index 0000000000..c548f1ffff
--- /dev/null
+++ b/src/libudev/device-nodes.c
@@ -0,0 +1,74 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of eudev, forked from systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "device-nodes.h"
+#include "utf8.h"
+
+int whitelisted_char_for_devnode(char c, const char *white) {      /* returns 1 if c is in the allowed set, else 0 */
+        if ((c >= '0' && c <= '9') ||
+            (c >= 'A' && c <= 'Z') ||
+            (c >= 'a' && c <= 'z') ||
+            strchr("#+-.:=@_", c) != NULL ||                /* NB: strchr() also matches the terminator, so c == '\0' passes */
+            (white != NULL && strchr(white, c) != NULL))    /* optional caller-supplied extra characters */
+                return 1;
+        return 0;
+}
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len) {
+        size_t i, j;            /* i: read index into str, j: write index into str_enc */
+
+        if (str == NULL || str_enc == NULL)
+                return -1;      /* NULL argument */
+
+        for (i = 0, j = 0; str[i] != '\0'; i++) {
+                int seqlen;
+
+                seqlen = utf8_encoded_valid_unichar(&str[i]);
+                if (seqlen > 1) {               /* multi-byte sequence: copied through unchanged */
+                        if (len-j < (size_t)seqlen)
+                                goto err;
+                        memcpy(&str_enc[j], &str[i], seqlen);
+                        j += seqlen;
+                        i += (seqlen-1);        /* the for loop's i++ steps over the final byte */
+                } else if (str[i] == '\\' || !whitelisted_char_for_devnode(str[i], NULL)) {
+                        if (len-j < 5)          /* "\xNN" is 4 bytes and sprintf() appends a NUL as a 5th */
+                                goto err;
+                        sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
+                        j += 4;                 /* the NUL is overwritten by whatever is written next */
+                } else {
+                        if (len-j < 1)
+                                goto err;
+                        str_enc[j] = str[i];
+                        j++;
+                }
+        }
+        if (len-j < 1)
+                goto err;
+        str_enc[j] = '\0';
+        return 0;               /* str_enc now holds the NUL-terminated encoded name */
+err:
+        return -1;              /* output, including its terminator, does not fit in len bytes */
+}
diff --git a/src/libudev/device-nodes.h b/src/libudev/device-nodes.h
new file mode 100644
index 0000000000..57ed97d326
--- /dev/null
+++ b/src/libudev/device-nodes.h
@@ -0,0 +1,23 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of eudev, forked from systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+int encode_devnode_name(const char *str, char *str_enc, size_t len);
+int whitelisted_char_for_devnode(char c, const char *additional);
diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c
index b4452f60d4..4d59980a70 100644
--- a/src/libudev/libudev-util.c
+++ b/src/libudev/libudev-util.c
@@ -35,6 +35,7 @@
 #include <sys/stat.h>
 #include <sys/param.h>
 
+#include "device-nodes.h"
 #include "libudev.h"
 #include "libudev-private.h"
 #include "utf8.h"
@@ -352,7 +353,7 @@ int util_replace_chars(char *str, const char *white)
         while (str[i] != '\0') {
                 int len;
 
-                if (is_utf8_encoding_whitelisted(str[i], white)) {
+                if (whitelisted_char_for_devnode(str[i], white)) {
                         i++;
                         continue;
                 }
@@ -400,7 +401,7 @@ int util_replace_chars(char *str, const char *white)
  **/
 _public_ int udev_util_encode_string(const char *str, char *str_enc, size_t len)
 {
-        return udev_encode_string(str, str_enc, len);
+        return encode_devnode_name(str, str_enc, len);
 }
 
 /*
diff --git a/src/libudev/utf8.c b/src/libudev/utf8.c
index c9e84b804c..1044fb6489 100644
--- a/src/libudev/utf8.c
+++ b/src/libudev/utf8.c
@@ -63,6 +63,19 @@ static inline bool is_unicode_valid(uint32_t ch) {
 
         return true;
 }
+
+static bool is_unicode_control(uint32_t ch) {
+
+        /*
+          0 to ' '-1 is the C0 range.
+          DEL=0x7F, and DEL+1 to 0x9F is C1 range.
+          '\t' and '\n' are in C0 range, but more or less harmless and commonly used.
+        */
+
+        return (ch < ' ' && ch != '\t' && ch != '\n') ||
+                (0x7F <= ch && ch <= 0x9F);
+}
+
 /* count of characters used to encode one unicode char */
 static int utf8_encoded_expected_len(const char *str) {
         unsigned char c = (unsigned char)str[0];
@@ -121,24 +134,73 @@ int utf8_encoded_to_unichar(const char *str) {
         return unichar;
 }
 
-const char *utf8_is_valid(const char *str) {
+bool utf8_is_printable(const char* str, size_t length) {
         const uint8_t *p;
 
         assert(str);
 
-        for (p = (const uint8_t*) str; *p; ) {
-                int len;
+        for (p = (const uint8_t*) str; length;) {      /* length counts the bytes still to be examined */
+                int encoded_len = utf8_encoded_valid_unichar((const char *)p);
+                int val = utf8_encoded_to_unichar((const char*)p);
 
-                len = utf8_encoded_valid_unichar((const char *)p);
+                if (encoded_len < 0 || (size_t) encoded_len > length || val < 0 || is_unicode_control(val))
+                        return false;   /* invalid, running past length, or a control character */
+
+                length -= encoded_len;  /* cannot wrap: encoded_len <= length was checked above */
+                p += encoded_len;
+        }
+
+        return true;
+}
 
-                if (len < 0)
+char *ascii_is_valid(const char *str) {
+        const char *p;
+
+        assert(str);
+
+        for (p = str; *p; p++)
+                if ((unsigned char) *p >= 128)
                         return NULL;
 
-                p += len;
+        return (char*) str;
+}
+
+char *utf16_to_utf8(const void *s, size_t length) {    /* length is in bytes; returns NULL if the allocation fails */
+        char *r;
+        const uint8_t *f;
+        uint8_t *t;
+
+        r = new(char, (length*3+1)/2 + 1);      /* at most 3 output bytes per 2 input bytes, plus the NUL */
+        if (!r)
+                return NULL;
+
+        t = (uint8_t*) r;
+
+        for (f = s; length >= 2; f += 2, length -= 2) { /* whole 2-byte units only; a trailing odd byte is ignored */
+                uint16_t c;
+
+                c = (f[1] << 8) | f[0];         /* code unit is read low byte first */
+
+                if (c == 0) {                   /* an embedded zero unit ends the conversion early */
+                        *t = 0;
+                        return r;
+                } else if (c < 0x80) {
+                        *(t++) = (uint8_t) c;
+                } else if (c < 0x800) {
+                        *(t++) = (uint8_t) (0xc0 | (c >> 6));
+                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
+                } else {                        /* every unit is encoded on its own; surrogate pairs are not combined */
+                        *(t++) = (uint8_t) (0xe0 | (c >> 12));
+                        *(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+                        *(t++) = (uint8_t) (0x80 | (c & 0x3f));
+                }
         }
 
-        return str;
+        *t = 0;
+
+        return r;
 }
+
 /* expected size used to encode one unicode char */
 static int utf8_unichar_to_encoded_len(int unichar) {
         if (unichar < 0x80)
@@ -185,49 +247,3 @@ int utf8_encoded_valid_unichar(const char *str) {
 
         return len;
 }
-
-int is_utf8_encoding_whitelisted(char c, const char *white) {
-        if ((c >= '0' && c <= '9') ||
-            (c >= 'A' && c <= 'Z') ||
-            (c >= 'a' && c <= 'z') ||
-            strchr("#+-.:=@_", c) != NULL ||
-            (white != NULL && strchr(white, c) != NULL))
-                return 1;
-        return 0;
-}
-
-int udev_encode_string(const char *str, char *str_enc, size_t len) {
-        size_t i, j;
-
-        if (str == NULL || str_enc == NULL)
-                return -1;
-
-        for (i = 0, j = 0; str[i] != '\0'; i++) {
-                int seqlen;
-
-                seqlen = utf8_encoded_valid_unichar(&str[i]);
-                if (seqlen > 1) {
-                        if (len-j < (size_t)seqlen)
-                                goto err;
-                        memcpy(&str_enc[j], &str[i], seqlen);
-                        j += seqlen;
-                        i += (seqlen-1);
-                } else if (str[i] == '\\' || !is_utf8_encoding_whitelisted(str[i], NULL)) {
-                        if (len-j < 4)
-                                goto err;
-                        sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
-                        j += 4;
-                } else {
-                        if (len-j < 1)
-                                goto err;
-                        str_enc[j] = str[i];
-                        j++;
-                }
-        }
-        if (len-j < 1)
-                goto err;
-        str_enc[j] = '\0';
-        return 0;
-err:
-        return -1;
-}
diff --git a/src/libudev/utf8.h b/src/libudev/utf8.h
index 380036da18..9d09153c1c 100644
--- a/src/libudev/utf8.h
+++ b/src/libudev/utf8.h
@@ -21,6 +21,11 @@
 
 #include "macro.h"
 
+char *ascii_is_valid(const char *s) _pure_;
+
+bool utf8_is_printable(const char* str, size_t length) _pure_;
+
+char *utf16_to_utf8(const void *s, size_t length);
+
 int utf8_encoded_valid_unichar(const char *str);
-int is_utf8_encoding_whitelisted(char c, const char *white);
-int udev_encode_string(const char *str, char *str_enc, size_t len);
+int utf8_encoded_to_unichar(const char *str);
diff --git a/test/Makefile.am b/test/Makefile.am
index a9c28c9316..f12ee20f04 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -9,7 +9,8 @@ AM_CPPFLAGS = \
 noinst_PROGRAMS = \
 	test-libudev \
 	test-udev \
-	test_utf8
+	test-utf8 \
+	test-device_nodes
 
 test_libudev_SOURCES = \
 	test-libudev.c
@@ -40,6 +41,15 @@ test_utf8_CFLAGS = \
 test_utf8_LDADD = \
 	$(top_builddir)/src/libudev/libudev-private.la
 
+test_device_nodes_SOURCES = \
+	test-device-nodes.c
+
+test_device_nodes_CFLAGS = \
+	$(AM_CFLAGS)
+
+test_device_nodes_LDADD = \
+	$(top_builddir)/src/libudev/libudev-private.la
+
 if HAVE_LIBKMOD
 test_udev_LDADD += $(KMOD_LIBS)
 endif
diff --git a/test/test-device-nodes.c b/test/test-device-nodes.c
new file mode 100644
index 0000000000..2f3dedb90f
--- /dev/null
+++ b/test/test-device-nodes.c
@@ -0,0 +1,55 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Dave Reisner
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+
+#include "device-nodes.h"
+#include "util.h"
+
+/* helpers for test_encode_devnode_name */
+static char *do_encode_string(const char *in) {
+        size_t out_len = strlen(in) * 4 + 1;    /* worst case: every byte becomes "\xNN", plus the terminating NUL */
+        char *out = malloc(out_len);
+
+        assert_se(out);
+        assert_se(encode_devnode_name(in, out, out_len) >= 0);
+        puts(out);                              /* echo the encoded form to stdout */
+
+        return out;                             /* malloc()ed buffer is handed to the caller */
+}
+
+static bool expect_encoded_as(const char *in, const char *expected) {  /* compares the encoding of in against expected */
+        _cleanup_free_ char *encoded = do_encode_string(in);           /* presumably freed at scope exit by _cleanup_free_ (defined outside this diff) */
+        return streq(encoded, expected);
+}
+
+static void test_encode_devnode_name(void) {
+        assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));    /* space is escaped as \x20 */
+        assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));                /* plain letters pass through */
+        assert_se(expect_encoded_as("valíd\\ųtf8", "valíd\\x5cųtf8"));          /* multi-byte UTF-8 kept, backslash escaped */
+        assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));          /* path separator is escaped as \x2f */
+}
+
+int main(int argc, char *argv[]) {      /* argc and argv are not used */
+        test_encode_devnode_name();
+
+        return 0;
+}
diff --git a/test/test-utf8.c b/test/test-utf8.c
index d2b9771f4b..bd8ca86834 100644
--- a/test/test-utf8.c
+++ b/test/test-utf8.c
@@ -19,41 +19,35 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
-
 #include "utf8.h"
 #include "util.h"
 
-/* helpers for test_udev_encode_string */
-static char *do_encode_string(const char *in) {
-        size_t out_len = strlen(in) * 4;
-        char *out = malloc(out_len);
-
-        assert_se(out);
-        assert_se(udev_encode_string(in, out, out_len) >= 0);
-        puts(out);
-
-        return out;
+static void test_utf8_is_printable(void) {
+        assert_se(utf8_is_printable("ascii is valid\tunicode", 22));
+        assert_se(utf8_is_printable("\342\204\242", 3));
+        assert_se(!utf8_is_printable("\341\204", 2));
+        assert_se(utf8_is_printable("ąę", 4));
 }
 
-static bool expect_encoded_as(const char *in, const char *expected) {
-        _cleanup_free_ char *encoded = do_encode_string(in);
-        return streq(encoded, expected);
+static void test_ascii_is_valid(void) {
+        assert_se(ascii_is_valid("alsdjf\t\vbarr\nba z"));
+        assert_se(!ascii_is_valid("\342\204\242"));
+        assert_se(!ascii_is_valid("\341\204"));
 }
 
-static void test_udev_encode_string(void) {
-        assert_se(expect_encoded_as("systemd sucks", "systemd\\x20sucks"));
-        assert_se(expect_encoded_as("pinkiepie", "pinkiepie"));
-        assert_se(expect_encoded_as("valíd\\ųtf8", "valíd\\x5cųtf8"));
-        assert_se(expect_encoded_as("s/ash/ng", "s\\x2fash\\x2fng"));
-}
+static void test_utf8_encoded_valid_unichar(void) {
+        assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
+        assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
+        assert_se(utf8_encoded_valid_unichar("a") == 1);
+        assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
+        assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
 
-static void test_utf8_is_valid(void) {
-        assert_se(utf8_is_valid("ascii is valid unicode"));
-        assert_se(utf8_is_valid("\341\204\242"));
-        assert_se(!utf8_is_valid("\341\204"));
 }
 
 int main(int argc, char *argv[]) {
-        test_utf8_is_valid();
-        test_udev_encode_string();
+        test_utf8_is_printable();
+        test_ascii_is_valid();
+        test_utf8_encoded_valid_unichar();
+
+        return 0;
 }
author	Dave Reisner <dreisner@archlinux.org>	2014-01-09 14:02:56 -0500
committer	Anthony G. Basile <blueness@gentoo.org>	2014-01-09 14:02:56 -0500
commit	855ce449eba82c417c005d17aa680aba2048ed8d (patch)
tree	9c9c5c9cdd30ed2dfbbdef820118124e891b6662
parent	7ed87c74dfb81761cbcefc10cd4f79394a1d36a3 (diff)