From 299a34c11a4241f8c5832ccd5a7bc13263f8488b Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Thu, 20 Oct 2016 23:41:21 -0400 Subject: detect-virt: add --private-users switch to check if a userns is active Various things don't work when we're running in a user namespace, but it's pretty hard to reliably detect if that is true. A function is added which looks at /proc/self/uid_map and returns false if the default "0 0 UINT32_MAX" is found, and true if it finds anything else. This misses the case where an 1:1 mapping with the full range was used, but I don't know how to distinguish this case. 'systemd-detect-virt --private-users' is very similar to 'systemd-detect-virt --chroot', but we check for a user namespace instead. --- man/systemd-detect-virt.xml | 16 ++++++++-- man/systemd.unit.xml | 3 +- src/basic/virt.c | 70 +++++++++++++++++++++++++++++++++++++++++++ src/basic/virt.h | 1 + src/detect-virt/detect-virt.c | 31 ++++++++++++++----- 5 files changed, 111 insertions(+), 10 deletions(-) diff --git a/man/systemd-detect-virt.xml b/man/systemd-detect-virt.xml index 61a5f8937f..996c2fa256 100644 --- a/man/systemd-detect-virt.xml +++ b/man/systemd-detect-virt.xml @@ -50,7 +50,8 @@ - systemd-detect-virt OPTIONS + systemd-detect-virt + OPTIONS @@ -217,6 +218,16 @@ environment or not. + + + + Detect whether invoked in a user namespace. In this mode, no + output is written, but the return value indicates whether the process was invoked + inside of a user namespace or not. See + user_namespaces7 + for more information. + + @@ -243,7 +254,8 @@ systemd1, systemd-nspawn1, - chroot2 + chroot2, + namespaces7 diff --git a/man/systemd.unit.xml b/man/systemd.unit.xml index 04efee2891..40c4cfd854 100644 --- a/man/systemd.unit.xml +++ b/man/systemd.unit.xml @@ -908,7 +908,8 @@ systemd-nspawn, docker, rkt to test - against a specific implementation. See + against a specific implementation, or + private-users to check whether we are running in a user namespace. 
See systemd-detect-virt1 for a full list of known virtualization technologies and their identifiers. If multiple virtualization technologies are
diff --git a/src/basic/virt.c b/src/basic/virt.c
index 41012d52a0..69b0f96183 100644
--- a/src/basic/virt.c
+++ b/src/basic/virt.c
@@ -485,6 +485,76 @@ int detect_virtualization(void) {
         return r;
 }
 
+static int userns_has_mapping(const char *name) { /* > 0: non-default or empty map, 0: default map or file missing, < 0: error */
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_free_ char *buf = NULL;
+        size_t n_allocated = 0;
+        ssize_t n;
+        uint32_t a, b, c;
+        int r;
+
+        f = fopen(name, "re");
+        if (!f) {
+                r = log_debug_errno(errno, "Failed to open %s: %m", name);
+                return r == -ENOENT ? false : r; /* a missing file is not an error: report "no mapping" */
+        }
+
+        n = getline(&buf, &n_allocated, f);
+        if (n < 0) {
+                if (feof(f)) {
+                        log_debug("%s is empty, we're in an uninitialized user namespace", name);
+                        return true;
+                }
+
+                return log_debug_errno(errno, "Failed to read %s: %m", name);
+        }
+
+        r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
+        if (r < 3) /* sscanf() does not set errno on a short match, so use a fixed error code */
+                return log_debug_errno(EBADMSG, "Failed to parse %s: %m", name);
+
+        if (a == 0 && b == 0 && c == UINT32_MAX) {
+                /* The kernel calls mappings_overlap() and does not allow overlaps */
+                log_debug("%s has a full 1:1 mapping", name);
+                return false;
+        }
+
+        /* Anything else implies that we are in a user namespace */
+        log_debug("Mapping found in %s, we're in a user namespace", name);
+        return true;
+}
+
+int running_in_userns(void) {
+        _cleanup_free_ char *line = NULL;
+        int r;
+
+        r = userns_has_mapping("/proc/self/uid_map");
+        if (r != 0)
+                return r;
+
+        r = userns_has_mapping("/proc/self/gid_map");
+        if (r != 0)
+                return r;
+
+        /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
+         * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
+         * also does not exist. We cannot distinguish those two cases, so assume that
+         * we're running on a stripped-down recent kernel, rather than on an old one,
+         * and if the file is not found, return false.
+ */ + r = read_one_line_file("/proc/self/setgroups", &line); + if (r < 0) { + log_debug_errno(r, "/proc/self/setgroups: %m"); + return r == -ENOENT ? false : r; + } + + truncate_nl(line); + r = streq(line, "deny"); + /* See user_namespaces(7) for a description of this "setgroups" contents. */ + log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in"); + return r; +} + int running_in_chroot(void) { int ret; diff --git a/src/basic/virt.h b/src/basic/virt.h index bc5b3ae94d..7d15169112 100644 --- a/src/basic/virt.h +++ b/src/basic/virt.h @@ -67,6 +67,7 @@ int detect_vm(void); int detect_container(void); int detect_virtualization(void); +int running_in_userns(void); int running_in_chroot(void); const char *virtualization_to_string(int v) _const_; diff --git a/src/detect-virt/detect-virt.c b/src/detect-virt/detect-virt.c index 5d51589a31..4b8956f0ad 100644 --- a/src/detect-virt/detect-virt.c +++ b/src/detect-virt/detect-virt.c @@ -31,6 +31,7 @@ static enum { ONLY_VM, ONLY_CONTAINER, ONLY_CHROOT, + ONLY_PRIVATE_USERS, } arg_mode = ANY_VIRTUALIZATION; static void help(void) { @@ -41,6 +42,7 @@ static void help(void) { " -c --container Only detect whether we are run in a container\n" " -v --vm Only detect whether we are run in a VM\n" " -r --chroot Detect whether we are run in a chroot() environment\n" + " --private-users Only detect whether we are running in a user namespace\n" " -q --quiet Don't output anything, just set return value\n" , program_invocation_short_name); } @@ -48,16 +50,18 @@ static void help(void) { static int parse_argv(int argc, char *argv[]) { enum { - ARG_VERSION = 0x100 + ARG_VERSION = 0x100, + ARG_PRIVATE_USERS, }; static const struct option options[] = { - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, ARG_VERSION }, - { "container", no_argument, NULL, 'c' }, - { "vm", no_argument, NULL, 'v' }, - { "chroot", no_argument, NULL, 'r' }, - { "quiet", no_argument, NULL, 'q' }, + { "help", 
no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "container", no_argument, NULL, 'c' }, + { "vm", no_argument, NULL, 'v' }, + { "chroot", no_argument, NULL, 'r' }, + { "private-users", no_argument, NULL, ARG_PRIVATE_USERS }, + { "quiet", no_argument, NULL, 'q' }, {} }; @@ -85,6 +89,10 @@ static int parse_argv(int argc, char *argv[]) { arg_mode = ONLY_CONTAINER; break; + case ARG_PRIVATE_USERS: + arg_mode = ONLY_PRIVATE_USERS; + break; + case 'v': arg_mode = ONLY_VM; break; @@ -151,6 +159,15 @@ int main(int argc, char *argv[]) { return r ? EXIT_SUCCESS : EXIT_FAILURE; + case ONLY_PRIVATE_USERS: + r = running_in_userns(); + if (r < 0) { + log_error_errno(r, "Failed to check for user namespace: %m"); + return EXIT_FAILURE; + } + + return r ? EXIT_SUCCESS : EXIT_FAILURE; + case ANY_VIRTUALIZATION: default: r = detect_virtualization(); -- cgit v1.2.3-54-g00ecf From 239a5707e1bd7740b075e78a4837a77f1129cdaa Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Fri, 21 Oct 2016 22:56:58 -0400 Subject: shared/condition: add ConditionVirtualization=[!]private-users This can be useful to silence warnings about units which fail in userns container. 
--- src/shared/condition.c | 3 +++ src/test/test-condition.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/src/shared/condition.c b/src/shared/condition.c index f13fa6a9fd..376606e004 100644 --- a/src/shared/condition.c +++ b/src/shared/condition.c @@ -146,6 +146,9 @@ static int condition_test_virtualization(Condition *c) { assert(c->parameter); assert(c->type == CONDITION_VIRTUALIZATION); + if (streq(c->parameter, "private-users")) + return running_in_userns(); + v = detect_virtualization(); if (v < 0) return v; diff --git a/src/test/test-condition.c b/src/test/test-condition.c index 6f7d71ef9a..dd985f5863 100644 --- a/src/test/test-condition.c +++ b/src/test/test-condition.c @@ -31,6 +31,8 @@ #include "macro.h" #include "selinux-util.h" #include "smack-util.h" +#include "strv.h" +#include "virt.h" #include "util.h" static void test_condition_test_path(void) { @@ -265,7 +267,64 @@ static void test_condition_test_security(void) { condition_free(condition); } +static void test_condition_test_virtualization(void) { + Condition *condition; + const char *virt; + int r; + + condition = condition_new(CONDITION_VIRTUALIZATION, "garbage oifdsjfoidsjoj", false, false); + assert_se(condition); + r = condition_test(condition); + log_info("ConditionVirtualization=garbage → %i", r); + assert_se(r == 0); + condition_free(condition); + + condition = condition_new(CONDITION_VIRTUALIZATION, "container", false, false); + assert_se(condition); + r = condition_test(condition); + log_info("ConditionVirtualization=container → %i", r); + assert_se(r == !!detect_container()); + condition_free(condition); + + condition = condition_new(CONDITION_VIRTUALIZATION, "vm", false, false); + assert_se(condition); + r = condition_test(condition); + log_info("ConditionVirtualization=vm → %i", r); + assert_se(r == (detect_vm() && !detect_container())); + condition_free(condition); + + condition = condition_new(CONDITION_VIRTUALIZATION, 
"private-users", false, false); + assert_se(condition); + r = condition_test(condition); + log_info("ConditionVirtualization=private-users → %i", r); + assert_se(r == !!running_in_userns()); + condition_free(condition); + + NULSTR_FOREACH(virt, + "kvm\0" + "qemu\0" + "bochs\0" + "xen\0" + "uml\0" + "vmware\0" + "oracle\0" + "microsoft\0" + "zvm\0" + "parallels\0" + "bhyve\0" + "vm_other\0") { + + condition = condition_new(CONDITION_VIRTUALIZATION, virt, false, false); + assert_se(condition); + r = condition_test(condition); + log_info("ConditionVirtualization=%s → %i", virt, r); + assert_se(r >= 0); + condition_free(condition); + } +} + int main(int argc, char *argv[]) { + log_set_max_level(LOG_DEBUG); log_parse_environment(); log_open(); @@ -276,6 +335,7 @@ int main(int argc, char *argv[]) { test_condition_test_kernel_command_line(); test_condition_test_null(); test_condition_test_security(); + test_condition_test_virtualization(); return 0; } -- cgit v1.2.3-54-g00ecf From d09f968657f379025c30c02cff7047b642b0866a Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Mon, 24 Oct 2016 19:17:50 -0400 Subject: test-tables: test ConditionVirtualization --- src/test/test-tables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/test-tables.c b/src/test/test-tables.c index 0be74921fc..f954179b04 100644 --- a/src/test/test-tables.c +++ b/src/test/test-tables.c @@ -48,6 +48,7 @@ #include "unit-name.h" #include "unit.h" #include "util.h" +#include "virt.h" int main(int argc, char **argv) { test_table(architecture, ARCHITECTURE); @@ -114,6 +115,7 @@ int main(int argc, char **argv) { test_table(unit_load_state, UNIT_LOAD_STATE); test_table(unit_type, UNIT_TYPE); test_table(locale_variable, VARIABLE_LC); + test_table(virtualization, VIRTUALIZATION); test_table_sparse(object_compressed, OBJECT_COMPRESSED); -- cgit v1.2.3-54-g00ecf From 0809d7740c5cf988e049781b4b80e14a3bbefb70 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Mon, 24 Oct 
2016 22:53:07 -0400 Subject: condition: simplify condition_test_virtualization Rewrite the function to be slightly simpler. In particular, if a specific match is found (like ConditionVirtualization=yes), simply return an answer immediately, instead of relying that "yes" will not be matched by any of the virtualization names below. No functional change. --- src/shared/condition.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/shared/condition.c b/src/shared/condition.c index 376606e004..17b80d9e0c 100644 --- a/src/shared/condition.c +++ b/src/shared/condition.c @@ -155,19 +155,15 @@ static int condition_test_virtualization(Condition *c) { /* First, compare with yes/no */ b = parse_boolean(c->parameter); - - if (v > 0 && b > 0) - return true; - - if (v == 0 && b == 0) - return true; + if (b >= 0) + return b == !!v; /* Then, compare categorization */ - if (VIRTUALIZATION_IS_VM(v) && streq(c->parameter, "vm")) - return true; + if (streq(c->parameter, "vm")) + return VIRTUALIZATION_IS_VM(v); - if (VIRTUALIZATION_IS_CONTAINER(v) && streq(c->parameter, "container")) - return true; + if (streq(c->parameter, "container")) + return VIRTUALIZATION_IS_CONTAINER(v); /* Finally compare id */ return v != VIRTUALIZATION_NONE && streq(c->parameter, virtualization_to_string(v)); -- cgit v1.2.3-54-g00ecf From 4bb30aeaf8e756b20d66e13af2eac0c5a30b01fa Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Fri, 21 Oct 2016 23:00:38 -0400 Subject: units: disable /dev/hugepages in private user namespaces The mount fails, even though CAP_SYS_ADMIN is granted. 
--- units/dev-hugepages.mount | 1 + 1 file changed, 1 insertion(+) diff --git a/units/dev-hugepages.mount b/units/dev-hugepages.mount index 882adb4545..489cc777e4 100644 --- a/units/dev-hugepages.mount +++ b/units/dev-hugepages.mount @@ -13,6 +13,7 @@ DefaultDependencies=no Before=sysinit.target ConditionPathExists=/sys/kernel/mm/hugepages ConditionCapability=CAP_SYS_ADMIN +ConditionVirtualization=!private-users [Mount] What=hugetlbfs -- cgit v1.2.3-54-g00ecf