diff options
Diffstat (limited to 'src')
96 files changed, 3651 insertions, 1052 deletions
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 7cdc97ee3c..302b958d0d 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -134,6 +134,20 @@ int cg_read_event(const char *controller, const char *path, const char *event, return -ENOENT; } +bool cg_ns_supported(void) { + static thread_local int enabled = -1; + + if (enabled >= 0) + return enabled; + + if (access("/proc/self/ns/cgroup", F_OK) == 0) + enabled = 1; + else + enabled = 0; + + return enabled; +} + int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) { _cleanup_free_ char *fs = NULL; int r; @@ -197,7 +211,15 @@ int cg_rmdir(const char *controller, const char *path) { return 0; } -int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) { +int cg_kill( + const char *controller, + const char *path, + int sig, + CGroupFlags flags, + Set *s, + cg_kill_log_func_t log_kill, + void *userdata) { + _cleanup_set_free_ Set *allocated_set = NULL; bool done = false; int r, ret = 0; @@ -205,6 +227,11 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo assert(sig >= 0); + /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send + * SIGCONT on SIGKILL. */ + if (IN_SET(sig, SIGCONT, SIGKILL)) + flags &= ~CGROUP_SIGCONT; + /* This goes through the tasks list and kills them all. 
This * is repeated until no further processes are added to the * tasks list, to properly handle forking processes */ @@ -232,19 +259,22 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo while ((r = cg_read_pid(f, &pid)) > 0) { - if (ignore_self && pid == my_pid) + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) continue; if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) continue; + if (log_kill) + log_kill(pid, sig, userdata); + /* If we haven't killed this process yet, kill * it */ if (kill(pid, sig) < 0) { if (ret >= 0 && errno != ESRCH) ret = -errno; } else { - if (sigcont && sig != SIGKILL) + if (flags & CGROUP_SIGCONT) (void) kill(pid, SIGCONT); if (ret == 0) @@ -278,7 +308,15 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo return ret; } -int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) { +int cg_kill_recursive( + const char *controller, + const char *path, + int sig, + CGroupFlags flags, + Set *s, + cg_kill_log_func_t log_kill, + void *userdata) { + _cleanup_set_free_ Set *allocated_set = NULL; _cleanup_closedir_ DIR *d = NULL; int r, ret; @@ -293,7 +331,7 @@ int cg_kill_recursive(const char *controller, const char *path, int sig, bool si return -ENOMEM; } - ret = cg_kill(controller, path, sig, sigcont, ignore_self, s); + ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata); r = cg_enumerate_subgroups(controller, path, &d); if (r < 0) { @@ -311,15 +349,14 @@ int cg_kill_recursive(const char *controller, const char *path, int sig, bool si if (!p) return -ENOMEM; - r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s); + r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata); if (r != 0 && ret >= 0) ret = r; } - if (ret >= 0 && r < 0) ret = r; - if (rem) { + if (flags & CGROUP_REMOVE) { r = cg_rmdir(controller, path); if (r < 0 && ret >= 0 && r != -ENOENT && r != 
-EBUSY) return r; @@ -328,7 +365,13 @@ int cg_kill_recursive(const char *controller, const char *path, int sig, bool si return ret; } -int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) { +int cg_migrate( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + bool done = false; _cleanup_set_free_ Set *s = NULL; int r, ret = 0; @@ -363,7 +406,7 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char /* This might do weird stuff if we aren't a * single-threaded program. However, we * luckily know we are not */ - if (ignore_self && pid == my_pid) + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) continue; if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) @@ -411,8 +454,7 @@ int cg_migrate_recursive( const char *pfrom, const char *cto, const char *pto, - bool ignore_self, - bool rem) { + CGroupFlags flags) { _cleanup_closedir_ DIR *d = NULL; int r, ret = 0; @@ -423,7 +465,7 @@ int cg_migrate_recursive( assert(cto); assert(pto); - ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self); + ret = cg_migrate(cfrom, pfrom, cto, pto, flags); r = cg_enumerate_subgroups(cfrom, pfrom, &d); if (r < 0) { @@ -441,7 +483,7 @@ int cg_migrate_recursive( if (!p) return -ENOMEM; - r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem); + r = cg_migrate_recursive(cfrom, p, cto, pto, flags); if (r != 0 && ret >= 0) ret = r; } @@ -449,7 +491,7 @@ int cg_migrate_recursive( if (r < 0 && ret >= 0) ret = r; - if (rem) { + if (flags & CGROUP_REMOVE) { r = cg_rmdir(cfrom, pfrom); if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY) return r; @@ -463,8 +505,7 @@ int cg_migrate_recursive_fallback( const char *pfrom, const char *cto, const char *pto, - bool ignore_self, - bool rem) { + CGroupFlags flags) { int r; @@ -473,7 +514,7 @@ int cg_migrate_recursive_fallback( assert(cto); assert(pto); - r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, 
rem); + r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); if (r < 0) { char prefix[strlen(pto) + 1]; @@ -482,7 +523,7 @@ int cg_migrate_recursive_fallback( PATH_FOREACH_PREFIX(prefix, pto) { int q; - q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem); + q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); if (q >= 0) return q; } @@ -1955,7 +1996,7 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to int r = 0, unified; if (!path_equal(from, to)) { - r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true); + r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); if (r < 0) return r; } @@ -1979,7 +2020,7 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to if (!p) p = to; - (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, false, false); + (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); } return 0; diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 4bb5291296..ec5c715987 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -135,12 +135,20 @@ int cg_read_event(const char *controller, const char *path, const char *event, int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d); int cg_read_subgroup(DIR *d, char **fn); -int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s); -int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool remove, Set *s); +typedef enum CGroupFlags { + CGROUP_SIGCONT = 1, + CGROUP_IGNORE_SELF = 2, + CGROUP_REMOVE = 4, +} CGroupFlags; -int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self); -int cg_migrate_recursive(const char *cfrom, const 
char *pfrom, const char *cto, const char *pto, bool ignore_self, bool remove); -int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem); +typedef void (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata); + +int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); +int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); + +int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); int cg_split_spec(const char *spec, char **controller, char **path); int cg_mangle_path(const char *path, char **result); @@ -214,6 +222,8 @@ int cg_mask_supported(CGroupMask *ret); int cg_kernel_controllers(Set *controllers); +bool cg_ns_supported(void); + int cg_unified(void); void cg_unified_flush(void); diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 47ccfc39d8..f183de4999 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -1259,7 +1259,8 @@ int open_tmpfile_unlinkable(const char *directory, int flags) { char *p; int fd; - assert(directory); + if (!directory) + directory = "/tmp"; /* Returns an unlinked temporary file that cannot be linked into the file system anymore */ diff --git a/src/basic/missing.h b/src/basic/missing.h index b1272f8799..f8e096605e 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -445,6 +445,10 @@ struct btrfs_ioctl_quota_ctl_args { #define CGROUP2_SUPER_MAGIC 0x63677270 #endif +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0x02000000 +#endif + #ifndef TMPFS_MAGIC #define TMPFS_MAGIC 
0x01021994 #endif diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index 63dff3dd5c..28dc778969 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -448,21 +448,21 @@ int bind_remount_recursive(const char *prefix, bool ro) { if (r < 0) return r; - /* Try to reuse the original flag set, but - * don't care for errors, in case of - * obstructed mounts */ + /* Deal with mount points that are obstructed by a + * later mount */ + r = path_is_mount_point(x, 0); + if (r == -ENOENT || r == 0) + continue; + if (r < 0) + return r; + + /* Try to reuse the original flag set */ orig_flags = 0; (void) get_mount_flags(x, &orig_flags); orig_flags &= ~MS_RDONLY; - if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) { - - /* Deal with mount points that are - * obstructed by a later mount */ - - if (errno != ENOENT) - return -errno; - } + if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) + return -errno; } } @@ -534,15 +534,22 @@ int repeat_unmount(const char *path, int flags) { } const char* mode_to_inaccessible_node(mode_t mode) { + /* This function maps a node type to the correspondent inaccessible node type. + * Character and block inaccessible devices may not be created (because major=0 and minor=0), + * in such case we map character and block devices to the inaccessible node type socket. 
*/ switch(mode & S_IFMT) { case S_IFREG: return "/run/systemd/inaccessible/reg"; case S_IFDIR: return "/run/systemd/inaccessible/dir"; case S_IFCHR: - return "/run/systemd/inaccessible/chr"; + if (access("/run/systemd/inaccessible/chr", F_OK) == 0) + return "/run/systemd/inaccessible/chr"; + return "/run/systemd/inaccessible/sock"; case S_IFBLK: - return "/run/systemd/inaccessible/blk"; + if (access("/run/systemd/inaccessible/blk", F_OK) == 0) + return "/run/systemd/inaccessible/blk"; + return "/run/systemd/inaccessible/sock"; case S_IFIFO: return "/run/systemd/inaccessible/fifo"; case S_IFSOCK: diff --git a/src/basic/nss-util.h b/src/basic/nss-util.h index bf7c4854fc..e7844fff96 100644 --- a/src/basic/nss-util.h +++ b/src/basic/nss-util.h @@ -137,7 +137,7 @@ enum nss_status _nss_##module##_getpwnam_r( \ struct passwd *pwd, \ char *buffer, size_t buflen, \ int *errnop) _public_; \ -enum nss_status _nss_mymachines_getpwuid_r( \ +enum nss_status _nss_##module##_getpwuid_r( \ uid_t uid, \ struct passwd *pwd, \ char *buffer, size_t buflen, \ diff --git a/src/basic/process-util.c b/src/basic/process-util.c index e38b67405e..54b644ad56 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -625,8 +625,10 @@ int kill_and_sigcont(pid_t pid, int sig) { r = kill(pid, sig) < 0 ? -errno : 0; - if (r >= 0) - kill(pid, SIGCONT); + /* If this worked, also send SIGCONT, unless we already just sent a SIGCONT, or SIGKILL was sent which isn't + * affected by a process being suspended anyway. 
*/ + if (r >= 0 && !IN_SET(SIGCONT, SIGKILL)) + (void) kill(pid, SIGCONT); return r; } diff --git a/src/basic/socket-util.c b/src/basic/socket-util.c index 385c3e4df3..6093e47172 100644 --- a/src/basic/socket-util.c +++ b/src/basic/socket-util.c @@ -1046,3 +1046,17 @@ int flush_accept(int fd) { close(cfd); } } + +struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length) { + struct cmsghdr *cmsg; + + assert(mh); + + CMSG_FOREACH(cmsg, mh) + if (cmsg->cmsg_level == level && + cmsg->cmsg_type == type && + (length == (socklen_t) -1 || length == cmsg->cmsg_len)) + return cmsg; + + return NULL; +} diff --git a/src/basic/socket-util.h b/src/basic/socket-util.h index e9230e4a9f..2536b085f9 100644 --- a/src/basic/socket-util.h +++ b/src/basic/socket-util.h @@ -142,6 +142,8 @@ int flush_accept(int fd); #define CMSG_FOREACH(cmsg, mh) \ for ((cmsg) = CMSG_FIRSTHDR(mh); (cmsg); (cmsg) = CMSG_NXTHDR((mh), (cmsg))) +struct cmsghdr* cmsg_find(struct msghdr *mh, int level, int type, socklen_t length); + /* Covers only file system and abstract AF_UNIX socket addresses, but not unnamed socket addresses. 
*/ #define SOCKADDR_UN_LEN(sa) \ ({ \ diff --git a/src/basic/string-util.c b/src/basic/string-util.c index 293a15f9c0..5d4510e1b3 100644 --- a/src/basic/string-util.c +++ b/src/basic/string-util.c @@ -22,6 +22,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "alloc-util.h" #include "gunicode.h" @@ -323,6 +324,14 @@ char ascii_tolower(char x) { return x; } +char ascii_toupper(char x) { + + if (x >= 'a' && x <= 'z') + return x - 'a' + 'A'; + + return x; +} + char *ascii_strlower(char *t) { char *p; @@ -334,6 +343,17 @@ char *ascii_strlower(char *t) { return t; } +char *ascii_strupper(char *t) { + char *p; + + assert(t); + + for (p = t; *p; p++) + *p = ascii_toupper(*p); + + return t; +} + char *ascii_strlower_n(char *t, size_t n) { size_t i; @@ -803,25 +823,20 @@ int free_and_strdup(char **p, const char *s) { return 1; } -#pragma GCC push_options -#pragma GCC optimize("O0") +/* + * Pointer to memset is volatile so that compiler must de-reference + * the pointer and can't assume that it points to any function in + * particular (such as memset, which it then might further "optimize") + * This approach is inspired by openssl's crypto/mem_clr.c. + */ +typedef void *(*memset_t)(void *,int,size_t); -void* memory_erase(void *p, size_t l) { - volatile uint8_t* x = (volatile uint8_t*) p; +static volatile memset_t memset_func = memset; - /* This basically does what memset() does, but hopefully isn't - * optimized away by the compiler. One of those days, when - * glibc learns memset_s() we should replace this call by - * memset_s(), but until then this has to do. 
*/ - - for (; l > 0; l--) - *(x++) = 'x'; - - return p; +void* memory_erase(void *p, size_t l) { + return memset_func(p, 'x', l); } -#pragma GCC pop_options - char* string_erase(char *x) { if (!x) diff --git a/src/basic/string-util.h b/src/basic/string-util.h index 1209e1e2e1..b75aba63c2 100644 --- a/src/basic/string-util.h +++ b/src/basic/string-util.h @@ -137,6 +137,9 @@ char ascii_tolower(char x); char *ascii_strlower(char *s); char *ascii_strlower_n(char *s, size_t n); +char ascii_toupper(char x); +char *ascii_strupper(char *s); + int ascii_strcasecmp_n(const char *a, const char *b, size_t n); int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m); diff --git a/src/basic/user-util.c b/src/basic/user-util.c index e9d668ddfc..122d9a0c7c 100644 --- a/src/basic/user-util.c +++ b/src/basic/user-util.c @@ -29,6 +29,7 @@ #include <string.h> #include <sys/stat.h> #include <unistd.h> +#include <utmp.h> #include "missing.h" #include "alloc-util.h" @@ -39,6 +40,7 @@ #include "path-util.h" #include "string-util.h" #include "user-util.h" +#include "utf8.h" bool uid_is_valid(uid_t uid) { @@ -479,3 +481,94 @@ int take_etc_passwd_lock(const char *root) { return fd; } + +bool valid_user_group_name(const char *u) { + const char *i; + long sz; + + /* Checks if the specified name is a valid user/group name. 
*/ + + if (isempty(u)) + return false; + + if (!(u[0] >= 'a' && u[0] <= 'z') && + !(u[0] >= 'A' && u[0] <= 'Z') && + u[0] != '_') + return false; + + for (i = u+1; *i; i++) { + if (!(*i >= 'a' && *i <= 'z') && + !(*i >= 'A' && *i <= 'Z') && + !(*i >= '0' && *i <= '9') && + *i != '_' && + *i != '-') + return false; + } + + sz = sysconf(_SC_LOGIN_NAME_MAX); + assert_se(sz > 0); + + if ((size_t) (i-u) > (size_t) sz) + return false; + + if ((size_t) (i-u) > UT_NAMESIZE - 1) + return false; + + return true; +} + +bool valid_user_group_name_or_id(const char *u) { + + /* Similar as above, but is also fine with numeric UID/GID specifications, as long as they are in the right + * range, and not the invalid user ids. */ + + if (isempty(u)) + return false; + + if (valid_user_group_name(u)) + return true; + + return parse_uid(u, NULL) >= 0; +} + +bool valid_gecos(const char *d) { + + if (!d) + return false; + + if (!utf8_is_valid(d)) + return false; + + if (string_has_cc(d, NULL)) + return false; + + /* Colons are used as field separators, and hence not OK */ + if (strchr(d, ':')) + return false; + + return true; +} + +bool valid_home(const char *p) { + + if (isempty(p)) + return false; + + if (!utf8_is_valid(p)) + return false; + + if (string_has_cc(p, NULL)) + return false; + + if (!path_is_absolute(p)) + return false; + + if (!path_is_safe(p)) + return false; + + /* Colons are used as field separators, and hence not OK */ + if (strchr(p, ':')) + return false; + + return true; +} diff --git a/src/basic/user-util.h b/src/basic/user-util.h index 8026eca3f4..36f71fb004 100644 --- a/src/basic/user-util.h +++ b/src/basic/user-util.h @@ -68,3 +68,8 @@ int take_etc_passwd_lock(const char *root); static inline bool userns_supported(void) { return access("/proc/self/uid_map", F_OK) >= 0; } + +bool valid_user_group_name(const char *u); +bool valid_user_group_name_or_id(const char *u); +bool valid_gecos(const char *d); +bool valid_home(const char *p); diff --git a/src/basic/util.c 
b/src/basic/util.c index 09d16697b7..9d66d28eb7 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -581,47 +581,6 @@ int on_ac_power(void) { return found_online || !found_offline; } -bool id128_is_valid(const char *s) { - size_t i, l; - - l = strlen(s); - if (l == 32) { - - /* Simple formatted 128bit hex string */ - - for (i = 0; i < l; i++) { - char c = s[i]; - - if (!(c >= '0' && c <= '9') && - !(c >= 'a' && c <= 'z') && - !(c >= 'A' && c <= 'Z')) - return false; - } - - } else if (l == 36) { - - /* Formatted UUID */ - - for (i = 0; i < l; i++) { - char c = s[i]; - - if ((i == 8 || i == 13 || i == 18 || i == 23)) { - if (c != '-') - return false; - } else { - if (!(c >= '0' && c <= '9') && - !(c >= 'a' && c <= 'z') && - !(c >= 'A' && c <= 'Z')) - return false; - } - } - - } else - return false; - - return true; -} - int container_get_leader(const char *machine, pid_t *pid) { _cleanup_free_ char *s = NULL, *class = NULL; const char *p; @@ -832,6 +791,61 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) { return r; } +uint64_t system_tasks_max(void) { + +#if SIZEOF_PID_T == 4 +#define TASKS_MAX ((uint64_t) (INT32_MAX-1)) +#elif SIZEOF_PID_T == 2 +#define TASKS_MAX ((uint64_t) (INT16_MAX-1)) +#else +#error "Unknown pid_t size" +#endif + + _cleanup_free_ char *value = NULL, *root = NULL; + uint64_t a = TASKS_MAX, b = TASKS_MAX; + + /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this + * limit: + * + * a) the maximum value for the pid_t type + * b) the cgroups pids_max attribute for the system + * c) the kernel's configure maximum PID value + * + * And then pick the smallest of the three */ + + if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0) + (void) safe_atou64(value, &a); + + if (cg_get_root_path(&root) >= 0) { + value = mfree(value); + + if (cg_get_attribute("pids", root, "pids.max", &value) >= 0) + (void) safe_atou64(value, &b); + } + + return MIN3(TASKS_MAX, + a <= 0 ? 
TASKS_MAX : a, + b <= 0 ? TASKS_MAX : b); +} + +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) { + uint64_t t, m; + + assert(max > 0); + + /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages + * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */ + + t = system_tasks_max(); + assert(t > 0); + + m = t * v; + if (m / t != v) /* overflow? */ + return UINT64_MAX; + + return m / max; +} + int update_reboot_parameter_and_warn(const char *param) { int r; diff --git a/src/basic/util.h b/src/basic/util.h index db105197e8..44497dcd78 100644 --- a/src/basic/util.h +++ b/src/basic/util.h @@ -176,8 +176,6 @@ static inline unsigned log2u_round_up(unsigned x) { return log2u(x - 1) + 1; } -bool id128_is_valid(const char *s) _pure_; - int container_get_leader(const char *machine, pid_t *pid); int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd); @@ -186,6 +184,9 @@ int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int uint64_t physical_memory(void); uint64_t physical_memory_scale(uint64_t v, uint64_t max); +uint64_t system_tasks_max(void); +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max); + int update_reboot_parameter_and_warn(const char *param); int version(void); diff --git a/src/boot/bootctl.c b/src/boot/bootctl.c index 7cb2259717..ff8c7a38dd 100644 --- a/src/boot/bootctl.c +++ b/src/boot/bootctl.c @@ -26,6 +26,7 @@ #include <ftw.h> #include <getopt.h> #include <limits.h> +#include <linux/magic.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -42,22 +43,53 @@ #include "fd-util.h" #include "fileio.h" #include "locale-util.h" +#include "parse-util.h" #include "rm-rf.h" #include "string-util.h" +#include "strv.h" +#include "umask-util.h" #include "util.h" +#include "verbs.h" +#include "virt.h" +#include "stat-util.h" + +static char *arg_path = NULL; +static bool arg_touch_variables 
= true; + +static int verify_esp( + bool searching, + const char *p, + uint32_t *ret_part, + uint64_t *ret_pstart, + uint64_t *ret_psize, + sd_id128_t *ret_uuid) { -static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t *psize, sd_id128_t *uuid) { - struct statfs sfs; - struct stat st, st2; - _cleanup_free_ char *t = NULL; _cleanup_blkid_free_probe_ blkid_probe b = NULL; - int r; + _cleanup_free_ char *t = NULL; + uint64_t pstart = 0, psize = 0; + struct stat st, st2; const char *v, *t2; + struct statfs sfs; + sd_id128_t uuid = SD_ID128_NULL; + uint32_t part = 0; + int r; + + assert(p); + + if (statfs(p, &sfs) < 0) { + + /* If we are searching for the mount point, don't generate a log message if we can't find the path */ + if (errno == ENOENT && searching) + return -ENOENT; - if (statfs(p, &sfs) < 0) return log_error_errno(errno, "Failed to check file system type of \"%s\": %m", p); + } + + if (!F_TYPE_EQUAL(sfs.f_type, MSDOS_SUPER_MAGIC)) { + + if (searching) + return -EADDRNOTAVAIL; - if (sfs.f_type != 0x4d44) { log_error("File system \"%s\" is not a FAT EFI System Partition (ESP) file system.", p); return -ENODEV; } @@ -80,6 +112,11 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t return -ENODEV; } + /* In a container we don't have access to block devices, skip this part of the verification, we trust the + * container manager set everything up correctly on its own. */ + if (detect_container() > 0) + goto finish; + r = asprintf(&t, "/dev/block/%u:%u", major(st.st_dev), minor(st.st_dev)); if (r < 0) return log_oom(); @@ -117,7 +154,6 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t r = errno ? 
-errno : -EIO; return log_error_errno(r, "Failed to probe file system type \"%s\": %m", p); } - if (!streq(v, "vfat")) { log_error("File system \"%s\" is not FAT.", p); return -ENODEV; @@ -129,7 +165,6 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t r = errno ? -errno : -EIO; return log_error_errno(r, "Failed to probe partition scheme \"%s\": %m", p); } - if (!streq(v, "gpt")) { log_error("File system \"%s\" is not on a GPT partition table.", p); return -ENODEV; @@ -141,7 +176,6 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t r = errno ? -errno : -EIO; return log_error_errno(r, "Failed to probe partition type UUID \"%s\": %m", p); } - if (!streq(v, "c12a7328-f81f-11d2-ba4b-00a0c93ec93b")) { log_error("File system \"%s\" has wrong type for an EFI System Partition (ESP).", p); return -ENODEV; @@ -153,8 +187,7 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t r = errno ? -errno : -EIO; return log_error_errno(r, "Failed to probe partition entry UUID \"%s\": %m", p); } - - r = sd_id128_from_string(v, uuid); + r = sd_id128_from_string(v, &uuid); if (r < 0) { log_error("Partition \"%s\" has invalid UUID \"%s\".", p, v); return -EIO; @@ -166,7 +199,9 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t r = errno ? -errno : -EIO; return log_error_errno(r, "Failed to probe partition number \"%s\": m", p); } - *part = strtoul(v, NULL, 10); + r = safe_atou32(v, &part); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_NUMBER field."); errno = 0; r = blkid_probe_lookup_value(b, "PART_ENTRY_OFFSET", &v, NULL); @@ -174,7 +209,9 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t r = errno ? 
-errno : -EIO; return log_error_errno(r, "Failed to probe partition offset \"%s\": %m", p); } - *pstart = strtoul(v, NULL, 10); + r = safe_atou64(v, &pstart); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_OFFSET field."); errno = 0; r = blkid_probe_lookup_value(b, "PART_ENTRY_SIZE", &v, NULL); @@ -182,11 +219,50 @@ static int verify_esp(const char *p, uint32_t *part, uint64_t *pstart, uint64_t r = errno ? -errno : -EIO; return log_error_errno(r, "Failed to probe partition size \"%s\": %m", p); } - *psize = strtoul(v, NULL, 10); + r = safe_atou64(v, &psize); + if (r < 0) + return log_error_errno(r, "Failed to parse PART_ENTRY_SIZE field."); + +finish: + if (ret_part) + *ret_part = part; + if (ret_pstart) + *ret_pstart = pstart; + if (ret_psize) + *ret_psize = psize; + if (ret_uuid) + *ret_uuid = uuid; return 0; } +static int find_esp(uint32_t *part, uint64_t *pstart, uint64_t *psize, sd_id128_t *uuid) { + const char *path; + int r; + + if (arg_path) + return verify_esp(false, arg_path, part, pstart, psize, uuid); + + FOREACH_STRING(path, "/efi", "/boot", "/boot/efi") { + + r = verify_esp(true, path, part, pstart, psize, uuid); + if (IN_SET(r, -ENOENT, -EADDRNOTAVAIL)) /* This one is not it */ + continue; + if (r < 0) + return r; + + arg_path = strdup(path); + if (!arg_path) + return log_oom(); + + log_info("Using EFI System Parition at %s.", path); + return 0; + } + + log_error("Couldn't find EFI system partition. It is recommended to mount it to /boot. 
Alternatively, use --path= to specify path to mount point."); + return -ENOENT; +} + /* search for "#### LoaderInfo: systemd-boot 218 ####" string inside the binary */ static int get_file_version(int fd, char **v) { struct stat st; @@ -199,14 +275,16 @@ static int get_file_version(int fd, char **v) { assert(v); if (fstat(fd, &st) < 0) - return -errno; + return log_error_errno(errno, "Failed to stat EFI binary: %m"); - if (st.st_size < 27) + if (st.st_size < 27) { + *v = NULL; return 0; + } buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (buf == MAP_FAILED) - return -errno; + return log_error_errno(errno, "Failed to memory map EFI binary: %m"); s = memmem(buf, st.st_size - 8, "#### LoaderInfo: ", 17); if (!s) @@ -228,7 +306,7 @@ static int get_file_version(int fd, char **v) { r = 1; finish: - munmap(buf, st.st_size); + (void) munmap(buf, st.st_size); *v = x; return r; } @@ -311,7 +389,7 @@ static int print_efi_option(uint16_t id, bool in_order) { return r; /* print only configured entries with partition information */ - if (!path || sd_id128_equal(partition, SD_ID128_NULL)) + if (!path || sd_id128_is_null(partition)) return 0; efi_tilt_backslashes(path); @@ -338,9 +416,10 @@ static int status_variables(void) { n_options = efi_get_boot_options(&options); if (n_options == -ENOENT) - return log_error_errno(ENOENT, "Failed to access EFI variables, efivarfs" + return log_error_errno(n_options, + "Failed to access EFI variables, efivarfs" " needs to be available at /sys/firmware/efi/efivars/."); - else if (n_options < 0) + if (n_options < 0) return log_error_errno(n_options, "Failed to read EFI boot entries: %m"); n_order = efi_get_boot_order(&order); @@ -360,11 +439,9 @@ static int status_variables(void) { for (j = 0; j < n_order; j++) if (options[i] == order[j]) - goto next; + continue; print_efi_option(options[i], false); - next: - continue; } return 0; @@ -523,15 +600,6 @@ error: return r; } -static char* strupper(char *s) { - char *p; - - for (p = s; 
*p; p++) - *p = toupper(*p); - - return s; -} - static int mkdir_one(const char *prefix, const char *suffix) { char *p; @@ -554,11 +622,11 @@ static const char *efi_subdirs[] = { }; static int create_dirs(const char *esp_path) { + const char **i; int r; - unsigned i; - for (i = 0; i < ELEMENTSOF(efi_subdirs); i++) { - r = mkdir_one(esp_path, efi_subdirs[i]); + STRV_FOREACH(i, efi_subdirs) { + r = mkdir_one(esp_path, *i); if (r < 0) return r; } @@ -580,7 +648,7 @@ static int copy_one_file(const char *esp_path, const char *name, bool force) { /* Create the EFI default boot loader name (specified for removable devices) */ v = strjoina(esp_path, "/EFI/BOOT/BOOT", name + strlen("systemd-boot")); - strupper(strrchr(v, '/') + 1); + ascii_strupper(strrchr(v, '/') + 1); k = copy_file(p, v, force); if (k < 0 && r == 0) @@ -751,8 +819,8 @@ static int install_variables(const char *esp_path, if (access(p, F_OK) < 0) { if (errno == ENOENT) return 0; - else - return log_error_errno(errno, "Cannot access \"%s\": %m", p); + + return log_error_errno(errno, "Cannot access \"%s\": %m", p); } r = find_slot(uuid, path, &slot); @@ -762,7 +830,7 @@ static int install_variables(const char *esp_path, "Failed to access EFI variables. Is the \"efivarfs\" filesystem mounted?" : "Failed to determine current boot order: %m"); - if (first || r == false) { + if (first || r == 0) { r = efi_add_boot_option(slot, "Linux Boot Manager", part, pstart, psize, uuid, path); @@ -872,46 +940,39 @@ static int remove_variables(sd_id128_t uuid, const char *path, bool in_order) { if (in_order) return remove_from_order(slot); - else - return 0; + + return 0; } static int install_loader_config(const char *esp_path) { - char *p; - char line[64]; - char *machine = NULL; - _cleanup_fclose_ FILE *f = NULL, *g = NULL; - f = fopen("/etc/machine-id", "re"); - if (!f) - return errno == ENOENT ? 
0 : -errno; + _cleanup_fclose_ FILE *f = NULL; + char machine_string[SD_ID128_STRING_MAX]; + sd_id128_t machine_id; + const char *p; + int r; - if (fgets(line, sizeof(line), f) != NULL) { - char *s; + r = sd_id128_get_machine(&machine_id); + if (r < 0) + return log_error_errno(r, "Failed to get machine did: %m"); - s = strchr(line, '\n'); - if (s) - s[0] = '\0'; - if (strlen(line) == 32) - machine = line; - } + p = strjoina(esp_path, "/loader/loader.conf"); + f = fopen(p, "wxe"); + if (!f) + return log_error_errno(errno, "Failed to open loader.conf for writing: %m"); - if (!machine) - return -ESRCH; + fprintf(f, "#timeout 3\n"); + fprintf(f, "default %s-*\n", sd_id128_to_string(machine_id, machine_string)); - p = strjoina(esp_path, "/loader/loader.conf"); - g = fopen(p, "wxe"); - if (g) { - fprintf(g, "#timeout 3\n"); - fprintf(g, "default %s-*\n", machine); - if (ferror(g)) - return log_error_errno(EIO, "Failed to write \"%s\": %m", p); - } + r = fflush_and_check(f); + if (r < 0) + return log_error_errno(r, "Failed to write \"%s\": %m", p); return 0; } -static int help(void) { +static int help(int argc, char *argv[], void *userdata) { + printf("%s [COMMAND] [OPTIONS...]\n" "\n" "Install, update or remove the systemd-boot EFI boot manager.\n\n" @@ -930,9 +991,6 @@ static int help(void) { return 0; } -static const char *arg_path = "/boot"; -static bool arg_touch_variables = true; - static int parse_argv(int argc, char *argv[]) { enum { ARG_PATH = 0x100, @@ -948,7 +1006,7 @@ static int parse_argv(int argc, char *argv[]) { { NULL, 0, NULL, 0 } }; - int c; + int c, r; assert(argc >= 0); assert(argv); @@ -957,14 +1015,16 @@ static int parse_argv(int argc, char *argv[]) { switch (c) { case 'h': - help(); + help(0, NULL, NULL); return 0; case ARG_VERSION: return version(); case ARG_PATH: - arg_path = optarg; + r = free_and_strdup(&arg_path, optarg); + if (r < 0) + return log_oom(); break; case ARG_NO_VARIABLES: @@ -989,149 +1049,170 @@ static void 
read_loader_efi_var(const char *name, char **var) { log_warning_errno(r, "Failed to read EFI variable %s: %m", name); } -static int bootctl_main(int argc, char*argv[]) { - enum action { - ACTION_STATUS, - ACTION_INSTALL, - ACTION_UPDATE, - ACTION_REMOVE - } arg_action = ACTION_STATUS; - static const struct { - const char* verb; - enum action action; - } verbs[] = { - { "status", ACTION_STATUS }, - { "install", ACTION_INSTALL }, - { "update", ACTION_UPDATE }, - { "remove", ACTION_REMOVE }, - }; +static int must_be_root(void) { - sd_id128_t uuid = {}; - uint32_t part = 0; - uint64_t pstart = 0, psize = 0; - int r, q; + if (geteuid() == 0) + return 0; - if (argv[optind]) { - unsigned i; + log_error("Need to be root."); + return -EPERM; +} - for (i = 0; i < ELEMENTSOF(verbs); i++) { - if (!streq(argv[optind], verbs[i].verb)) - continue; - arg_action = verbs[i].action; - break; - } - if (i >= ELEMENTSOF(verbs)) { - log_error("Unknown operation \"%s\"", argv[optind]); - return -EINVAL; - } - } +static int verb_status(int argc, char *argv[], void *userdata) { - if (geteuid() != 0) - return log_error_errno(EPERM, "Need to be root."); + sd_id128_t uuid = SD_ID128_NULL; + int r; - r = verify_esp(arg_path, &part, &pstart, &psize, &uuid); - if (r == -ENODEV && !arg_path) - log_notice("You might want to use --path= to indicate the path to your ESP, in case it is not mounted on /boot."); + r = must_be_root(); if (r < 0) return r; - switch (arg_action) { - case ACTION_STATUS: { - _cleanup_free_ char *fw_type = NULL; - _cleanup_free_ char *fw_info = NULL; - _cleanup_free_ char *loader = NULL; - _cleanup_free_ char *loader_path = NULL; - sd_id128_t loader_part_uuid = {}; - - if (is_efi_boot()) { - read_loader_efi_var("LoaderFirmwareType", &fw_type); - read_loader_efi_var("LoaderFirmwareInfo", &fw_info); - read_loader_efi_var("LoaderInfo", &loader); - read_loader_efi_var("LoaderImageIdentifier", &loader_path); - if (loader_path) - efi_tilt_backslashes(loader_path); - r = 
efi_loader_get_device_part_uuid(&loader_part_uuid); - if (r < 0 && r == -ENOENT) - log_warning_errno(r, "Failed to read EFI variable LoaderDevicePartUUID: %m"); - - printf("System:\n"); - printf(" Firmware: %s (%s)\n", strna(fw_type), strna(fw_info)); - - r = is_efi_secure_boot(); - if (r < 0) - log_warning_errno(r, "Failed to query secure boot status: %m"); - else - printf(" Secure Boot: %s\n", r ? "enabled" : "disabled"); + r = find_esp(NULL, NULL, NULL, &uuid); + if (r < 0) + return r; - r = is_efi_secure_boot_setup_mode(); - if (r < 0) - log_warning_errno(r, "Failed to query secure boot mode: %m"); - else - printf(" Setup Mode: %s\n", r ? "setup" : "user"); - printf("\n"); - - printf("Loader:\n"); - printf(" Product: %s\n", strna(loader)); - if (!sd_id128_equal(loader_part_uuid, SD_ID128_NULL)) - printf(" Partition: /dev/disk/by-partuuid/%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", - SD_ID128_FORMAT_VAL(loader_part_uuid)); - else - printf(" Partition: n/a\n"); - printf(" File: %s%s\n", special_glyph(TREE_RIGHT), strna(loader_path)); - printf("\n"); - } else - printf("System:\n Not booted with EFI\n"); - - r = status_binaries(arg_path, uuid); + if (is_efi_boot()) { + _cleanup_free_ char *fw_type = NULL, *fw_info = NULL, *loader = NULL, *loader_path = NULL; + sd_id128_t loader_part_uuid = SD_ID128_NULL; + + read_loader_efi_var("LoaderFirmwareType", &fw_type); + read_loader_efi_var("LoaderFirmwareInfo", &fw_info); + read_loader_efi_var("LoaderInfo", &loader); + read_loader_efi_var("LoaderImageIdentifier", &loader_path); + + if (loader_path) + efi_tilt_backslashes(loader_path); + + r = efi_loader_get_device_part_uuid(&loader_part_uuid); + if (r < 0 && r != -ENOENT) + log_warning_errno(r, "Failed to read EFI variable LoaderDevicePartUUID: %m"); + + printf("System:\n"); + printf(" Firmware: %s (%s)\n", strna(fw_type), strna(fw_info)); + + r = is_efi_secure_boot(); if (r < 0) - return r; + log_warning_errno(r, "Failed to query secure boot 
status: %m"); + else + printf(" Secure Boot: %s\n", r ? "enabled" : "disabled"); - if (arg_touch_variables) - r = status_variables(); - break; - } + r = is_efi_secure_boot_setup_mode(); + if (r < 0) + log_warning_errno(r, "Failed to query secure boot mode: %m"); + else + printf(" Setup Mode: %s\n", r ? "setup" : "user"); + printf("\n"); + + printf("Loader:\n"); + printf(" Product: %s\n", strna(loader)); + if (!sd_id128_is_null(loader_part_uuid)) + printf(" Partition: /dev/disk/by-partuuid/%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x\n", + SD_ID128_FORMAT_VAL(loader_part_uuid)); + else + printf(" Partition: n/a\n"); + printf(" File: %s%s\n", special_glyph(TREE_RIGHT), strna(loader_path)); + printf("\n"); + } else + printf("System:\n Not booted with EFI\n"); - case ACTION_INSTALL: - case ACTION_UPDATE: - umask(0002); + r = status_binaries(arg_path, uuid); + if (r < 0) + return r; + + if (arg_touch_variables) + r = status_variables(); - r = install_binaries(arg_path, arg_action == ACTION_INSTALL); + return r; +} + +static int verb_install(int argc, char *argv[], void *userdata) { + + sd_id128_t uuid = SD_ID128_NULL; + uint64_t pstart = 0, psize = 0; + uint32_t part = 0; + bool install; + int r; + + r = must_be_root(); + if (r < 0) + return r; + + r = find_esp(&part, &pstart, &psize, &uuid); + if (r < 0) + return r; + + install = streq(argv[0], "install"); + + RUN_WITH_UMASK(0002) { + r = install_binaries(arg_path, install); if (r < 0) return r; - if (arg_action == ACTION_INSTALL) { + if (install) { r = install_loader_config(arg_path); if (r < 0) return r; } + } - if (arg_touch_variables) - r = install_variables(arg_path, - part, pstart, psize, uuid, - "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi", - arg_action == ACTION_INSTALL); - break; + if (arg_touch_variables) + r = install_variables(arg_path, + part, pstart, psize, uuid, + "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi", + install); - case ACTION_REMOVE: - r = 
remove_binaries(arg_path); + return r; +} - if (arg_touch_variables) { - q = remove_variables(uuid, "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi", true); - if (q < 0 && r == 0) - r = q; - } - break; +static int verb_remove(int argc, char *argv[], void *userdata) { + sd_id128_t uuid = SD_ID128_NULL; + int r; + + r = must_be_root(); + if (r < 0) + return r; + + r = find_esp(NULL, NULL, NULL, &uuid); + if (r < 0) + return r; + + r = remove_binaries(arg_path); + + if (arg_touch_variables) { + int q; + + q = remove_variables(uuid, "/EFI/systemd/systemd-boot" EFI_MACHINE_TYPE_NAME ".efi", true); + if (q < 0 && r == 0) + r = q; } return r; } +static int bootctl_main(int argc, char *argv[]) { + + static const Verb verbs[] = { + { "help", VERB_ANY, VERB_ANY, 0, help }, + { "status", VERB_ANY, 1, VERB_DEFAULT, verb_status }, + { "install", VERB_ANY, 1, 0, verb_install }, + { "update", VERB_ANY, 1, 0, verb_install }, + { "remove", VERB_ANY, 1, 0, verb_remove }, + {} + }; + + return dispatch_verb(argc, argv, verbs, NULL); +} + int main(int argc, char *argv[]) { int r; log_parse_environment(); log_open(); + /* If we run in a container, automatically turn off EFI file system access */ + if (detect_container() > 0) + arg_touch_variables = false; + r = parse_argv(argc, argv); if (r <= 0) goto finish; @@ -1139,5 +1220,6 @@ int main(int argc, char *argv[]) { r = bootctl_main(argc, argv); finish: + free(arg_path); return r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/core/automount.c b/src/core/automount.c index 85803a9c4a..4e9891569c 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -98,9 +98,6 @@ static void unmount_autofs(Automount *a) { if (a->pipe_fd < 0) return; - automount_send_ready(a, a->tokens, -EHOSTDOWN); - automount_send_ready(a, a->expire_tokens, -EHOSTDOWN); - a->pipe_event_source = sd_event_source_unref(a->pipe_event_source); a->pipe_fd = safe_close(a->pipe_fd); @@ -109,6 +106,9 @@ static void unmount_autofs(Automount *a) { if (a->where && (UNIT(a)->manager->exit_code != MANAGER_RELOAD && UNIT(a)->manager->exit_code != MANAGER_REEXECUTE)) { + automount_send_ready(a, a->tokens, -EHOSTDOWN); + automount_send_ready(a, a->expire_tokens, -EHOSTDOWN); + r = repeat_unmount(a->where, MNT_DETACH); if (r < 0) log_error_errno(r, "Failed to unmount: %m"); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 94d1161605..c19e43f571 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -800,7 +800,10 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { "/dev/random\0" "rwm\0" "/dev/urandom\0" "rwm\0" "/dev/tty\0" "rwm\0" - "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */ + "/dev/pts/ptmx\0" "rw\0" /* /dev/pts/ptmx may not be duplicated, but accessed */ + /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */ + "/run/systemd/inaccessible/chr\0" "rwm\0" + "/run/systemd/inaccessible/blk\0" "rwm\0"; const char *x, *y; @@ -1705,7 +1708,7 @@ int manager_setup_cgroup(Manager *m) { /* also, move all other userspace processes remaining * in the root cgroup into that scope. 
*/ - r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false); + r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0); if (r < 0) log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m"); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 6167ce92cd..85b0c86a2f 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -856,7 +856,7 @@ int bus_cgroup_set_property( return 1; - } else if (STR_IN_SET(name, "MemoryLowByPhysicalMemory", "MemoryHighByPhysicalMemory", "MemoryMaxByPhysicalMemory")) { + } else if (STR_IN_SET(name, "MemoryLowScale", "MemoryHighScale", "MemoryMaxScale")) { uint32_t raw; uint64_t v; @@ -872,7 +872,7 @@ int bus_cgroup_set_property( const char *e; /* Chop off suffix */ - assert_se(e = endswith(name, "ByPhysicalMemory")); + assert_se(e = endswith(name, "Scale")); name = strndupa(name, e - name); if (streq(name, "MemoryLow")) @@ -883,7 +883,8 @@ int bus_cgroup_set_property( c->memory_max = v; unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); - unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name, (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100, (uint64_t) UINT32_MAX))); + unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name, + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); } return 1; @@ -909,7 +910,7 @@ int bus_cgroup_set_property( return 1; - } else if (streq(name, "MemoryLimitByPhysicalMemory")) { + } else if (streq(name, "MemoryLimitScale")) { uint64_t limit; uint32_t raw; @@ -924,7 +925,8 @@ int bus_cgroup_set_property( if (mode != UNIT_CHECK) { c->memory_limit = limit; unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY); - unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%", (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100, (uint64_t) UINT32_MAX))); + unit_write_drop_in_private_format(u, mode, "MemoryLimit", 
"MemoryLimit=%" PRIu32 "%%", + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); } return 1; @@ -1060,6 +1062,8 @@ int bus_cgroup_set_property( r = sd_bus_message_read(message, "t", &limit); if (r < 0) return r; + if (limit <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name); if (mode != UNIT_CHECK) { c->tasks_max = limit; @@ -1072,6 +1076,26 @@ int bus_cgroup_set_property( } return 1; + } else if (streq(name, "TasksMaxScale")) { + uint64_t limit; + uint32_t raw; + + r = sd_bus_message_read(message, "u", &raw); + if (r < 0) + return r; + + limit = system_tasks_max_scale(raw, UINT32_MAX); + if (limit <= 0 || limit >= UINT64_MAX) + return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name); + + if (mode != UNIT_CHECK) { + c->tasks_max = limit; + unit_invalidate_cgroup(u, CGROUP_MASK_PIDS); + unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%", + (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); + } + + return 1; } if (u->transient && u->load_state == UNIT_STUB) { diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 307c3d8e7a..9c50cd93e5 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -44,6 +44,7 @@ #endif #include "strv.h" #include "syslog-util.h" +#include "user-util.h" #include "utf8.h" BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput); @@ -693,6 +694,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("AmbientCapabilities", "t", property_get_ambient_capabilities, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, 
offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN), @@ -840,6 +842,9 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; + if (!isempty(uu) && !valid_user_group_name_or_id(uu)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid user name: %s", uu); + if (mode != UNIT_CHECK) { if (isempty(uu)) @@ -859,6 +864,9 @@ int bus_exec_context_set_transient_property( if (r < 0) return r; + if (!isempty(gg) && !valid_user_group_name_or_id(gg)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid group name: %s", gg); + if (mode != UNIT_CHECK) { if (isempty(gg)) @@ -1061,7 +1069,8 @@ int bus_exec_context_set_transient_property( } else if (STR_IN_SET(name, "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "PrivateTmp", "PrivateDevices", "PrivateNetwork", - "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime")) { + "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", + "RestrictRealtime", "DynamicUser")) { int b; r = sd_bus_message_read(message, "b", &b); @@ -1089,6 +1098,8 @@ int bus_exec_context_set_transient_property( c->memory_deny_write_execute = b; else if (streq(name, "RestrictRealtime")) c->restrict_realtime = b; + else if (streq(name, "DynamicUser")) + c->dynamic_user = b; unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b)); } diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index d05968bd65..ef05a75a8b 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -43,6 +43,7 @@ #include "string-util.h" #include "strv.h" #include "syslog-util.h" +#include "user-util.h" #include "virt.h" #include "watchdog.h" @@ -1511,8 +1512,8 @@ static int 
method_unset_and_set_environment(sd_bus_message *message, void *userd } static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_error *error) { - uint8_t code; Manager *m = userdata; + uint8_t code; int r; assert(message); @@ -1534,6 +1535,61 @@ static int method_set_exit_code(sd_bus_message *message, void *userdata, sd_bus_ return sd_bus_reply_method_return(message, NULL); } +static int method_lookup_dynamic_user_by_name(sd_bus_message *message, void *userdata, sd_bus_error *error) { + Manager *m = userdata; + const char *name; + uid_t uid; + int r; + + assert(message); + assert(m); + + r = sd_bus_message_read_basic(message, 's', &name); + if (r < 0) + return r; + + if (!MANAGER_IS_SYSTEM(m)) + return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance."); + if (!valid_user_group_name(name)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User name invalid: %s", name); + + r = dynamic_user_lookup_name(m, name, &uid); + if (r == -ESRCH) + return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user %s does not exist.", name); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, "u", (uint32_t) uid); +} + +static int method_lookup_dynamic_user_by_uid(sd_bus_message *message, void *userdata, sd_bus_error *error) { + _cleanup_free_ char *name = NULL; + Manager *m = userdata; + uid_t uid; + int r; + + assert(message); + assert(m); + + assert_cc(sizeof(uid) == sizeof(uint32_t)); + r = sd_bus_message_read_basic(message, 'u', &uid); + if (r < 0) + return r; + + if (!MANAGER_IS_SYSTEM(m)) + return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Dynamic users are only supported in the system instance."); + if (!uid_is_valid(uid)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "User ID invalid: " UID_FMT, uid); + + r = dynamic_user_lookup_uid(m, uid, &name); + if (r == -ESRCH) + return sd_bus_error_setf(error, 
BUS_ERROR_NO_SUCH_DYNAMIC_USER, "Dynamic user ID " UID_FMT " does not exist.", uid); + if (r < 0) + return r; + + return sd_bus_reply_method_return(message, "s", name); +} + static int list_unit_files_by_patterns(sd_bus_message *message, void *userdata, sd_bus_error *error, char **states, char **patterns) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; Manager *m = userdata; @@ -2199,6 +2255,8 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_METHOD("PresetAllUnitFiles", "sbb", "a(sss)", method_preset_all_unit_files, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("AddDependencyUnitFiles", "asssbb", "a(sss)", method_add_dependency_unit_files, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("SetExitCode", "y", NULL, method_set_exit_code, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("LookupDynamicUserByName", "s", "u", method_lookup_dynamic_user_by_name, SD_BUS_VTABLE_UNPRIVILEGED), + SD_BUS_METHOD("LookupDynamicUserByUID", "u", "s", method_lookup_dynamic_user_by_uid, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_SIGNAL("UnitNew", "so", 0), SD_BUS_SIGNAL("UnitRemoved", "so", 0), diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c index f557eedfc3..1abaf9f658 100644 --- a/src/core/dbus-scope.c +++ b/src/core/dbus-scope.c @@ -225,5 +225,5 @@ int bus_scope_send_request_stop(Scope *s) { if (r < 0) return r; - return sd_bus_send_to(UNIT(s)->manager->api_bus, m, /* s->controller */ NULL, NULL); + return sd_bus_send_to(UNIT(s)->manager->api_bus, m, s->controller, NULL); } diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c new file mode 100644 index 0000000000..8035bee231 --- /dev/null +++ b/src/core/dynamic-user.c @@ -0,0 +1,763 @@ +/*** + This file is part of systemd. 
+ + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <grp.h> +#include <pwd.h> +#include <sys/file.h> + +#include "dynamic-user.h" +#include "fd-util.h" +#include "fs-util.h" +#include "parse-util.h" +#include "random-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "user-util.h" +#include "fileio.h" + +/* Let's pick UIDs within the 16bit range, so that we are compatible with containers using 16bit user namespacing. At + * least on Fedora normal users are allocated until UID 60000, hence do not allocate from below this. Also stay away + * from the upper end of the range as that is often used for overflow/nobody users. 
*/ +#define UID_PICK_MIN ((uid_t) UINT32_C(0x0000EF00)) +#define UID_PICK_MAX ((uid_t) UINT32_C(0x0000FFEF)) + +/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */ +#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (UID_PICK_MAX - UID_PICK_MIN + 1)) + UID_PICK_MIN) + +static DynamicUser* dynamic_user_free(DynamicUser *d) { + if (!d) + return NULL; + + if (d->manager) + (void) hashmap_remove(d->manager->dynamic_users, d->name); + + safe_close_pair(d->storage_socket); + free(d); + + return NULL; +} + +static int dynamic_user_add(Manager *m, const char *name, int storage_socket[2], DynamicUser **ret) { + DynamicUser *d = NULL; + int r; + + assert(m); + assert(name); + assert(storage_socket); + + r = hashmap_ensure_allocated(&m->dynamic_users, &string_hash_ops); + if (r < 0) + return r; + + d = malloc0(offsetof(DynamicUser, name) + strlen(name) + 1); + if (!d) + return -ENOMEM; + + strcpy(d->name, name); + + d->storage_socket[0] = storage_socket[0]; + d->storage_socket[1] = storage_socket[1]; + + r = hashmap_put(m->dynamic_users, d->name, d); + if (r < 0) { + free(d); + return r; + } + + d->manager = m; + + if (ret) + *ret = d; + + return 0; +} + +int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret) { + _cleanup_close_pair_ int storage_socket[2] = { -1, -1 }; + DynamicUser *d; + int r; + + assert(m); + assert(name); + + /* Return the DynamicUser structure for a specific user name. Note that this won't actually allocate a UID for + * it, but just prepare the data structure for it. The UID is allocated only on demand, when it's really + * needed, and in the child process we fork off, since allocation involves NSS checks which are not OK to do + * from PID 1. 
To allow the children and PID 1 share information about allocated UIDs we use an anonymous + * AF_UNIX/SOCK_DGRAM socket (called the "storage socket") that contains at most one datagram with the + * allocated UID number, plus an fd referencing the lock file for the UID + * (i.e. /run/systemd/dynamic-uid/$UID). Why involve the socket pair? So that PID 1 and all its children can + * share the same storage for the UID and lock fd, simply by inheriting the storage socket fds. The socket pair + * may exist in three different states: + * + * a) no datagram stored. This is the initial state. In this case the dynamic user was never realized. + * + * b) a datagram containing a UID stored, but no lock fd attached to it. In this case there was already a + * statically assigned UID by the same name, which we are reusing. + * + * c) a datagram containing a UID stored, and a lock fd is attached to it. In this case we allocated a dynamic + * UID and locked it in the file system, using the lock fd. + * + * As PID 1 and various children might access the socket pair simultaneously, and pop the datagram or push it + * back in any time, we also maintain a lock on the socket pair. Note one peculiarity regarding locking here: + * the UID lock on disk is protected via a BSD file lock (i.e. an fd-bound lock), so that the lock is kept in + * place as long as there's a reference to the fd open. The lock on the storage socket pair however is a POSIX + * file lock (i.e. a process-bound lock), as all users share the same fd of this (after all it is anonymous, + * nobody else could get any access to it except via our own fd) and we want to synchronize access between all + * processes that have access to it. 
*/ + + d = hashmap_get(m->dynamic_users, name); + if (d) { + /* We already have a structure for the dynamic user, let's increase the ref count and reuse it */ + d->n_ref++; + *ret = d; + return 0; + } + + if (!valid_user_group_name_or_id(name)) + return -EINVAL; + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, storage_socket) < 0) + return -errno; + + r = dynamic_user_add(m, name, storage_socket, &d); + if (r < 0) + return r; + + storage_socket[0] = storage_socket[1] = -1; + + if (ret) { + d->n_ref++; + *ret = d; + } + + return 1; +} + +static int pick_uid(const char *name, uid_t *ret_uid) { + + static const uint8_t hash_key[] = { + 0x37, 0x53, 0x7e, 0x31, 0xcf, 0xce, 0x48, 0xf5, + 0x8a, 0xbb, 0x39, 0x57, 0x8d, 0xd9, 0xec, 0x59 + }; + + unsigned n_tries = 100; + uid_t candidate; + int r; + + /* A static user by this name does not exist yet. Let's find a free ID then, and use that. We start with a UID + * generated as hash from the user name. */ + candidate = UID_CLAMP_INTO_RANGE(siphash24(name, strlen(name), hash_key)); + + (void) mkdir("/run/systemd/dynamic-uid", 0755); + + for (;;) { + char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1]; + _cleanup_close_ int lock_fd = -1; + ssize_t l; + + if (--n_tries <= 0) /* Give up retrying eventually */ + return -EBUSY; + + if (candidate < UID_PICK_MIN || candidate > UID_PICK_MAX) + goto next; + + xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, candidate); + + for (;;) { + struct stat st; + + lock_fd = open(lock_path, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600); + if (lock_fd < 0) + return -errno; + + r = flock(lock_fd, LOCK_EX|LOCK_NB); /* Try to get a BSD file lock on the UID lock file */ + if (r < 0) { + if (errno == EBUSY || errno == EAGAIN) + goto next; /* already in use */ + + return -errno; + } + + if (fstat(lock_fd, &st) < 0) + return -errno; + if (st.st_nlink > 0) + break; + + /* Oh, bummer, we got the lock, but the file was unlinked between the time we 
opened it and + * got the lock. Close it, and try again. */ + lock_fd = safe_close(lock_fd); + } + + /* Some superficial check whether this UID/GID might already be taken by some static user */ + if (getpwuid(candidate) || getgrgid((gid_t) candidate)) { + (void) unlink(lock_path); + goto next; + } + + /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */ + l = pwritev(lock_fd, + (struct iovec[2]) { + { .iov_base = (char*) name, .iov_len = strlen(name) }, + { .iov_base = (char[1]) { '\n' }, .iov_len = 1 } + }, 2, 0); + if (l < 0) { + (void) unlink(lock_path); + return -errno; + } + + (void) ftruncate(lock_fd, l); + + *ret_uid = candidate; + r = lock_fd; + lock_fd = -1; + + return r; + + next: + /* Pick another random UID, and see if that works for us. */ + random_bytes(&candidate, sizeof(candidate)); + candidate = UID_CLAMP_INTO_RANGE(candidate); + } +} + +static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) { + uid_t uid = UID_INVALID; + struct iovec iov = { + .iov_base = &uid, + .iov_len = sizeof(uid), + }; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr mh = { + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + struct cmsghdr *cmsg; + + ssize_t k; + int lock_fd = -1; + + assert(d); + assert(ret_uid); + assert(ret_lock_fd); + + /* Read the UID and lock fd that is stored in the storage AF_UNIX socket. This should be called with the lock + * on the socket taken. */ + + k = recvmsg(d->storage_socket[0], &mh, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC); + if (k < 0) + return -errno; + + cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int))); + if (cmsg) + lock_fd = *(int*) CMSG_DATA(cmsg); + else + cmsg_close_all(&mh); /* just in case... 
*/ + + *ret_uid = uid; + *ret_lock_fd = lock_fd; + + return 0; +} + +static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) { + struct iovec iov = { + .iov_base = &uid, + .iov_len = sizeof(uid), + }; + union { + struct cmsghdr cmsghdr; + uint8_t buf[CMSG_SPACE(sizeof(int))]; + } control = {}; + struct msghdr mh = { + .msg_control = &control, + .msg_controllen = sizeof(control), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + ssize_t k; + + assert(d); + + /* Store the UID and lock_fd in the storage socket. This should be called with the socket pair lock taken. */ + + if (lock_fd >= 0) { + struct cmsghdr *cmsg; + + cmsg = CMSG_FIRSTHDR(&mh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + memcpy(CMSG_DATA(cmsg), &lock_fd, sizeof(int)); + + mh.msg_controllen = CMSG_SPACE(sizeof(int)); + } else { + mh.msg_control = NULL; + mh.msg_controllen = 0; + } + + k = sendmsg(d->storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL); + if (k < 0) + return -errno; + + return 0; +} + +static void unlink_uid_lock(int lock_fd, uid_t uid) { + char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1]; + + if (lock_fd < 0) + return; + + xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid); + (void) unlink_noerrno(lock_path); +} + +int dynamic_user_realize(DynamicUser *d, uid_t *ret) { + + _cleanup_close_ int etc_passwd_lock_fd = -1, uid_lock_fd = -1; + uid_t uid = UID_INVALID; + int r; + + assert(d); + + /* Acquire a UID for the user name. This will allocate a UID for the user name if the user doesn't exist + * yet. If it already exists its existing UID/GID will be reused. */ + + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) + return -errno; + + r = dynamic_user_pop(d, &uid, &uid_lock_fd); + if (r < 0) { + int new_uid_lock_fd; + uid_t new_uid; + + if (r != -EAGAIN) + goto finish; + + /* OK, nothing stored yet, let's try to find something useful. 
While we are working on this release the + * lock however, so that nobody else blocks on our NSS lookups. */ + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + + /* Let's see if a proper, static user or group by this name exists. Try to take the lock on + * /etc/passwd, if that fails with EROFS then /etc is read-only. In that case it's fine if we don't + * take the lock, given that users can't be added there anyway in this case. */ + etc_passwd_lock_fd = take_etc_passwd_lock(NULL); + if (etc_passwd_lock_fd < 0 && etc_passwd_lock_fd != -EROFS) + return etc_passwd_lock_fd; + + /* First, let's parse this as numeric UID */ + r = parse_uid(d->name, &uid); + if (r < 0) { + struct passwd *p; + struct group *g; + + /* OK, this is not a numeric UID. Let's see if there's a user by this name */ + p = getpwnam(d->name); + if (p) + uid = p->pw_uid; + + /* Let's see if there's a group by this name */ + g = getgrnam(d->name); + if (g) { + /* If the UID/GID of the user/group of the same don't match, refuse operation */ + if (uid != UID_INVALID && uid != (uid_t) g->gr_gid) + return -EILSEQ; + + uid = (uid_t) g->gr_gid; + } + } + + if (uid == UID_INVALID) { + /* No static UID assigned yet, excellent. Let's pick a new dynamic one, and lock it. */ + + uid_lock_fd = pick_uid(d->name, &uid); + if (uid_lock_fd < 0) + return uid_lock_fd; + } + + /* So, we found a working UID/lock combination. Let's see if we actually still need it. */ + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) { + unlink_uid_lock(uid_lock_fd, uid); + return -errno; + } + + r = dynamic_user_pop(d, &new_uid, &new_uid_lock_fd); + if (r < 0) { + if (r != -EAGAIN) { + /* OK, something bad happened, let's get rid of the bits we acquired. */ + unlink_uid_lock(uid_lock_fd, uid); + goto finish; + } + + /* Great! Nothing is stored here, still. Store our newly acquired data. */ + } else { + /* Hmm, so as it appears there's now something stored in the storage socket. Throw away what we + * acquired, and use what's stored now. 
*/ + + unlink_uid_lock(uid_lock_fd, uid); + safe_close(uid_lock_fd); + + uid = new_uid; + uid_lock_fd = new_uid_lock_fd; + } + } + + /* If the UID/GID was already allocated dynamically, push the data we popped out back in. If it was already + * allocated statically, push the UID back too, but do not push the lock fd in. If we allocated the UID + * dynamically right here, push that in along with the lock fd for it. */ + r = dynamic_user_push(d, uid, uid_lock_fd); + if (r < 0) + goto finish; + + *ret = uid; + r = 0; + +finish: + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + return r; +} + +int dynamic_user_current(DynamicUser *d, uid_t *ret) { + _cleanup_close_ int lock_fd = -1; + uid_t uid; + int r; + + assert(d); + assert(ret); + + /* Get the currently assigned UID for the user, if there's any. This simply pops the data from the storage socket, and pushes it back in right-away. */ + + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) + return -errno; + + r = dynamic_user_pop(d, &uid, &lock_fd); + if (r < 0) + goto finish; + + r = dynamic_user_push(d, uid, lock_fd); + if (r < 0) + goto finish; + + *ret = uid; + r = 0; + +finish: + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + return r; +} + +DynamicUser* dynamic_user_ref(DynamicUser *d) { + if (!d) + return NULL; + + assert(d->n_ref > 0); + d->n_ref++; + + return d; +} + +DynamicUser* dynamic_user_unref(DynamicUser *d) { + if (!d) + return NULL; + + /* Note that this doesn't actually release any resources itself. If a dynamic user should be fully destroyed + * and its UID released, use dynamic_user_destroy() instead. NB: the dynamic user table may contain entries + * with no references, which is commonly the case right before a daemon reload. */ + + assert(d->n_ref > 0); + d->n_ref--; + + return NULL; +} + +static int dynamic_user_close(DynamicUser *d) { + _cleanup_close_ int lock_fd = -1; + uid_t uid; + int r; + + /* Release the user ID, by releasing the lock on it, and emptying the storage socket. 
After this the user is + * unrealized again, much like it was after the DynamicUser object was first allocated. */ + + if (lockf(d->storage_socket[0], F_LOCK, 0) < 0) + return -errno; + + r = dynamic_user_pop(d, &uid, &lock_fd); + if (r == -EAGAIN) { + /* User wasn't realized yet, nothing to do. */ + r = 0; + goto finish; + } + if (r < 0) + goto finish; + + /* This dynamic user was realized and dynamically allocated. In this case, let's remove the lock file. */ + unlink_uid_lock(lock_fd, uid); + r = 1; + +finish: + (void) lockf(d->storage_socket[0], F_ULOCK, 0); + return r; +} + +DynamicUser* dynamic_user_destroy(DynamicUser *d) { + if (!d) + return NULL; + + /* Drop a reference to a DynamicUser object, and destroy the user completely if this was the last + * reference. This is called whenever a service is shut down and wants its dynamic UID gone. Note that + * dynamic_user_unref() is what is called whenever a service is simply freed, for example during a reload + * cycle, where the dynamic users should not be destroyed, but our datastructures should. */ + + dynamic_user_unref(d); + + if (d->n_ref > 0) + return NULL; + + (void) dynamic_user_close(d); + return dynamic_user_free(d); +} + +int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds) { + DynamicUser *d; + Iterator i; + + assert(m); + assert(f); + assert(fds); + + /* Dump the dynamic user database into the manager serialization, to deal with daemon reloads. 
*/ + + HASHMAP_FOREACH(d, m->dynamic_users, i) { + int copy0, copy1; + + copy0 = fdset_put_dup(fds, d->storage_socket[0]); + if (copy0 < 0) + return copy0; + + copy1 = fdset_put_dup(fds, d->storage_socket[1]); + if (copy1 < 0) + return copy1; + + fprintf(f, "dynamic-user=%s %i %i\n", d->name, copy0, copy1); + } + + return 0; +} + +void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds) { + _cleanup_free_ char *name = NULL, *s0 = NULL, *s1 = NULL; + int r, fd0, fd1; + + assert(m); + assert(value); + assert(fds); + + /* Parse the serialization again, after a daemon reload */ + + r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL); + if (r != 3 || !isempty(value)) { + log_debug("Unable to parse dynamic user line."); + return; + } + + if (safe_atoi(s0, &fd0) < 0 || !fdset_contains(fds, fd0)) { + log_debug("Unable to process dynamic user fd specification."); + return; + } + + if (safe_atoi(s1, &fd1) < 0 || !fdset_contains(fds, fd1)) { + log_debug("Unable to process dynamic user fd specification."); + return; + } + + r = dynamic_user_add(m, name, (int[]) { fd0, fd1 }, NULL); + if (r < 0) { + log_debug_errno(r, "Failed to add dynamic user: %m"); + return; + } + + (void) fdset_remove(fds, fd0); + (void) fdset_remove(fds, fd1); +} + +void dynamic_user_vacuum(Manager *m, bool close_user) { + DynamicUser *d; + Iterator i; + + assert(m); + + /* Empty the dynamic user database, optionally cleaning up orphaned dynamic users, i.e. destroy and free users + * to which no reference exist. This is called after a daemon reload finished, in order to destroy users which + * might not be referenced anymore. 
*/ + + HASHMAP_FOREACH(d, m->dynamic_users, i) { + if (d->n_ref > 0) + continue; + + if (close_user) { + log_debug("Removing orphaned dynamic user %s", d->name); + (void) dynamic_user_close(d); + } + + dynamic_user_free(d); + } +} + +int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret) { + char lock_path[strlen("/run/systemd/dynamic-uid/") + DECIMAL_STR_MAX(uid_t) + 1]; + _cleanup_free_ char *user = NULL; + uid_t check_uid; + int r; + + assert(m); + assert(ret); + + /* A friendly way to translate a dynamic user's UID into its name. */ + + if (uid < UID_PICK_MIN) + return -ESRCH; + if (uid > UID_PICK_MAX) + return -ESRCH; + + xsprintf(lock_path, "/run/systemd/dynamic-uid/" UID_FMT, uid); + r = read_one_line_file(lock_path, &user); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* The lock file might be stale, hence let's verify the data before we return it */ + r = dynamic_user_lookup_name(m, user, &check_uid); + if (r < 0) + return r; + if (check_uid != uid) /* lock file doesn't match our own idea */ + return -ESRCH; + + *ret = user; + user = NULL; + + return 0; +} + +int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) { + DynamicUser *d; + int r; + + assert(m); + assert(name); + assert(ret); + + /* A friendly call for translating a dynamic user's name into its UID */ + + d = hashmap_get(m->dynamic_users, name); + if (!d) + return -ESRCH; + + r = dynamic_user_current(d, ret); + if (r == -EAGAIN) /* not realized yet? */ + return -ESRCH; + + return r; +} + +int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group) { + bool acquired = false; + int r; + + assert(creds); + assert(m); + + /* A DynamicUser object encapsulates an allocation of both a UID and a GID for a specific name. However, some + * services use different user and groups. For cases like that there's DynamicCreds containing a pair of user + * and group. This call allocates a pair. 
*/ + + if (!creds->user && user) { + r = dynamic_user_acquire(m, user, &creds->user); + if (r < 0) + return r; + + acquired = true; + } + + if (!creds->group) { + + if (creds->user && (!group || streq_ptr(user, group))) + creds->group = dynamic_user_ref(creds->user); + else { + r = dynamic_user_acquire(m, group, &creds->group); + if (r < 0) { + if (acquired) + creds->user = dynamic_user_unref(creds->user); + return r; + } + } + } + + return 0; +} + +int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid) { + uid_t u = UID_INVALID; + gid_t g = GID_INVALID; + int r; + + assert(creds); + assert(uid); + assert(gid); + + /* Realize both the referenced user and group */ + + if (creds->user) { + r = dynamic_user_realize(creds->user, &u); + if (r < 0) + return r; + } + + if (creds->group && creds->group != creds->user) { + r = dynamic_user_realize(creds->group, &g); + if (r < 0) + return r; + } else + g = u; + + *uid = u; + *gid = g; + + return 0; +} + +void dynamic_creds_unref(DynamicCreds *creds) { + assert(creds); + + creds->user = dynamic_user_unref(creds->user); + creds->group = dynamic_user_unref(creds->group); +} + +void dynamic_creds_destroy(DynamicCreds *creds) { + assert(creds); + + creds->user = dynamic_user_destroy(creds->user); + creds->group = dynamic_user_destroy(creds->group); +} diff --git a/src/core/dynamic-user.h b/src/core/dynamic-user.h new file mode 100644 index 0000000000..0b8bce1a72 --- /dev/null +++ b/src/core/dynamic-user.h @@ -0,0 +1,66 @@ +#pragma once + +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. 
+ + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +typedef struct DynamicUser DynamicUser; + +typedef struct DynamicCreds { + /* A combination of a dynamic user and group */ + DynamicUser *user; + DynamicUser *group; +} DynamicCreds; + +#include "manager.h" + +/* Note that this object always allocates a pair of user and group under the same name, even if one of them isn't + * used. This means, if you want to allocate a group and user pair, and they might have two different names, then you + * need to allocate two of these objects. DynamicCreds above makes that easy. */ +struct DynamicUser { + int n_ref; + Manager *manager; + + /* An AF_UNIX socket pair that contains a datagram containing both the numeric ID assigned, as well as a lock + * file fd locking the user ID we picked. 
*/ + int storage_socket[2]; + + char name[]; +}; + +int dynamic_user_acquire(Manager *m, const char *name, DynamicUser **ret); + +int dynamic_user_realize(DynamicUser *d, uid_t *ret); +int dynamic_user_current(DynamicUser *d, uid_t *ret); + +DynamicUser* dynamic_user_ref(DynamicUser *d); +DynamicUser* dynamic_user_unref(DynamicUser *d); +DynamicUser* dynamic_user_destroy(DynamicUser *d); + +int dynamic_user_serialize(Manager *m, FILE *f, FDSet *fds); +void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds); +void dynamic_user_vacuum(Manager *m, bool close_user); + +int dynamic_user_lookup_uid(Manager *m, uid_t uid, char **ret); +int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret); + +int dynamic_creds_acquire(DynamicCreds *creds, Manager *m, const char *user, const char *group); +int dynamic_creds_realize(DynamicCreds *creds, uid_t *uid, gid_t *gid); + +void dynamic_creds_unref(DynamicCreds *creds); +void dynamic_creds_destroy(DynamicCreds *creds); diff --git a/src/core/execute.c b/src/core/execute.c index 05dc1aaec1..26e9cd5339 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -290,10 +290,10 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) { } static int connect_logger_as( + Unit *unit, const ExecContext *context, ExecOutput output, const char *ident, - const char *unit_id, int nfd, uid_t uid, gid_t gid) { @@ -329,7 +329,7 @@ static int connect_logger_as( "%i\n" "%i\n", context->syslog_identifier ? 
context->syslog_identifier : ident, - unit_id, + unit->id, context->syslog_priority, !!context->syslog_level_prefix, output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE, @@ -544,7 +544,7 @@ static int setup_output( case EXEC_OUTPUT_KMSG_AND_CONSOLE: case EXEC_OUTPUT_JOURNAL: case EXEC_OUTPUT_JOURNAL_AND_CONSOLE: - r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid); + r = connect_logger_as(unit, context, o, ident, fileno, uid, gid); if (r < 0) { log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr"); r = open_null_as(O_WRONLY, fileno); @@ -1526,14 +1526,28 @@ static bool exec_needs_mount_namespace( return false; } +static void append_socket_pair(int *array, unsigned *n, int pair[2]) { + assert(array); + assert(n); + + if (!pair) + return; + + if (pair[0] >= 0) + array[(*n)++] = pair[0]; + if (pair[1] >= 0) + array[(*n)++] = pair[1]; +} + static int close_remaining_fds( const ExecParameters *params, ExecRuntime *runtime, + DynamicCreds *dcreds, int socket_fd, int *fds, unsigned n_fds) { unsigned n_dont_close = 0; - int dont_close[n_fds + 7]; + int dont_close[n_fds + 11]; assert(params); @@ -1551,11 +1565,14 @@ static int close_remaining_fds( n_dont_close += n_fds; } - if (runtime) { - if (runtime->netns_storage_socket[0] >= 0) - dont_close[n_dont_close++] = runtime->netns_storage_socket[0]; - if (runtime->netns_storage_socket[1] >= 0) - dont_close[n_dont_close++] = runtime->netns_storage_socket[1]; + if (runtime) + append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket); + + if (dcreds) { + if (dcreds->user) + append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket); + if (dcreds->group) + append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket); } return close_all_fds(dont_close, n_dont_close); @@ -1567,6 +1584,7 @@ static int exec_child( const ExecContext *context, const ExecParameters 
*params, ExecRuntime *runtime, + DynamicCreds *dcreds, char **argv, int socket_fd, int *fds, unsigned n_fds, @@ -1617,7 +1635,7 @@ static int exec_child( log_forget_fds(); - r = close_remaining_fds(params, runtime, socket_fd, fds, n_fds); + r = close_remaining_fds(params, runtime, dcreds, socket_fd, fds, n_fds); if (r < 0) { *exit_status = EXIT_FDS; return r; @@ -1650,25 +1668,48 @@ static int exec_child( } } - if (context->user) { - username = context->user; - r = get_user_creds(&username, &uid, &gid, &home, &shell); + if (context->dynamic_user && dcreds) { + + /* Make sure we bypass our own NSS module for any NSS checks */ + if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) { + *exit_status = EXIT_USER; + return -errno; + } + + r = dynamic_creds_realize(dcreds, &uid, &gid); if (r < 0) { *exit_status = EXIT_USER; return r; } - } - if (context->group) { - const char *g = context->group; + if (uid == UID_INVALID || gid == GID_INVALID) { + *exit_status = EXIT_USER; + return -ESRCH; + } - r = get_group_creds(&g, &gid); - if (r < 0) { - *exit_status = EXIT_GROUP; - return r; + if (dcreds->user) + username = dcreds->user->name; + + } else { + if (context->user) { + username = context->user; + r = get_user_creds(&username, &uid, &gid, &home, &shell); + if (r < 0) { + *exit_status = EXIT_USER; + return r; + } } - } + if (context->group) { + const char *g = context->group; + + r = get_group_creds(&g, &gid); + if (r < 0) { + *exit_status = EXIT_GROUP; + return r; + } + } + } /* If a socket is connected to STDIN/STDOUT/STDERR, we * must sure to drop O_NONBLOCK */ @@ -2192,6 +2233,7 @@ int exec_spawn(Unit *unit, const ExecContext *context, const ExecParameters *params, ExecRuntime *runtime, + DynamicCreds *dcreds, pid_t *ret) { _cleanup_strv_free_ char **files_env = NULL; @@ -2250,6 +2292,7 @@ int exec_spawn(Unit *unit, context, params, runtime, + dcreds, argv, socket_fd, fds, n_fds, @@ -2723,6 +2766,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char 
*prefix) { if (c->group) fprintf(f, "%sGroup: %s\n", prefix, c->group); + fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user)); + if (strv_length(c->supplementary_groups) > 0) { fprintf(f, "%sSupplementaryGroups:", prefix); strv_fprintf(f, c->supplementary_groups); @@ -3062,7 +3107,7 @@ int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) { return r; if (c->private_network && (*rt)->netns_storage_socket[0] < 0) { - if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0) + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0) return -errno; } diff --git a/src/core/execute.h b/src/core/execute.h index 73b8a119b0..48cc18fbb3 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -30,6 +30,7 @@ typedef struct ExecParameters ExecParameters; #include <stdio.h> #include <sys/capability.h> +#include "cgroup-util.h" #include "fdset.h" #include "list.h" #include "missing.h" @@ -91,6 +92,8 @@ struct ExecRuntime { char *tmp_dir; char *var_tmp_dir; + /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network + * namespace. */ int netns_storage_socket[2]; }; @@ -173,6 +176,8 @@ struct ExecContext { bool no_new_privileges; + bool dynamic_user; + /* This is not exposed to the user but available * internally. 
We need it to make sure that whenever we spawn * /usr/bin/mount it is run in the same process group as us so @@ -203,9 +208,6 @@ struct ExecContext { bool no_new_privileges_set:1; }; -#include "cgroup-util.h" -#include "cgroup.h" - struct ExecParameters { char **argv; char **environment; @@ -236,11 +238,15 @@ struct ExecParameters { int stderr_fd; }; +#include "unit.h" +#include "dynamic-user.h" + int exec_spawn(Unit *unit, ExecCommand *command, const ExecContext *context, const ExecParameters *exec_params, ExecRuntime *runtime, + DynamicCreds *dynamic_creds, pid_t *ret); void exec_command_done(ExecCommand *c); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 6a5c16a000..c9cdbe8ba7 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -19,9 +19,9 @@ m4_dnl Define the context options only once m4_define(`EXEC_CONTEXT_CONFIG_ITEMS', `$1.WorkingDirectory, config_parse_working_directory, 0, offsetof($1, exec_context) $1.RootDirectory, config_parse_unit_path_printf, 0, offsetof($1, exec_context.root_directory) -$1.User, config_parse_unit_string_printf, 0, offsetof($1, exec_context.user) -$1.Group, config_parse_unit_string_printf, 0, offsetof($1, exec_context.group) -$1.SupplementaryGroups, config_parse_strv, 0, offsetof($1, exec_context.supplementary_groups) +$1.User, config_parse_user_group, 0, offsetof($1, exec_context.user) +$1.Group, config_parse_user_group, 0, offsetof($1, exec_context.group) +$1.SupplementaryGroups, config_parse_user_group_strv, 0, offsetof($1, exec_context.supplementary_groups) $1.Nice, config_parse_exec_nice, 0, offsetof($1, exec_context) $1.OOMScoreAdjust, config_parse_exec_oom_score_adjust, 0, offsetof($1, exec_context) $1.IOSchedulingClass, config_parse_exec_io_class, 0, offsetof($1, exec_context) @@ -34,6 +34,7 @@ $1.UMask, config_parse_mode, 0, $1.Environment, config_parse_environ, 0, offsetof($1, exec_context.environment) $1.EnvironmentFile, 
config_parse_unit_env_file, 0, offsetof($1, exec_context.environment_files) $1.PassEnvironment, config_parse_pass_environ, 0, offsetof($1, exec_context.pass_environment) +$1.DynamicUser, config_parse_bool, 0, offsetof($1, exec_context.dynamic_user) $1.StandardInput, config_parse_input, 0, offsetof($1, exec_context.std_input) $1.StandardOutput, config_parse_output, 0, offsetof($1, exec_context.std_output) $1.StandardError, config_parse_output, 0, offsetof($1, exec_context.std_error) @@ -285,8 +286,8 @@ Socket.ExecStartPost, config_parse_exec, SOCKET_EXEC Socket.ExecStopPre, config_parse_exec, SOCKET_EXEC_STOP_PRE, offsetof(Socket, exec_command) Socket.ExecStopPost, config_parse_exec, SOCKET_EXEC_STOP_POST, offsetof(Socket, exec_command) Socket.TimeoutSec, config_parse_sec, 0, offsetof(Socket, timeout_usec) -Socket.SocketUser, config_parse_unit_string_printf, 0, offsetof(Socket, user) -Socket.SocketGroup, config_parse_unit_string_printf, 0, offsetof(Socket, group) +Socket.SocketUser, config_parse_user_group, 0, offsetof(Socket, user) +Socket.SocketGroup, config_parse_user_group, 0, offsetof(Socket, group) Socket.SocketMode, config_parse_mode, 0, offsetof(Socket, socket_mode) Socket.DirectoryMode, config_parse_mode, 0, offsetof(Socket, directory_mode) Socket.Accept, config_parse_bool, 0, offsetof(Socket, accept) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 782e420e4c..e8cb3a4249 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -64,6 +64,7 @@ #include "unit-name.h" #include "unit-printf.h" #include "unit.h" +#include "user-util.h" #include "utf8.h" #include "web-util.h" @@ -620,7 +621,7 @@ int config_parse_exec( ignore = true; else if (*f == '@' && !separate_argv0) separate_argv0 = true; - else if (*f == '!' 
&& !privileged) + else if (*f == '+' && !privileged) privileged = true; else break; @@ -1763,6 +1764,123 @@ int config_parse_sec_fix_0( return 0; } +int config_parse_user_group( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char **user = data, *n; + Unit *u = userdata; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + if (isempty(rvalue)) + n = NULL; + else { + _cleanup_free_ char *k = NULL; + + r = unit_full_printf(u, rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", rvalue); + return 0; + } + + if (!valid_user_group_name_or_id(k)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k); + return 0; + } + + n = k; + k = NULL; + } + + free(*user); + *user = n; + + return 0; +} + +int config_parse_user_group_strv( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + char ***users = data; + Unit *u = userdata; + const char *p; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + if (isempty(rvalue)) { + char **empty; + + empty = new0(char*, 1); + if (!empty) + return log_oom(); + + strv_free(*users); + *users = empty; + + return 0; + } + + p = rvalue; + for (;;) { + _cleanup_free_ char *word = NULL, *k = NULL; + + r = extract_first_word(&p, &word, WHITESPACE, 0); + if (r == 0) + break; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Invalid syntax, ignoring: %s", rvalue); + break; + } + + r = unit_full_printf(u, word, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit 
specifiers in %s, ignoring: %m", word); + continue; + } + + if (!valid_user_group_name_or_id(k)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid user/group name or numeric ID, ignoring: %s", k); + continue; + } + + r = strv_push(users, k); + if (r < 0) + return log_oom(); + + k = NULL; + } + + return 0; +} + int config_parse_busname_service( const char *unit, const char *filename, @@ -2823,8 +2941,8 @@ int config_parse_memory_limit( } else bytes = physical_memory_scale(r, 100U); - if (bytes < 1) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' too small. Ignoring.", rvalue); + if (bytes <= 0 || bytes >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' out of range. Ignoring.", rvalue); return 0; } } @@ -2861,9 +2979,18 @@ int config_parse_tasks_max( return 0; } - r = safe_atou64(rvalue, &u); - if (r < 0 || u < 1) { - log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + r = parse_percent(rvalue); + if (r < 0) { + r = safe_atou64(rvalue, &u); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); + return 0; + } + } else + u = system_tasks_max_scale(r, 100U); + + if (u <= 0 || u >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. 
Ignoring.", rvalue); return 0; } diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index b36a2e3a02..213bce55a7 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -111,6 +111,8 @@ int config_parse_exec_utmp_mode(const char *unit, const char *filename, unsigned int config_parse_working_directory(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_fdname(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_sec_fix_0(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_user_group(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_user_group_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/core/machine-id-setup.c b/src/core/machine-id-setup.c index ea6b085e4f..76dfcfa6d7 100644 --- a/src/core/machine-id-setup.c +++ b/src/core/machine-id-setup.c @@ -17,11 +17,8 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. 
***/ -#include <errno.h> #include <fcntl.h> #include <sched.h> -#include <stdio.h> -#include <string.h> #include <sys/mount.h> #include <unistd.h> @@ -29,10 +26,8 @@ #include "alloc-util.h" #include "fd-util.h" -#include "fileio.h" #include "fs-util.h" -#include "hexdecoct.h" -#include "io-util.h" +#include "id128-util.h" #include "log.h" #include "machine-id-setup.h" #include "macro.h" @@ -46,101 +41,23 @@ #include "util.h" #include "virt.h" -static int shorten_uuid(char destination[34], const char source[36]) { - unsigned i, j; - - assert(destination); - assert(source); - - /* Converts a UUID into a machine ID, by lowercasing it and - * removing dashes. Validates everything. */ - - for (i = 0, j = 0; i < 36 && j < 32; i++) { - int t; - - t = unhexchar(source[i]); - if (t < 0) - continue; - - destination[j++] = hexchar(t); - } - - if (i != 36 || j != 32) - return -EINVAL; - - destination[32] = '\n'; - destination[33] = 0; - return 0; -} - -static int read_machine_id(int fd, char id[34]) { - char id_to_validate[34]; - int r; - - assert(fd >= 0); - assert(id); - - /* Reads a machine ID from a file, validates it, and returns - * it. The returned ID ends in a newline. 
*/ - - r = loop_read_exact(fd, id_to_validate, 33, false); - if (r < 0) - return r; - - if (id_to_validate[32] != '\n') - return -EINVAL; - - id_to_validate[32] = 0; - - if (!id128_is_valid(id_to_validate)) - return -EINVAL; - - memcpy(id, id_to_validate, 32); - id[32] = '\n'; - id[33] = 0; - return 0; -} - -static int write_machine_id(int fd, const char id[34]) { - int r; - - assert(fd >= 0); - assert(id); - - if (lseek(fd, 0, SEEK_SET) < 0) - return -errno; - - r = loop_write(fd, id, 33, false); - if (r < 0) - return r; - - if (fsync(fd) < 0) - return -errno; - - return 0; -} - -static int generate_machine_id(char id[34], const char *root) { - int fd, r; - unsigned char *p; - sd_id128_t buf; - char *q; +static int generate_machine_id(const char *root, sd_id128_t *ret) { const char *dbus_machine_id; + _cleanup_close_ int fd = -1; + int r; - assert(id); - - dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id"); + assert(ret); /* First, try reading the D-Bus machine id, unless it is a symlink */ + dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id"); fd = open(dbus_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); if (fd >= 0) { - r = read_machine_id(fd, id); - safe_close(fd); - - if (r >= 0) { + if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) { log_info("Initializing machine ID from D-Bus machine ID."); return 0; } + + fd = safe_close(fd); } if (isempty(root)) { @@ -151,13 +68,10 @@ static int generate_machine_id(char id[34], const char *root) { if (detect_container() > 0) { _cleanup_free_ char *e = NULL; - r = getenv_for_pid(1, "container_uuid", &e); - if (r > 0) { - r = shorten_uuid(id, e); - if (r >= 0) { - log_info("Initializing machine ID from container UUID."); - return 0; - } + if (getenv_for_pid(1, "container_uuid", &e) > 0 && + sd_id128_from_string(e, ret) >= 0) { + log_info("Initializing machine ID from container UUID."); + return 0; } } else if (detect_vm() == VIRTUALIZATION_KVM) { @@ -166,51 +80,29 @@ static int 
generate_machine_id(char id[34], const char *root) { * running in qemu/kvm and a machine ID was passed in * via -uuid on the qemu/kvm command line */ - char uuid[36]; - - fd = open("/sys/class/dmi/id/product_uuid", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); - if (fd >= 0) { - r = loop_read_exact(fd, uuid, 36, false); - safe_close(fd); - - if (r >= 0) { - r = shorten_uuid(id, uuid); - if (r >= 0) { - log_info("Initializing machine ID from KVM UUID."); - return 0; - } - } + if (id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, ret) >= 0) { + log_info("Initializing machine ID from KVM UUID."); + return 0; } } } /* If that didn't work, generate a random machine id */ - r = sd_id128_randomize(&buf); + r = sd_id128_randomize(ret); if (r < 0) - return log_error_errno(r, "Failed to open /dev/urandom: %m"); - - for (p = buf.bytes, q = id; p < buf.bytes + sizeof(buf); p++, q += 2) { - q[0] = hexchar(*p >> 4); - q[1] = hexchar(*p & 15); - } - - id[32] = '\n'; - id[33] = 0; + return log_error_errno(r, "Failed to generate randomized : %m"); log_info("Initializing machine ID from random generator."); - return 0; } -int machine_id_setup(const char *root, sd_id128_t machine_id) { +int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) { const char *etc_machine_id, *run_machine_id; _cleanup_close_ int fd = -1; - bool writable = true; - char id[34]; /* 32 + \n + \0 */ + bool writable; int r; etc_machine_id = prefix_roota(root, "/etc/machine-id"); - run_machine_id = prefix_roota(root, "/run/machine-id"); RUN_WITH_UMASK(0000) { /* We create this 0444, to indicate that this isn't really @@ -218,7 +110,7 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { * will be owned by root it doesn't matter much, but maybe * people look. 
*/ - mkdir_parents(etc_machine_id, 0755); + (void) mkdir_parents(etc_machine_id, 0755); fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444); if (fd < 0) { int old_errno = errno; @@ -239,41 +131,41 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { } writable = false; - } + } else + writable = true; } - /* A machine id argument overrides all other machined-ids */ - if (!sd_id128_is_null(machine_id)) { - sd_id128_to_string(machine_id, id); - id[32] = '\n'; - id[33] = 0; - } else { - if (read_machine_id(fd, id) >= 0) - return 0; + /* If we got a valid machine ID argument, that's what counts */ + if (sd_id128_is_null(machine_id)) { - /* Hmm, so, the id currently stored is not useful, then let's - * generate one */ + /* Try to read any existing machine ID */ + if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) + return 0; - r = generate_machine_id(id, root); + /* Hmm, so, the id currently stored is not useful, then let's generate one */ + r = generate_machine_id(root, &machine_id); if (r < 0) return r; + + if (lseek(fd, 0, SEEK_SET) == (off_t) -1) + return log_error_errno(errno, "Failed to seek: %m"); } if (writable) - if (write_machine_id(fd, id) >= 0) - return 0; + if (id128_write_fd(fd, ID128_PLAIN, machine_id, true) >= 0) + goto finish; fd = safe_close(fd); - /* Hmm, we couldn't write it? So let's write it to - * /run/machine-id as a replacement */ + /* Hmm, we couldn't write it? 
So let's write it to /run/machine-id as a replacement */ - RUN_WITH_UMASK(0022) { - r = write_string_file(run_machine_id, id, WRITE_STRING_FILE_CREATE); - if (r < 0) { - (void) unlink(run_machine_id); - return log_error_errno(r, "Cannot write %s: %m", run_machine_id); - } + run_machine_id = prefix_roota(root, "/run/machine-id"); + + RUN_WITH_UMASK(0022) + r = id128_write(run_machine_id, ID128_PLAIN, machine_id, false); + if (r < 0) { + (void) unlink(run_machine_id); + return log_error_errno(r, "Cannot write %s: %m", run_machine_id); } /* And now, let's mount it over */ @@ -286,7 +178,11 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { /* Mark the mount read-only */ if (mount(NULL, etc_machine_id, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL) < 0) - log_warning_errno(errno, "Failed to make transient %s read-only: %m", etc_machine_id); + log_warning_errno(errno, "Failed to make transient %s read-only, ignoring: %m", etc_machine_id); + +finish: + if (ret) + *ret = machine_id; return 0; } @@ -294,9 +190,13 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) { int machine_id_commit(const char *root) { _cleanup_close_ int fd = -1, initial_mntns_fd = -1; const char *etc_machine_id; - char id[34]; /* 32 + \n + \0 */ + sd_id128_t id; int r; + /* Replaces a tmpfs bind mount of /etc/machine-id by a proper file, atomically. For this, the umount is removed + * in a mount namespace, a new file is created at the right place. Afterwards the mount is also removed in the + * original mount namespace, thus revealing the file that was just created. 
*/ + etc_machine_id = prefix_roota(root, "/etc/machine-id"); r = path_is_mount_point(etc_machine_id, 0); @@ -312,10 +212,6 @@ int machine_id_commit(const char *root) { if (fd < 0) return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id); - r = read_machine_id(fd, id); - if (r < 0) - return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id); - r = fd_is_temporary_fs(fd); if (r < 0) return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id); @@ -324,6 +220,10 @@ int machine_id_commit(const char *root) { return -EROFS; } + r = id128_read_fd(fd, ID128_PLAIN, &id); + if (r < 0) + return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id); + fd = safe_close(fd); /* Store current mount namespace */ @@ -342,15 +242,9 @@ int machine_id_commit(const char *root) { return log_error_errno(errno, "Failed to unmount transient %s file in our private namespace: %m", etc_machine_id); /* Update a persistent version of etc_machine_id */ - fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444); - if (fd < 0) - return log_error_errno(errno, "Cannot open for writing %s. This is mandatory to get a persistent machine-id: %m", etc_machine_id); - - r = write_machine_id(fd, id); + r = id128_write(etc_machine_id, ID128_PLAIN, id, true); if (r < 0) - return log_error_errno(r, "Cannot write %s: %m", etc_machine_id); - - fd = safe_close(fd); + return log_error_errno(r, "Cannot write %s. 
This is mandatory to get a persistent machine ID: %m", etc_machine_id); /* Return to initial namespace and proceed a lazy tmpfs unmount */ r = namespace_enter(-1, initial_mntns_fd, -1, -1, -1); diff --git a/src/core/machine-id-setup.h b/src/core/machine-id-setup.h index a7e7678ed9..29f4620646 100644 --- a/src/core/machine-id-setup.h +++ b/src/core/machine-id-setup.h @@ -20,4 +20,4 @@ ***/ int machine_id_commit(const char *root); -int machine_id_setup(const char *root, sd_id128_t machine_id); +int machine_id_setup(const char *root, sd_id128_t requested, sd_id128_t *ret); diff --git a/src/core/macros.systemd.in b/src/core/macros.systemd.in index 028db1cc4a..6e8a3b3e3d 100644 --- a/src/core/macros.systemd.in +++ b/src/core/macros.systemd.in @@ -38,6 +38,12 @@ Requires(preun): systemd \ Requires(postun): systemd \ %{nil} +%systemd_ordering \ +OrderWithRequires(post): systemd \ +OrderWithRequires(preun): systemd \ +OrderWithRequires(postun): systemd \ +%{nil} + %systemd_post() \ if [ $1 -eq 1 ] ; then \ # Initial installation \ diff --git a/src/core/main.c b/src/core/main.c index fc04fb8051..f2adca7d2b 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -127,7 +127,7 @@ static bool arg_default_io_accounting = false; static bool arg_default_blockio_accounting = false; static bool arg_default_memory_accounting = false; static bool arg_default_tasks_accounting = true; -static uint64_t arg_default_tasks_max = UINT64_C(512); +static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; noreturn static void freeze_or_reboot(void) { @@ -291,14 +291,16 @@ static int parse_crash_chvt(const char *value) { } static int set_machine_id(const char *m) { + sd_id128_t t; assert(m); - if (sd_id128_from_string(m, &arg_machine_id) < 0) + if (sd_id128_from_string(m, &t) < 0) return -EINVAL; - if (sd_id128_is_null(arg_machine_id)) + if (sd_id128_is_null(t)) return -EINVAL; + arg_machine_id = t; return 0; } @@ -1298,6 +1300,11 @@ static int 
fixup_environment(void) { _cleanup_free_ char *term = NULL; int r; + /* We expect the environment to be set correctly + * if run inside a container. */ + if (detect_container() > 0) + return 0; + /* When started as PID1, the kernel uses /dev/console * for our stdios and uses TERM=linux whatever the * backend device used by the console. We try to make @@ -1314,7 +1321,7 @@ static int fixup_environment(void) { if (r == 0) { term = strdup(default_term_for_tty("/dev/console") + 5); if (!term) - return -errno; + return -ENOMEM; } if (setenv("TERM", term, 1) < 0) @@ -1508,13 +1515,10 @@ int main(int argc, char *argv[]) { } if (arg_system) { - /* We expect the environment to be set correctly - * if run inside a container. */ - if (detect_container() <= 0) - if (fixup_environment() < 0) { - error_message = "Failed to fix up PID1 environment"; - goto finish; - } + if (fixup_environment() < 0) { + error_message = "Failed to fix up PID1 environment"; + goto finish; + } /* Try to figure out if we can use colors with the console. No * need to do that for user instances since they never log @@ -1556,6 +1560,8 @@ int main(int argc, char *argv[]) { (void) reset_all_signal_handlers(); (void) ignore_signals(SIGNALS_IGNORE, -1); + arg_default_tasks_max = system_tasks_max_scale(15U, 100U); /* 15% the system PIDs equals 4915 by default. 
*/ + if (parse_config_file() < 0) { error_message = "Failed to parse config file"; goto finish; @@ -1717,7 +1723,7 @@ int main(int argc, char *argv[]) { status_welcome(); hostname_setup(); - machine_id_setup(NULL, arg_machine_id); + machine_id_setup(NULL, arg_machine_id, NULL); loopback_setup(); bump_unix_max_dgram_qlen(); diff --git a/src/core/manager.c b/src/core/manager.c index a0181e2138..e41b65da50 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -569,7 +569,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { m->exit_code = _MANAGER_EXIT_CODE_INVALID; m->default_timer_accuracy_usec = USEC_PER_MINUTE; m->default_tasks_accounting = true; - m->default_tasks_max = UINT64_C(512); + m->default_tasks_max = UINT64_MAX; #ifdef ENABLE_EFI if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) @@ -1004,6 +1004,9 @@ Manager* manager_free(Manager *m) { bus_done(m); + dynamic_user_vacuum(m, false); + hashmap_free(m->dynamic_users); + hashmap_free(m->units); hashmap_free(m->jobs); hashmap_free(m->watch_pids1); @@ -1227,6 +1230,9 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { /* Third, fire things up! 
*/ manager_coldplug(m); + /* Release any dynamic users no longer referenced */ + dynamic_user_vacuum(m, true); + if (serialization) { assert(m->n_reloading > 0); m->n_reloading--; @@ -2403,6 +2409,10 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { bus_track_serialize(m->subscribed, f); + r = dynamic_user_serialize(m, f, fds); + if (r < 0) + return r; + fputc('\n', f); HASHMAP_FOREACH_KEY(u, t, m->units, i) { @@ -2579,7 +2589,9 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { m->kdbus_fd = fdset_remove(fds, fd); } - } else { + } else if (startswith(l, "dynamic-user=")) + dynamic_user_deserialize_one(m, l + 13, fds); + else { int k; k = bus_track_deserialize_item(&m->deserialized_subscribed, l); @@ -2660,6 +2672,7 @@ int manager_reload(Manager *m) { manager_clear_jobs_and_units(m); lookup_paths_flush_generator(&m->lookup_paths); lookup_paths_free(&m->lookup_paths); + dynamic_user_vacuum(m, false); q = lookup_paths_init(&m->lookup_paths, m->unit_file_scope, 0, NULL); if (q < 0 && r >= 0) @@ -2696,6 +2709,9 @@ int manager_reload(Manager *m) { /* Third, fire things up! */ manager_coldplug(m); + /* Release any dynamic users no longer referenced */ + dynamic_user_vacuum(m, true); + /* Sync current state of bus names with our set of listening units */ if (m->api_bus) manager_sync_bus_names(m, m->api_bus); diff --git a/src/core/manager.h b/src/core/manager.h index 6ed15c1a41..c681d5dc46 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -298,6 +298,9 @@ struct Manager { /* Used for processing polkit authorization responses */ Hashmap *polkit_registry; + /* Dynamic users/groups, indexed by their name */ + Hashmap *dynamic_users; + /* When the user hits C-A-D more than 7 times per 2s, reboot immediately... 
*/ RateLimit ctrl_alt_del_ratelimit; diff --git a/src/core/mount.c b/src/core/mount.c index fda4d65d6f..db5cafcb11 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -245,6 +245,8 @@ static void mount_done(Unit *u) { exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX); m->control_command = NULL; + dynamic_creds_unref(&m->dynamic_creds); + mount_unwatch_control_pid(m); m->timer_event_source = sd_event_source_unref(m->timer_event_source); @@ -648,6 +650,9 @@ static int mount_coldplug(Unit *u) { return r; } + if (!IN_SET(new_state, MOUNT_DEAD, MOUNT_FAILED)) + (void) unit_setup_dynamic_creds(u); + mount_set_state(m, new_state); return 0; } @@ -716,6 +721,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; + r = unit_setup_dynamic_creds(UNIT(m)); + if (r < 0) + return r; + r = mount_arm_timer(m, usec_add(now(CLOCK_MONOTONIC), m->timeout_usec)); if (r < 0) return r; @@ -732,6 +741,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { &m->exec_context, &exec_params, m->exec_runtime, + &m->dynamic_creds, &pid); if (r < 0) return r; @@ -752,12 +762,14 @@ static void mount_enter_dead(Mount *m, MountResult f) { if (f != MOUNT_SUCCESS) m->result = f; + mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD); + exec_runtime_destroy(m->exec_runtime); m->exec_runtime = exec_runtime_unref(m->exec_runtime); exec_context_destroy_runtime_directory(&m->exec_context, manager_get_runtime_prefix(UNIT(m)->manager)); - mount_set_state(m, m->result != MOUNT_SUCCESS ? 
MOUNT_FAILED : MOUNT_DEAD); + dynamic_creds_destroy(&m->dynamic_creds); } static void mount_enter_mounted(Mount *m, MountResult f) { @@ -1817,6 +1829,7 @@ const UnitVTable mount_vtable = { .cgroup_context_offset = offsetof(Mount, cgroup_context), .kill_context_offset = offsetof(Mount, kill_context), .exec_runtime_offset = offsetof(Mount, exec_runtime), + .dynamic_creds_offset = offsetof(Mount, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/mount.h b/src/core/mount.h index da529c44f4..ac27b518cc 100644 --- a/src/core/mount.h +++ b/src/core/mount.h @@ -21,8 +21,8 @@ typedef struct Mount Mount; -#include "execute.h" #include "kill.h" +#include "dynamic-user.h" typedef enum MountExecCommand { MOUNT_EXEC_MOUNT, @@ -85,6 +85,7 @@ struct Mount { CGroupContext cgroup_context; ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; MountState state, deserialized_state; diff --git a/src/core/namespace.c b/src/core/namespace.c index 4baf4750f4..52a2505d94 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -642,7 +642,7 @@ int setup_netns(int netns_storage_socket[2]) { } fail: - lockf(netns_storage_socket[0], F_ULOCK, 0); + (void) lockf(netns_storage_socket[0], F_ULOCK, 0); return r; } diff --git a/src/core/scope.c b/src/core/scope.c index decd1a1f3f..b45e238974 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -240,7 +240,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { /* If we have a controller set let's ask the controller nicely * to terminate the scope, instead of us going directly into - * SIGTERM beserk mode */ + * SIGTERM berserk mode */ if (state == SCOPE_STOP_SIGTERM) skip_signal = bus_scope_send_request_stop(s) > 0; @@ -248,7 +248,9 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { r = unit_kill_context( UNIT(s), &s->kill_context, - state != SCOPE_STOP_SIGTERM ? KILL_KILL : KILL_TERMINATE, + state != SCOPE_STOP_SIGTERM ? KILL_KILL : + s->was_abandoned ? 
KILL_TERMINATE_AND_LOG : + KILL_TERMINATE, -1, -1, false); if (r < 0) goto fail; @@ -369,6 +371,7 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) { assert(fds); unit_serialize_item(u, f, "state", scope_state_to_string(s->state)); + unit_serialize_item(u, f, "was-abandoned", yes_no(s->was_abandoned)); return 0; } @@ -389,6 +392,14 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F else s->deserialized_state = state; + } else if (streq(key, "was-abandoned")) { + int k; + + k = parse_boolean(value); + if (k < 0) + log_unit_debug(u, "Failed to parse boolean value: %s", value); + else + s->was_abandoned = k; } else log_unit_debug(u, "Unknown serialization key: %s", key); @@ -474,6 +485,7 @@ int scope_abandon(Scope *s) { if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED)) return -ESTALE; + s->was_abandoned = true; s->controller = mfree(s->controller); /* The client is no longer watching the remaining processes, diff --git a/src/core/scope.h b/src/core/scope.h index 2dc86325c5..eaf8e8b447 100644 --- a/src/core/scope.h +++ b/src/core/scope.h @@ -21,7 +21,9 @@ typedef struct Scope Scope; +#include "cgroup.h" #include "kill.h" +#include "unit.h" typedef enum ScopeResult { SCOPE_SUCCESS, @@ -43,6 +45,7 @@ struct Scope { usec_t timeout_stop_usec; char *controller; + bool was_abandoned; sd_event_source *timer_event_source; }; diff --git a/src/core/service.c b/src/core/service.c index 13de671700..4d59d78ecb 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -322,6 +322,8 @@ static void service_done(Unit *u) { s->control_command = NULL; s->main_command = NULL; + dynamic_creds_unref(&s->dynamic_creds); + exit_status_set_free(&s->restart_prevent_status); exit_status_set_free(&s->restart_force_status); exit_status_set_free(&s->success_status); @@ -1030,6 +1032,9 @@ static int service_coldplug(Unit *u) { if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD)) service_start_watchdog(s); + if 
(!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) + (void) unit_setup_dynamic_creds(u); + service_set_state(s, s->deserialized_state); return 0; } @@ -1184,6 +1189,10 @@ static int service_spawn( if (r < 0) return r; + r = unit_setup_dynamic_creds(UNIT(s)); + if (r < 0) + return r; + if (pass_fds || s->exec_context.std_input == EXEC_INPUT_SOCKET || s->exec_context.std_output == EXEC_OUTPUT_SOCKET || @@ -1285,6 +1294,7 @@ static int service_spawn( &s->exec_context, &exec_params, s->exec_runtime, + &s->dynamic_creds, &pid); if (r < 0) return r; @@ -1418,9 +1428,12 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) exec_runtime_destroy(s->exec_runtime); s->exec_runtime = exec_runtime_unref(s->exec_runtime); - /* Also, remove the runtime directory in */ + /* Also, remove the runtime directory */ exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); + /* Release the user, and destroy it if we are the only remaining owner */ + dynamic_creds_destroy(&s->dynamic_creds); + /* Try to delete the pid file. At this point it will be * out-of-date, and some software might be confused by it, so * let's remove it. 
*/ @@ -1674,7 +1687,7 @@ static void service_kill_control_processes(Service *s) { return; p = strjoina(UNIT(s)->cgroup_path, "/control"); - cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, true, true, true, NULL); + cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, CGROUP_SIGCONT|CGROUP_IGNORE_SELF|CGROUP_REMOVE, NULL, NULL, NULL); } static void service_enter_start(Service *s) { @@ -3323,6 +3336,7 @@ const UnitVTable service_vtable = { .cgroup_context_offset = offsetof(Service, cgroup_context), .kill_context_offset = offsetof(Service, kill_context), .exec_runtime_offset = offsetof(Service, exec_runtime), + .dynamic_creds_offset = offsetof(Service, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/service.h b/src/core/service.h index cfef375b03..8e56e1acb9 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -148,6 +148,7 @@ struct Service { /* Runtime data of the execution context */ ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; pid_t main_pid, control_pid; int socket_fd; diff --git a/src/core/socket.c b/src/core/socket.c index e098055885..1ce41a1f07 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -150,6 +150,8 @@ static void socket_done(Unit *u) { exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX); s->control_command = NULL; + dynamic_creds_unref(&s->dynamic_creds); + socket_unwatch_control_pid(s); unit_ref_unset(&s->service); @@ -1602,6 +1604,9 @@ static int socket_coldplug(Unit *u) { return r; } + if (!IN_SET(s->deserialized_state, SOCKET_DEAD, SOCKET_FAILED)) + (void) unit_setup_dynamic_creds(u); + socket_set_state(s, s->deserialized_state); return 0; } @@ -1633,6 +1638,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { if (r < 0) return r; + r = unit_setup_dynamic_creds(UNIT(s)); + if (r < 0) + return r; + r = socket_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec)); if (r < 0) return r; @@ -1654,6 +1663,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, 
pid_t *_pid) { &s->exec_context, &exec_params, s->exec_runtime, + &s->dynamic_creds, &pid); if (r < 0) return r; @@ -1757,12 +1767,14 @@ static void socket_enter_dead(Socket *s, SocketResult f) { if (f != SOCKET_SUCCESS) s->result = f; + socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD); + exec_runtime_destroy(s->exec_runtime); s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); - socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD); + dynamic_creds_destroy(&s->dynamic_creds); } static void socket_enter_signal(Socket *s, SocketState state, SocketResult f); @@ -2930,6 +2942,7 @@ const UnitVTable socket_vtable = { .cgroup_context_offset = offsetof(Socket, cgroup_context), .kill_context_offset = offsetof(Socket, kill_context), .exec_runtime_offset = offsetof(Socket, exec_runtime), + .dynamic_creds_offset = offsetof(Socket, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/socket.h b/src/core/socket.h index 0f1ac69c6f..6c32d67bef 100644 --- a/src/core/socket.h +++ b/src/core/socket.h @@ -94,7 +94,9 @@ struct Socket { ExecContext exec_context; KillContext kill_context; CGroupContext cgroup_context; + ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; /* For Accept=no sockets refers to the one service we'll activate. 
For Accept=yes sockets is either NULL, or filled diff --git a/src/core/swap.c b/src/core/swap.c index a532b15be8..66a318d01f 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -153,6 +153,8 @@ static void swap_done(Unit *u) { exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX); s->control_command = NULL; + dynamic_creds_unref(&s->dynamic_creds); + swap_unwatch_control_pid(s); s->timer_event_source = sd_event_source_unref(s->timer_event_source); @@ -553,6 +555,9 @@ static int swap_coldplug(Unit *u) { return r; } + if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED)) + (void) unit_setup_dynamic_creds(u); + swap_set_state(s, new_state); return 0; } @@ -628,6 +633,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { if (r < 0) goto fail; + r = unit_setup_dynamic_creds(UNIT(s)); + if (r < 0) + return r; + r = swap_arm_timer(s, usec_add(now(CLOCK_MONOTONIC), s->timeout_usec)); if (r < 0) goto fail; @@ -644,6 +653,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { &s->exec_context, &exec_params, s->exec_runtime, + &s->dynamic_creds, &pid); if (r < 0) goto fail; @@ -668,12 +678,14 @@ static void swap_enter_dead(Swap *s, SwapResult f) { if (f != SWAP_SUCCESS) s->result = f; + swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD); + exec_runtime_destroy(s->exec_runtime); s->exec_runtime = exec_runtime_unref(s->exec_runtime); exec_context_destroy_runtime_directory(&s->exec_context, manager_get_runtime_prefix(UNIT(s)->manager)); - swap_set_state(s, s->result != SWAP_SUCCESS ? 
SWAP_FAILED : SWAP_DEAD); + dynamic_creds_destroy(&s->dynamic_creds); } static void swap_enter_active(Swap *s, SwapResult f) { @@ -1466,6 +1478,7 @@ const UnitVTable swap_vtable = { .cgroup_context_offset = offsetof(Swap, cgroup_context), .kill_context_offset = offsetof(Swap, kill_context), .exec_runtime_offset = offsetof(Swap, exec_runtime), + .dynamic_creds_offset = offsetof(Swap, dynamic_creds), .sections = "Unit\0" diff --git a/src/core/swap.h b/src/core/swap.h index fbf66debdc..b0ef50f1e8 100644 --- a/src/core/swap.h +++ b/src/core/swap.h @@ -82,6 +82,7 @@ struct Swap { CGroupContext cgroup_context; ExecRuntime *exec_runtime; + DynamicCreds dynamic_creds; SwapState state, deserialized_state; diff --git a/src/core/system.conf b/src/core/system.conf index db8b7acd78..c6bb050aac 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -42,7 +42,7 @@ #DefaultBlockIOAccounting=no #DefaultMemoryAccounting=no #DefaultTasksAccounting=yes -#DefaultTasksMax=512 +#DefaultTasksMax=15% #DefaultLimitCPU= #DefaultLimitFSIZE= #DefaultLimitDATA= diff --git a/src/core/transaction.c b/src/core/transaction.c index af539171fd..8370b864fb 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -591,6 +591,9 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) { HASHMAP_FOREACH(j, m->jobs, i) { assert(j->installed); + if (j->unit->ignore_on_isolate) + continue; + if (hashmap_get(tr->jobs, j->unit)) continue; diff --git a/src/core/unit.c b/src/core/unit.c index fdf7ce3af3..ff7c562fba 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -3144,7 +3144,7 @@ int unit_kill_common( if (!pid_set) return -ENOMEM; - q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, false, false, pid_set); + q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, 0, pid_set, NULL, NULL); if (q < 0 && q != -EAGAIN && q != -ESRCH && q != -ENOENT) r = q; else @@ -3224,6 +3224,33 @@ void unit_ref_unset(UnitRef *ref) { ref->unit 
= NULL; } +static int user_from_unit_name(Unit *u, char **ret) { + + static const uint8_t hash_key[] = { + 0x58, 0x1a, 0xaf, 0xe6, 0x28, 0x58, 0x4e, 0x96, + 0xb4, 0x4e, 0xf5, 0x3b, 0x8c, 0x92, 0x07, 0xec + }; + + _cleanup_free_ char *n = NULL; + int r; + + r = unit_name_to_prefix(u->id, &n); + if (r < 0) + return r; + + if (valid_user_group_name(n)) { + *ret = n; + n = NULL; + return 0; + } + + /* If we can't use the unit name as a user name, then let's hash it and use that */ + if (asprintf(ret, "_du%016" PRIx64, siphash24(n, strlen(n), hash_key)) < 0) + return -ENOMEM; + + return 0; +} + int unit_patch_contexts(Unit *u) { CGroupContext *cc; ExecContext *ec; @@ -3268,6 +3295,22 @@ int unit_patch_contexts(Unit *u) { if (ec->private_devices) ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_MKNOD); + + if (ec->dynamic_user) { + if (!ec->user) { + r = user_from_unit_name(u, &ec->user); + if (r < 0) + return r; + } + + if (!ec->group) { + ec->group = strdup(ec->user); + if (!ec->group) + return -ENOMEM; + } + + ec->private_tmp = true; + } } cc = unit_get_cgroup_context(u); @@ -3512,6 +3555,43 @@ int unit_make_transient(Unit *u) { return 0; } +static void log_kill(pid_t pid, int sig, void *userdata) { + _cleanup_free_ char *comm = NULL; + + (void) get_process_comm(pid, &comm); + + /* Don't log about processes marked with brackets, under the assumption that these are temporary processes + only, like for example systemd's own PAM stub process. 
*/ + if (comm && comm[0] == '(') + return; + + log_unit_notice(userdata, + "Killing process " PID_FMT " (%s) with signal SIG%s.", + pid, + strna(comm), + signal_to_string(sig)); +} + +static int operation_to_signal(KillContext *c, KillOperation k) { + assert(c); + + switch (k) { + + case KILL_TERMINATE: + case KILL_TERMINATE_AND_LOG: + return c->kill_signal; + + case KILL_KILL: + return SIGKILL; + + case KILL_ABORT: + return SIGABRT; + + default: + assert_not_reached("KillOperation unknown"); + } +} + int unit_kill_context( Unit *u, KillContext *c, @@ -3520,58 +3600,63 @@ int unit_kill_context( pid_t control_pid, bool main_pid_alien) { - bool wait_for_exit = false; + bool wait_for_exit = false, send_sighup; + cg_kill_log_func_t log_func; int sig, r; assert(u); assert(c); + /* Kill the processes belonging to this unit, in preparation for shutting the unit down. Returns > 0 if we + * killed something worth waiting for, 0 otherwise. */ + if (c->kill_mode == KILL_NONE) return 0; - switch (k) { - case KILL_KILL: - sig = SIGKILL; - break; - case KILL_ABORT: - sig = SIGABRT; - break; - case KILL_TERMINATE: - sig = c->kill_signal; - break; - default: - assert_not_reached("KillOperation unknown"); - } + sig = operation_to_signal(c, k); + + send_sighup = + c->send_sighup && + IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) && + sig != SIGHUP; + + log_func = + k != KILL_TERMINATE || + IN_SET(sig, SIGKILL, SIGABRT) ? 
log_kill : NULL; if (main_pid > 0) { - r = kill_and_sigcont(main_pid, sig); + if (log_func) + log_func(main_pid, sig, u); + r = kill_and_sigcont(main_pid, sig); if (r < 0 && r != -ESRCH) { _cleanup_free_ char *comm = NULL; - get_process_comm(main_pid, &comm); + (void) get_process_comm(main_pid, &comm); log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm)); } else { if (!main_pid_alien) wait_for_exit = true; - if (c->send_sighup && k == KILL_TERMINATE) + if (r != -ESRCH && send_sighup) (void) kill(main_pid, SIGHUP); } } if (control_pid > 0) { - r = kill_and_sigcont(control_pid, sig); + if (log_func) + log_func(control_pid, sig, u); + r = kill_and_sigcont(control_pid, sig); if (r < 0 && r != -ESRCH) { _cleanup_free_ char *comm = NULL; - get_process_comm(control_pid, &comm); + (void) get_process_comm(control_pid, &comm); log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm)); } else { wait_for_exit = true; - if (c->send_sighup && k == KILL_TERMINATE) + if (r != -ESRCH && send_sighup) (void) kill(control_pid, SIGHUP); } } @@ -3585,7 +3670,11 @@ int unit_kill_context( if (!pid_set) return -ENOMEM; - r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, k != KILL_TERMINATE, false, pid_set); + r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + sig, + CGROUP_SIGCONT|CGROUP_IGNORE_SELF, + pid_set, + log_func, u); if (r < 0) { if (r != -EAGAIN && r != -ESRCH && r != -ENOENT) log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path); @@ -3610,14 +3699,18 @@ int unit_kill_context( (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; - if (c->send_sighup && k != KILL_KILL) { + if (send_sighup) { set_free(pid_set); pid_set = unit_pid_set(main_pid, control_pid); if (!pid_set) return -ENOMEM; - cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, 
false, true, false, pid_set); + cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, + SIGHUP, + CGROUP_IGNORE_SELF, + pid_set, + NULL, NULL); } } } @@ -3726,6 +3819,26 @@ int unit_setup_exec_runtime(Unit *u) { return exec_runtime_make(rt, unit_get_exec_context(u), u->id); } +int unit_setup_dynamic_creds(Unit *u) { + ExecContext *ec; + DynamicCreds *dcreds; + size_t offset; + + assert(u); + + offset = UNIT_VTABLE(u)->dynamic_creds_offset; + assert(offset > 0); + dcreds = (DynamicCreds*) ((uint8_t*) u + offset); + + ec = unit_get_exec_context(u); + assert(ec); + + if (!ec->dynamic_user) + return 0; + + return dynamic_creds_acquire(dcreds, u->manager, ec->user, ec->group); +} + bool unit_type_supported(UnitType t) { if (_unlikely_(t < 0)) return false; diff --git a/src/core/unit.h b/src/core/unit.h index c41011ed9d..47eb8d50a6 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -36,6 +36,7 @@ typedef struct UnitStatusMessageFormats UnitStatusMessageFormats; typedef enum KillOperation { KILL_TERMINATE, + KILL_TERMINATE_AND_LOG, KILL_KILL, KILL_ABORT, _KILL_OPERATION_MAX, @@ -290,6 +291,10 @@ struct UnitVTable { * that */ size_t exec_runtime_offset; + /* If greater than 0, the offset into the object where the pointer to DynamicCreds is found, if the unit type + * has that. */ + size_t dynamic_creds_offset; + /* The name of the configuration file section with the private settings of this unit */ const char *private_section; @@ -588,6 +593,7 @@ CGroupContext *unit_get_cgroup_context(Unit *u) _pure_; ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_; int unit_setup_exec_runtime(Unit *u); +int unit_setup_dynamic_creds(Unit *u); int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data); int unit_write_drop_in_format(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *format, ...) 
_printf_(4,5); diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 82a54968e7..dcc09fcc6d 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -157,10 +157,8 @@ static int fix_acl(int fd, uid_t uid) { if (acl_create_entry(&acl, &entry) < 0 || acl_set_tag_type(entry, ACL_USER) < 0 || - acl_set_qualifier(entry, &uid) < 0) { - log_error_errno(errno, "Failed to patch ACL: %m"); - return -errno; - } + acl_set_qualifier(entry, &uid) < 0) + return log_error_errno(errno, "Failed to patch ACL: %m"); if (acl_get_permset(entry, &permset) < 0 || acl_add_perm(permset, ACL_READ) < 0) @@ -756,7 +754,6 @@ static int process_socket(int fd) { iovec[n_iovec].iov_len = l; iovec[n_iovec].iov_base = malloc(l + 1); - if (!iovec[n_iovec].iov_base) { r = log_oom(); goto finish; @@ -852,12 +849,42 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) return log_error_errno(errno, "Failed to connect to coredump service: %m"); for (i = 0; i < n_iovec; i++) { - ssize_t n; - assert(iovec[i].iov_len > 0); + struct msghdr mh = { + .msg_iov = (struct iovec*) iovec + i, + .msg_iovlen = 1, + }; + struct iovec copy[2]; + + for (;;) { + if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) + break; + + if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { + /* This field didn't fit? That's a pity. Given that this is just metadata, + * let's truncate the field at half, and try again. We append three dots, in + * order to show that this is truncated. */ + + if (mh.msg_iov != copy) { + /* We don't want to modify the caller's iovec, hence let's create our + * own array, consisting of two new iovecs, where the first is a + * (truncated) copy of what we want to send, and the second one + * contains the trailing dots. */ + copy[0] = iovec[i]; + copy[1] = (struct iovec) { + .iov_base = (char[]) { '.', '.', '.' 
}, + .iov_len = 3, + }; + + mh.msg_iov = copy; + mh.msg_iovlen = 2; + } + + copy[0].iov_len /= 2; /* halve it, and try again */ + continue; + } - n = send(fd, iovec[i].iov_base, iovec[i].iov_len, MSG_NOSIGNAL); - if (n < 0) return log_error_errno(errno, "Failed to send coredump datagram: %m"); + } } r = send_one_fd(fd, input_fd, 0); @@ -867,7 +894,7 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) return 0; } -static int process_journald_crash(const char *context[], int input_fd) { +static int process_special_crash(const char *context[], int input_fd) { _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1; _cleanup_free_ char *filename = NULL; uint64_t coredump_size; @@ -876,7 +903,7 @@ static int process_journald_crash(const char *context[], int input_fd) { assert(context); assert(input_fd >= 0); - /* If we are journald, we cut things short, don't write to the journal, but still create a coredump. */ + /* If we are pid1 or journald, we cut things short, don't write to the journal, but still create a coredump. */ if (arg_storage != COREDUMP_STORAGE_NONE) arg_storage = COREDUMP_STORAGE_EXTERNAL; @@ -889,7 +916,8 @@ static int process_journald_crash(const char *context[], int input_fd) { if (r < 0) return r; - log_info("Detected coredump of the journal daemon itself, diverted to %s.", filename); + log_notice("Detected coredump of the journal daemon or PID 1, diverted to %s.", filename); + return 0; } @@ -949,9 +977,17 @@ static int process_kernel(int argc, char* argv[]) { if (cg_pid_get_unit(pid, &t) >= 0) { - if (streq(t, SPECIAL_JOURNALD_SERVICE)) { + /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. 
*/ + if (streq(t, SPECIAL_INIT_SCOPE)) { + log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); + (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0); + } + + /* Let's avoid dead-locks when processing journald and init crashes, as socket activation and logging + * are unlikely to work then. */ + if (STR_IN_SET(t, SPECIAL_JOURNALD_SERVICE, SPECIAL_INIT_SCOPE)) { free(t); - return process_journald_crash(context, STDIN_FILENO); + return process_special_crash(context, STDIN_FILENO); } core_unit = strjoina("COREDUMP_UNIT=", t); diff --git a/src/firstboot/firstboot.c b/src/firstboot/firstboot.c index 3df72460ef..c9e8e54ee3 100644 --- a/src/firstboot/firstboot.c +++ b/src/firstboot/firstboot.c @@ -427,7 +427,7 @@ static int process_machine_id(void) { if (laccess(etc_machine_id, F_OK) >= 0) return 0; - if (sd_id128_equal(arg_machine_id, SD_ID128_NULL)) + if (sd_id128_is_null(arg_machine_id)) return 0; mkdir_parents(etc_machine_id, 0755); diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c index 5aeca7e2d5..33af553d0d 100644 --- a/src/fstab-generator/fstab-generator.c +++ b/src/fstab-generator/fstab-generator.c @@ -85,13 +85,12 @@ static int add_swap( return log_oom(); f = fopen(unit, "wxe"); - if (!f) { - if (errno == EEXIST) - log_error("Failed to create swap unit file %s, as it already exists. Duplicate entry in /etc/fstab?", unit); - else - log_error_errno(errno, "Failed to create unit file %s: %m", unit); - return -errno; - } + if (!f) + return log_error_errno(errno, + errno == EEXIST ? + "Failed to create swap unit file %s, as it already exists. Duplicate entry in /etc/fstab?" 
: + "Failed to create unit file %s: %m", + unit); fprintf(f, "# Automatically generated by systemd-fstab-generator\n\n" @@ -281,13 +280,12 @@ static int add_mount( return log_oom(); f = fopen(unit, "wxe"); - if (!f) { - if (errno == EEXIST) - log_error("Failed to create mount unit file %s, as it already exists. Duplicate entry in /etc/fstab?", unit); - else - log_error_errno(errno, "Failed to create unit file %s: %m", unit); - return -errno; - } + if (!f) + return log_error_errno(errno, + errno == EEXIST ? + "Failed to create mount unit file %s, as it already exists. Duplicate entry in /etc/fstab?" : + "Failed to create unit file %s: %m", + unit); fprintf(f, "# Automatically generated by systemd-fstab-generator\n\n" diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c index a4938a7c3a..6cc1aad705 100644 --- a/src/gpt-auto-generator/gpt-auto-generator.c +++ b/src/gpt-auto-generator/gpt-auto-generator.c @@ -450,101 +450,101 @@ static int add_automount( } static int add_boot(const char *what) { - _cleanup_blkid_free_probe_ blkid_probe b = NULL; - const char *fstype = NULL, *uuid = NULL; - sd_id128_t id, type_id; + const char *esp; int r; assert(what); - if (!is_efi_boot()) { - log_debug("Not an EFI boot, ignoring /boot."); - return 0; - } - if (in_initrd()) { - log_debug("In initrd, ignoring /boot."); + log_debug("In initrd, ignoring the ESP."); return 0; } if (detect_container() > 0) { - log_debug("In a container, ignoring /boot."); + log_debug("In a container, ignoring the ESP."); return 0; } + /* If /efi exists we'll use that. Otherwise we'll use /boot, as that's usually the better choice */ + esp = access("/efi/", F_OK) >= 0 ? "/efi" : "/boot"; + /* We create an .automount which is not overridden by the .mount from the fstab generator. 
*/ - if (fstab_is_mount_point("/boot")) { - log_debug("/boot specified in fstab, ignoring."); + if (fstab_is_mount_point(esp)) { + log_debug("%s specified in fstab, ignoring.", esp); return 0; } - if (path_is_busy("/boot")) { - log_debug("/boot already populated, ignoring."); + if (path_is_busy(esp)) { + log_debug("%s already populated, ignoring.", esp); return 0; } - r = efi_loader_get_device_part_uuid(&id); - if (r == -ENOENT) { - log_debug("EFI loader partition unknown."); - return 0; - } + if (is_efi_boot()) { + _cleanup_blkid_free_probe_ blkid_probe b = NULL; + const char *fstype = NULL, *uuid_string = NULL; + sd_id128_t loader_uuid, part_uuid; - if (r < 0) { - log_error_errno(r, "Failed to read ESP partition UUID: %m"); - return r; - } + /* If this is an EFI boot, be extra careful, and only mount the ESP if it was the ESP used for booting. */ - errno = 0; - b = blkid_new_probe_from_filename(what); - if (!b) { - if (errno == 0) - return log_oom(); - return log_error_errno(errno, "Failed to allocate prober: %m"); - } - - blkid_probe_enable_partitions(b, 1); - blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); + r = efi_loader_get_device_part_uuid(&loader_uuid); + if (r == -ENOENT) { + log_debug("EFI loader partition unknown."); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to read ESP partition UUID: %m"); - errno = 0; - r = blkid_do_safeprobe(b); - if (r == -2 || r == 1) /* no result or uncertain */ - return 0; - else if (r != 0) - return log_error_errno(errno ?: EIO, "Failed to probe %s: %m", what); + errno = 0; + b = blkid_new_probe_from_filename(what); + if (!b) { + if (errno == 0) + return log_oom(); + return log_error_errno(errno, "Failed to allocate prober: %m"); + } - (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); - if (!streq_ptr(fstype, "vfat")) { - log_debug("Partition for /boot is not a FAT filesystem, ignoring."); - return 0; - } + blkid_probe_enable_partitions(b, 1); + blkid_probe_set_partitions_flags(b, 
BLKID_PARTS_ENTRY_DETAILS); - errno = 0; - r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &uuid, NULL); - if (r != 0) { - log_debug_errno(errno, "Partition for /boot does not have a UUID, ignoring."); - return 0; - } + errno = 0; + r = blkid_do_safeprobe(b); + if (r == -2 || r == 1) /* no result or uncertain */ + return 0; + else if (r != 0) + return log_error_errno(errno ?: EIO, "Failed to probe %s: %m", what); - if (sd_id128_from_string(uuid, &type_id) < 0) { - log_debug("Partition for /boot does not have a valid UUID, ignoring."); - return 0; - } + (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); + if (!streq_ptr(fstype, "vfat")) { + log_debug("Partition for %s is not a FAT filesystem, ignoring.", esp); + return 0; + } - if (!sd_id128_equal(type_id, id)) { - log_debug("Partition for /boot does not appear to be the partition we are booted from."); - return 0; - } + errno = 0; + r = blkid_probe_lookup_value(b, "PART_ENTRY_UUID", &uuid_string, NULL); + if (r != 0) { + log_debug_errno(errno, "Partition for %s does not have a UUID, ignoring.", esp); + return 0; + } - r = add_automount("boot", - what, - "/boot", - "vfat", - true, - "umask=0077", - "EFI System Partition Automount", - 120 * USEC_PER_SEC); + if (sd_id128_from_string(uuid_string, &part_uuid) < 0) { + log_debug("Partition for %s does not have a valid UUID, ignoring.", esp); + return 0; + } - return r; + if (!sd_id128_equal(part_uuid, loader_uuid)) { + log_debug("Partition for %s does not appear to be the partition we are booted from.", esp); + return 0; + } + } else + log_debug("Not an EFI boot, skipping ESP check."); + + return add_automount("boot", + what, + esp, + "vfat", + true, + "umask=0077", + "EFI System Partition Automount", + 120 * USEC_PER_SEC); } #else static int add_boot(const char *what) { diff --git a/src/import/import.c b/src/import/import.c index 4e442ee84a..2b6ca24af8 100644 --- a/src/import/import.c +++ b/src/import/import.c @@ -90,7 +90,7 @@ static int import_tar(int argc, 
char *argv[], void *userdata) { if (r < 0) return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local); else if (r > 0) { - log_error_errno(EEXIST, "Image '%s' already exists.", local); + log_error("Image '%s' already exists.", local); return -EEXIST; } } @@ -185,7 +185,7 @@ static int import_raw(int argc, char *argv[], void *userdata) { if (r < 0) return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local); else if (r > 0) { - log_error_errno(EEXIST, "Image '%s' already exists.", local); + log_error("Image '%s' already exists.", local); return -EEXIST; } } diff --git a/src/import/pull.c b/src/import/pull.c index 72604a6a74..53b1211965 100644 --- a/src/import/pull.c +++ b/src/import/pull.c @@ -97,7 +97,7 @@ static int pull_tar(int argc, char *argv[], void *userdata) { if (r < 0) return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local); else if (r > 0) { - log_error_errno(EEXIST, "Image '%s' already exists.", local); + log_error("Image '%s' already exists.", local); return -EEXIST; } } @@ -183,7 +183,7 @@ static int pull_raw(int argc, char *argv[], void *userdata) { if (r < 0) return log_error_errno(r, "Failed to check whether image '%s' exists: %m", local); else if (r > 0) { - log_error_errno(EEXIST, "Image '%s' already exists.", local); + log_error("Image '%s' already exists.", local); return -EEXIST; } } diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 4cc0c2b6c2..53c6180864 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -1266,7 +1266,7 @@ static int add_boot(sd_journal *j) { /* Take a shortcut and use the current boot_id, which we can do very quickly. * We can do this only when we logs are coming from the current machine, * so take the slow path if log location is specified. 
*/ - if (arg_boot_offset == 0 && sd_id128_equal(arg_boot_id, SD_ID128_NULL) && + if (arg_boot_offset == 0 && sd_id128_is_null(arg_boot_id) && !arg_directory && !arg_file) return add_match_this_boot(j, arg_machine); diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index 886e0ec856..587c343b31 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -877,7 +877,7 @@ void server_driver_message(Server *s, sd_id128_t message_id, const char *format, assert_cc(6 == LOG_INFO); IOVEC_SET_STRING(iovec[n++], "PRIORITY=6"); - if (!sd_id128_equal(message_id, SD_ID128_NULL)) { + if (!sd_id128_is_null(message_id)) { snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id)); IOVEC_SET_STRING(iovec[n++], mid); } diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h index e025a4cf90..d2a32ab422 100644 --- a/src/journal/journald-server.h +++ b/src/journal/journald-server.h @@ -43,7 +43,7 @@ typedef enum Storage { typedef enum SplitMode { SPLIT_UID, - SPLIT_LOGIN, + SPLIT_LOGIN, /* deprecated */ SPLIT_NONE, _SPLIT_MAX, _SPLIT_INVALID = -1 diff --git a/src/kernel-install/kernel-install b/src/kernel-install/kernel-install index 1159dc384d..c66bcfc092 100644 --- a/src/kernel-install/kernel-install +++ b/src/kernel-install/kernel-install @@ -86,10 +86,15 @@ if [[ ! $COMMAND ]] || [[ ! 
$KERNEL_VERSION ]]; then exit 1 fi -if [[ -d /boot/loader/entries ]] || [[ -d /boot/$MACHINE_ID ]]; then +if [[ -d /efi/loader/entries ]] || [[ -d /efi/$MACHINE_ID ]]; then + BOOT_DIR_ABS="/efi/$MACHINE_ID/$KERNEL_VERSION" +elif [[ -d /boot/loader/entries ]] || [[ -d /boot/$MACHINE_ID ]]; then BOOT_DIR_ABS="/boot/$MACHINE_ID/$KERNEL_VERSION" -elif [[ -d /boot/efi/loader/entries ]] || [[ -d /boot/efi/$MACHINE_ID ]] \ - || mountpoint -q /boot/efi; then +elif [[ -d /boot/efi/loader/entries ]] || [[ -d /boot/efi/$MACHINE_ID ]]; then + BOOT_DIR_ABS="/boot/efi/$MACHINE_ID/$KERNEL_VERSION" +elif mountpoint -q /efi; then + BOOT_DIR_ABS="/efi/$MACHINE_ID/$KERNEL_VERSION" +elif mountpoint -q /boot/efi; then BOOT_DIR_ABS="/boot/efi/$MACHINE_ID/$KERNEL_VERSION" else BOOT_DIR_ABS="/boot/$MACHINE_ID/$KERNEL_VERSION" diff --git a/src/libsystemd/sd-bus/bus-common-errors.c b/src/libsystemd/sd-bus/bus-common-errors.c index 02e3bf904c..32be3cdc38 100644 --- a/src/libsystemd/sd-bus/bus-common-errors.c +++ b/src/libsystemd/sd-bus/bus-common-errors.c @@ -44,6 +44,7 @@ BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map bus_common_errors[] = { SD_BUS_ERROR_MAP(BUS_ERROR_NO_ISOLATION, EPERM), SD_BUS_ERROR_MAP(BUS_ERROR_SHUTTING_DOWN, ECANCELED), SD_BUS_ERROR_MAP(BUS_ERROR_SCOPE_NOT_RUNNING, EHOSTDOWN), + SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_DYNAMIC_USER, ESRCH), SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_MACHINE, ENXIO), SD_BUS_ERROR_MAP(BUS_ERROR_NO_SUCH_IMAGE, ENOENT), diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h index c8f369cb78..befb6fbfe0 100644 --- a/src/libsystemd/sd-bus/bus-common-errors.h +++ b/src/libsystemd/sd-bus/bus-common-errors.h @@ -40,6 +40,7 @@ #define BUS_ERROR_NO_ISOLATION "org.freedesktop.systemd1.NoIsolation" #define BUS_ERROR_SHUTTING_DOWN "org.freedesktop.systemd1.ShuttingDown" #define BUS_ERROR_SCOPE_NOT_RUNNING "org.freedesktop.systemd1.ScopeNotRunning" +#define BUS_ERROR_NO_SUCH_DYNAMIC_USER 
"org.freedesktop.systemd1.NoSuchDynamicUser" #define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine" #define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage" diff --git a/src/libsystemd/sd-bus/bus-socket.c b/src/libsystemd/sd-bus/bus-socket.c index f1e2a06050..cfd7753139 100644 --- a/src/libsystemd/sd-bus/bus-socket.c +++ b/src/libsystemd/sd-bus/bus-socket.c @@ -221,7 +221,7 @@ static int bus_socket_auth_verify_client(sd_bus *b) { peer.bytes[i/2] = ((uint8_t) x << 4 | (uint8_t) y); } - if (!sd_id128_equal(b->server_id, SD_ID128_NULL) && + if (!sd_id128_is_null(b->server_id) && !sd_id128_equal(b->server_id, peer)) return -EPERM; diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c new file mode 100644 index 0000000000..c3f527d657 --- /dev/null +++ b/src/libsystemd/sd-id128/id128-util.c @@ -0,0 +1,194 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. 
+***/ + +#include <fcntl.h> +#include <unistd.h> + +#include "fd-util.h" +#include "hexdecoct.h" +#include "id128-util.h" +#include "io-util.h" +#include "stdio-util.h" + +char *id128_to_uuid_string(sd_id128_t id, char s[37]) { + unsigned n, k = 0; + + assert(s); + + /* Similar to sd_id128_to_string() but formats the result as UUID instead of plain hex chars */ + + for (n = 0; n < 16; n++) { + + if (IN_SET(n, 4, 6, 8, 10)) + s[k++] = '-'; + + s[k++] = hexchar(id.bytes[n] >> 4); + s[k++] = hexchar(id.bytes[n] & 0xF); + } + + assert(k == 36); + + s[k] = 0; + + return s; +} + +bool id128_is_valid(const char *s) { + size_t i, l; + + assert(s); + + l = strlen(s); + if (l == 32) { + + /* Plain formatted 128bit hex string */ + + for (i = 0; i < l; i++) { + char c = s[i]; + + if (!(c >= '0' && c <= '9') && + !(c >= 'a' && c <= 'z') && + !(c >= 'A' && c <= 'Z')) + return false; + } + + } else if (l == 36) { + + /* Formatted UUID */ + + for (i = 0; i < l; i++) { + char c = s[i]; + + if ((i == 8 || i == 13 || i == 18 || i == 23)) { + if (c != '-') + return false; + } else { + if (!(c >= '0' && c <= '9') && + !(c >= 'a' && c <= 'z') && + !(c >= 'A' && c <= 'Z')) + return false; + } + } + + } else + return false; + + return true; +} + +int id128_read_fd(int fd, Id128Format f, sd_id128_t *ret) { + char buffer[36 + 2]; + ssize_t l; + + assert(fd >= 0); + assert(f < _ID128_FORMAT_MAX); + + /* Reads an 128bit ID from a file, which may either be in plain format (32 hex digits), or in UUID format, both + * optionally followed by a newline and nothing else. ID files should really be newline terminated, but if they + * aren't that's OK too, following the rule of "Be conservative in what you send, be liberal in what you + * accept". */ + + l = loop_read(fd, buffer, sizeof(buffer), false); /* we expect a short read of either 32/33 or 36/37 chars */ + if (l < 0) + return (int) l; + if (l == 0) /* empty? 
*/ + return -ENOMEDIUM; + + switch (l) { + + case 33: /* plain UUID with trailing newline */ + if (buffer[32] != '\n') + return -EINVAL; + + /* fall through */ + case 32: /* plain UUID without trailing newline */ + if (f == ID128_UUID) + return -EINVAL; + + buffer[32] = 0; + break; + + case 37: /* RFC UUID with trailing newline */ + if (buffer[36] != '\n') + return -EINVAL; + + /* fall through */ + case 36: /* RFC UUID without trailing newline */ + if (f == ID128_PLAIN) + return -EINVAL; + + buffer[36] = 0; + break; + + default: + return -EINVAL; + } + + return sd_id128_from_string(buffer, ret); +} + +int id128_read(const char *p, Id128Format f, sd_id128_t *ret) { + _cleanup_close_ int fd = -1; + + fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) + return -errno; + + return id128_read_fd(fd, f, ret); +} + +int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync) { + char buffer[36 + 2]; + size_t sz; + int r; + + assert(fd >= 0); + assert(f < _ID128_FORMAT_MAX); + + if (f != ID128_UUID) { + sd_id128_to_string(id, buffer); + buffer[32] = '\n'; + sz = 33; + } else { + id128_to_uuid_string(id, buffer); + buffer[36] = '\n'; + sz = 37; + } + + r = loop_write(fd, buffer, sz, false); + if (r < 0) + return r; + + if (do_sync) { + if (fsync(fd) < 0) + return -errno; + } + + return r; +} + +int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync) { + _cleanup_close_ int fd = -1; + + fd = open(p, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444); + if (fd < 0) + return -errno; + + return id128_write_fd(fd, f, id, do_sync); +} diff --git a/src/libsystemd/sd-id128/id128-util.h b/src/libsystemd/sd-id128/id128-util.h new file mode 100644 index 0000000000..3ba59acbca --- /dev/null +++ b/src/libsystemd/sd-id128/id128-util.h @@ -0,0 +1,45 @@ +#pragma once + +/*** + This file is part of systemd. 
+ + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <stdbool.h> + +#include "sd-id128.h" +#include "macro.h" + +char *id128_to_uuid_string(sd_id128_t id, char s[37]); + +/* Like SD_ID128_FORMAT_STR, but formats as UUID, not in plain format */ +#define ID128_UUID_FORMAT_STR "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x" + +bool id128_is_valid(const char *s) _pure_; + +typedef enum Id128Format { + ID128_ANY, + ID128_PLAIN, /* formatted as 32 hex chars as-is */ + ID128_UUID, /* formatted as 36 character uuid string */ + _ID128_FORMAT_MAX, +} Id128Format; + +int id128_read_fd(int fd, Id128Format f, sd_id128_t *ret); +int id128_read(const char *p, Id128Format f, sd_id128_t *ret); + +int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync); +int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync); diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c index d9c0116f60..9f47d04e61 100644 --- a/src/libsystemd/sd-id128/sd-id128.c +++ b/src/libsystemd/sd-id128/sd-id128.c @@ -25,6 +25,7 @@ #include "fd-util.h" #include "hexdecoct.h" +#include "id128-util.h" #include "io-util.h" #include "macro.h" #include "random-util.h" @@ -51,7 +52,6 @@ _public_ int sd_id128_from_string(const char s[], sd_id128_t *ret) { bool is_guid = false; assert_return(s, -EINVAL); - 
assert_return(ret, -EINVAL); for (n = 0, i = 0; n < 16;) { int a, b; @@ -89,121 +89,57 @@ _public_ int sd_id128_from_string(const char s[], sd_id128_t *ret) { if (s[i] != 0) return -EINVAL; - *ret = t; + if (ret) + *ret = t; return 0; } -static sd_id128_t make_v4_uuid(sd_id128_t id) { - /* Stolen from generate_random_uuid() of drivers/char/random.c - * in the kernel sources */ - - /* Set UUID version to 4 --- truly random generation */ - id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40; - - /* Set the UUID variant to DCE */ - id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80; - - return id; -} - _public_ int sd_id128_get_machine(sd_id128_t *ret) { - static thread_local sd_id128_t saved_machine_id; - static thread_local bool saved_machine_id_valid = false; - _cleanup_close_ int fd = -1; - char buf[33]; - unsigned j; - sd_id128_t t; + static thread_local sd_id128_t saved_machine_id = {}; int r; assert_return(ret, -EINVAL); - if (saved_machine_id_valid) { - *ret = saved_machine_id; - return 0; - } - - fd = open("/etc/machine-id", O_RDONLY|O_CLOEXEC|O_NOCTTY); - if (fd < 0) - return -errno; - - r = loop_read_exact(fd, buf, 33, false); - if (r < 0) - return r; - if (buf[32] !='\n') - return -EIO; - - for (j = 0; j < 16; j++) { - int a, b; - - a = unhexchar(buf[j*2]); - b = unhexchar(buf[j*2+1]); - - if (a < 0 || b < 0) - return -EIO; + if (sd_id128_is_null(saved_machine_id)) { + r = id128_read("/etc/machine-id", ID128_PLAIN, &saved_machine_id); + if (r < 0) + return r; - t.bytes[j] = a << 4 | b; + if (sd_id128_is_null(saved_machine_id)) + return -EINVAL; } - saved_machine_id = t; - saved_machine_id_valid = true; - - *ret = t; + *ret = saved_machine_id; return 0; } _public_ int sd_id128_get_boot(sd_id128_t *ret) { - static thread_local sd_id128_t saved_boot_id; - static thread_local bool saved_boot_id_valid = false; - _cleanup_close_ int fd = -1; - char buf[36]; - unsigned j; - sd_id128_t t; - char *p; + static thread_local sd_id128_t saved_boot_id = {}; int r; assert_return(ret, 
-EINVAL); - if (saved_boot_id_valid) { - *ret = saved_boot_id; - return 0; + if (sd_id128_is_null(saved_boot_id)) { + r = id128_read("/proc/sys/kernel/random/boot_id", ID128_UUID, &saved_boot_id); + if (r < 0) + return r; } - fd = open("/proc/sys/kernel/random/boot_id", O_RDONLY|O_CLOEXEC|O_NOCTTY); - if (fd < 0) - return -errno; - - r = loop_read_exact(fd, buf, 36, false); - if (r < 0) - return r; - - for (j = 0, p = buf; j < 16; j++) { - int a, b; - - if (p >= buf + 35) - return -EIO; - - if (*p == '-') { - p++; - if (p >= buf + 35) - return -EIO; - } - - a = unhexchar(p[0]); - b = unhexchar(p[1]); - - if (a < 0 || b < 0) - return -EIO; + *ret = saved_boot_id; + return 0; +} - t.bytes[j] = a << 4 | b; +static sd_id128_t make_v4_uuid(sd_id128_t id) { + /* Stolen from generate_random_uuid() of drivers/char/random.c + * in the kernel sources */ - p += 2; - } + /* Set UUID version to 4 --- truly random generation */ + id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40; - saved_boot_id = t; - saved_boot_id_valid = true; + /* Set the UUID variant to DCE */ + id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80; - *ret = t; - return 0; + return id; } _public_ int sd_id128_randomize(sd_id128_t *ret) { diff --git a/src/login/logind-gperf.gperf b/src/login/logind-gperf.gperf index 6bd08adc05..0b6a5f3cf4 100644 --- a/src/login/logind-gperf.gperf +++ b/src/login/logind-gperf.gperf @@ -36,4 +36,4 @@ Login.RuntimeDirectorySize, config_parse_tmpfs_size, 0, offsetof(Manag Login.RemoveIPC, config_parse_bool, 0, offsetof(Manager, remove_ipc) Login.InhibitorsMax, config_parse_uint64, 0, offsetof(Manager, inhibitors_max) Login.SessionsMax, config_parse_uint64, 0, offsetof(Manager, sessions_max) -Login.UserTasksMax, config_parse_uint64, 0, offsetof(Manager, user_tasks_max) +Login.UserTasksMax, config_parse_user_tasks_max,0, offsetof(Manager, user_tasks_max) diff --git a/src/login/logind-session.c b/src/login/logind-session.c index 1e0666884a..b6da237397 100644 --- a/src/login/logind-session.c +++ 
b/src/login/logind-session.c @@ -603,7 +603,6 @@ int session_start(Session *s) { static int session_stop_scope(Session *s, bool force) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; - char *job = NULL; int r; assert(s); @@ -611,22 +610,25 @@ static int session_stop_scope(Session *s, bool force) { if (!s->scope) return 0; + /* Let's always abandon the scope first. This tells systemd that we are not interested anymore, and everything + * that is left in in the scope is "left-over". Informing systemd about this has the benefit that it will log + * when killing any processes left after this point. */ + r = manager_abandon_scope(s->manager, s->scope, &error); + if (r < 0) + log_warning_errno(r, "Failed to abandon session scope, ignoring: %s", bus_error_message(&error, r)); + + /* Optionally, let's kill everything that's left now. */ if (force || manager_shall_kill(s->manager, s->user->name)) { + char *job = NULL; + r = manager_stop_unit(s->manager, s->scope, &error, &job); - if (r < 0) { - log_error("Failed to stop session scope: %s", bus_error_message(&error, r)); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to stop session scope: %s", bus_error_message(&error, r)); free(s->scope_job); s->scope_job = job; - } else { - r = manager_abandon_scope(s->manager, s->scope, &error); - if (r < 0) { - log_error("Failed to abandon session scope: %s", bus_error_message(&error, r)); - return r; - } - } + } else + s->scope_job = mfree(s->scope_job); return 0; } diff --git a/src/login/logind-user.c b/src/login/logind-user.c index de44d369cf..348e396292 100644 --- a/src/login/logind-user.c +++ b/src/login/logind-user.c @@ -311,8 +311,7 @@ int user_load(User *u) { if (r == -ENOENT) return 0; - log_error_errno(r, "Failed to read %s: %m", u->state_file); - return r; + return log_error_errno(r, "Failed to read %s: %m", u->state_file); } if (display) @@ -870,3 +869,48 @@ int config_parse_tmpfs_size( return 0; } + +int config_parse_user_tasks_max( 
+ const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *m = data; + uint64_t k; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + /* First, try to parse as percentage */ + r = parse_percent(rvalue); + if (r > 0 && r < 100) + k = system_tasks_max_scale(r, 100U); + else { + + /* If the passed argument was not a percentage, or out of range, parse as byte size */ + + r = safe_atou64(rvalue, &k); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse tasks maximum, ignoring: %s", rvalue); + return 0; + } + } + + if (k <= 0 || k >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Tasks maximum out of range, ignoring: %s", rvalue); + return 0; + } + + *m = k; + return 0; +} diff --git a/src/login/logind.c b/src/login/logind.c index d01dd110ea..5ce36d28c7 100644 --- a/src/login/logind.c +++ b/src/login/logind.c @@ -62,7 +62,7 @@ static void manager_reset_config(Manager *m) { m->idle_action = HANDLE_IGNORE; m->runtime_dir_size = physical_memory_scale(10U, 100U); /* 10% */ - m->user_tasks_max = 12288; + m->user_tasks_max = system_tasks_max_scale(33U, 100U); /* 33% */ m->sessions_max = 8192; m->inhibitors_max = 8192; diff --git a/src/login/logind.conf.in b/src/login/logind.conf.in index 32c0844cb6..6f720b7708 100644 --- a/src/login/logind.conf.in +++ b/src/login/logind.conf.in @@ -34,4 +34,4 @@ #RemoveIPC=yes #InhibitorsMax=8192 #SessionsMax=8192 -#UserTasksMax=12288 +#UserTasksMax=33% diff --git a/src/login/logind.h b/src/login/logind.h index 90431eb4b0..086fa1eeb5 100644 --- a/src/login/logind.h +++ b/src/login/logind.h @@ -187,6 +187,7 @@ const struct ConfigPerfItem* logind_gperf_lookup(const char *key, unsigned lengt int manager_set_lid_switch_ignore(Manager *m, usec_t until); int config_parse_tmpfs_size(const char *unit, const char *filename, 
unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_user_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int manager_get_session_from_creds(Manager *m, sd_bus_message *message, const char *name, sd_bus_error *error, Session **ret); int manager_get_user_from_creds(Manager *m, sd_bus_message *message, uid_t uid, sd_bus_error *error, User **ret); diff --git a/src/machine-id-setup/machine-id-setup-main.c b/src/machine-id-setup/machine-id-setup-main.c index 1d55fa04af..cc9b1b38fe 100644 --- a/src/machine-id-setup/machine-id-setup-main.c +++ b/src/machine-id-setup/machine-id-setup-main.c @@ -29,6 +29,7 @@ static char *arg_root = NULL; static bool arg_commit = false; +static bool arg_print = false; static void help(void) { printf("%s [OPTIONS...]\n\n" @@ -37,6 +38,7 @@ static void help(void) { " --version Show package version\n" " --root=ROOT Filesystem root\n" " --commit Commit transient ID\n" + " --print Print used machine ID\n" , program_invocation_short_name); } @@ -46,6 +48,7 @@ static int parse_argv(int argc, char *argv[]) { ARG_VERSION = 0x100, ARG_ROOT, ARG_COMMIT, + ARG_PRINT, }; static const struct option options[] = { @@ -53,6 +56,7 @@ static int parse_argv(int argc, char *argv[]) { { "version", no_argument, NULL, ARG_VERSION }, { "root", required_argument, NULL, ARG_ROOT }, { "commit", no_argument, NULL, ARG_COMMIT }, + { "print", no_argument, NULL, ARG_PRINT }, {} }; @@ -82,6 +86,10 @@ static int parse_argv(int argc, char *argv[]) { arg_commit = true; break; + case ARG_PRINT: + arg_print = true; + break; + case '?': return -EINVAL; @@ -98,6 +106,8 @@ static int parse_argv(int argc, char *argv[]) { } int main(int argc, char *argv[]) { + char buf[SD_ID128_STRING_MAX]; + sd_id128_t id; int r; log_parse_environment(); @@ 
-107,10 +117,24 @@ int main(int argc, char *argv[]) { if (r <= 0) goto finish; - if (arg_commit) + if (arg_commit) { r = machine_id_commit(arg_root); - else - r = machine_id_setup(arg_root, SD_ID128_NULL); + if (r < 0) + goto finish; + + r = sd_id128_get_machine(&id); + if (r < 0) { + log_error_errno(r, "Failed to read machine ID back: %m"); + goto finish; + } + } else { + r = machine_id_setup(arg_root, SD_ID128_NULL, &id); + if (r < 0) + goto finish; + } + + if (arg_print) + puts(sd_id128_to_string(id, buf)); finish: free(arg_root); diff --git a/src/machine/machine.c b/src/machine/machine.c index c1fae57084..dd046d6563 100644 --- a/src/machine/machine.c +++ b/src/machine/machine.c @@ -181,7 +181,7 @@ int machine_save(Machine *m) { fprintf(f, "ROOT=%s\n", escaped); } - if (!sd_id128_equal(m->id, SD_ID128_NULL)) + if (!sd_id128_is_null(m->id)) fprintf(f, "ID=" SD_ID128_FORMAT_STR "\n", SD_ID128_FORMAT_VAL(m->id)); if (m->leader != 0) diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 96e0ab4b8a..ddec6cb4d6 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -528,7 +528,7 @@ static void print_machine_status_info(sd_bus *bus, MachineStatusInfo *i) { fputs(strna(i->name), stdout); - if (!sd_id128_equal(i->id, SD_ID128_NULL)) + if (!sd_id128_is_null(i->id)) printf("(" SD_ID128_FORMAT_STR ")\n", SD_ID128_FORMAT_VAL(i->id)); else putchar('\n'); diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 9f4903c842..803caef3dd 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -23,6 +23,8 @@ #include "alloc-util.h" #include "cgroup-util.h" #include "escape.h" +#include "fd-util.h" +#include "fileio.h" #include "fs-util.h" #include "label.h" #include "mkdir.h" @@ -181,13 +183,15 @@ int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) { static int tmpfs_patch_options( const char *options, - bool userns, uid_t uid_shift, uid_t uid_range, + bool userns, + uid_t uid_shift, uid_t uid_range, 
+ bool patch_ids, const char *selinux_apifs_context, char **ret) { char *buf = NULL; - if (userns && uid_shift != 0) { + if ((userns && uid_shift != 0) || patch_ids) { assert(uid_shift != UID_INVALID); if (options) @@ -218,7 +222,13 @@ static int tmpfs_patch_options( } #endif + if (!buf && options) { + buf = strdup(options); + if (!buf) + return -ENOMEM; + } *ret = buf; + return !!buf; } @@ -271,7 +281,15 @@ int mount_sysfs(const char *dest) { return log_error_errno(errno, "Failed to remove %s: %m", full); x = prefix_roota(top, "/fs/kdbus"); - (void) mkdir(x, 0755); + (void) mkdir_p(x, 0755); + + /* Create mountpoint for cgroups. Otherwise we are not allowed since we + * remount /sys read-only. + */ + if (cg_ns_supported()) { + x = prefix_roota(top, "/fs/cgroup"); + (void) mkdir_p(x, 0755); + } if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0) return log_error_errno(errno, "Failed to make %s read-only: %m", top); @@ -297,18 +315,19 @@ int mount_all(const char *dest, } MountPoint; static const MountPoint mount_table[] = { - { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, - { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */ - { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */ - { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, - { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, - { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, - { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false }, + { "proc", "/proc", "proc", 
NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false }, + { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first ...*/ + { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND, true, true, true }, /* (except for this) */ + { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* ... then, make it r/o */ + { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true }, + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false }, + { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false }, + { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false }, #ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ - { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */ + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */ #endif }; @@ -348,7 +367,7 @@ int mount_all(const char *dest, o = mount_table[k].options; if (streq_ptr(mount_table[k].type, "tmpfs")) { - r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options); + r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, false, selinux_apifs_context, &options); if (r < 0) return log_oom(); if (r > 0) @@ -485,7 +504,7 @@ static int mount_tmpfs( if (r < 0 && r != -EEXIST) return log_error_errno(r, "Creating mount point for 
tmpfs %s failed: %m", where); - r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf); + r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, false, selinux_apifs_context, &buf); if (r < 0) return log_oom(); options = r > 0 ? buf : m->options; @@ -600,6 +619,48 @@ int mount_custom( return 0; } +/* Retrieve existing subsystems. This function is called in a new cgroup + * namespace. + */ +static int get_controllers(Set *subsystems) { + _cleanup_fclose_ FILE *f = NULL; + char line[LINE_MAX]; + + assert(subsystems); + + f = fopen("/proc/self/cgroup", "re"); + if (!f) + return errno == ENOENT ? -ESRCH : -errno; + + FOREACH_LINE(line, f, return -errno) { + int r; + char *e, *l, *p; + + truncate_nl(line); + + l = strchr(line, ':'); + if (!l) + continue; + + l++; + e = strchr(l, ':'); + if (!e) + continue; + + *e = 0; + + if (streq(l, "") || streq(l, "name=systemd")) + continue; + + p = strdup(l); + r = set_consume(subsystems, p); + if (r < 0) + return r; + } + + return 0; +} + static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) { char *to; int r; @@ -628,11 +689,107 @@ static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controlle return 1; } -static int mount_legacy_cgroups( - const char *dest, +/* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */ +static int mount_legacy_cgns_supported( bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { + _cleanup_set_free_free_ Set *controllers = NULL; + const char *cgroup_root = "/sys/fs/cgroup", *c; + int r; + + (void) mkdir_p(cgroup_root, 0755); + + /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. 
*/ + r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW); + if (r < 0) + return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m"); + if (r == 0) { + _cleanup_free_ char *options = NULL; + + /* When cgroup namespaces are enabled and user namespaces are + * used then the mount of the cgroupfs is done *inside* the new + * user namespace. We're root in the new user namespace and the + * kernel will happily translate our uid/gid to the correct + * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply + * pass uid 0 and not uid_shift to tmpfs_patch_options(). + */ + r = tmpfs_patch_options("mode=755", userns, 0, uid_range, true, selinux_apifs_context, &options); + if (r < 0) + return log_oom(); + if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0) + return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m"); + } + + if (cg_unified() > 0) + goto skip_controllers; + + controllers = set_new(&string_hash_ops); + if (!controllers) + return log_oom(); + + r = get_controllers(controllers); + if (r < 0) + return log_error_errno(r, "Failed to determine cgroup controllers: %m"); + + for (;;) { + _cleanup_free_ const char *controller = NULL; + + controller = set_steal_first(controllers); + if (!controller) + break; + + r = mount_legacy_cgroup_hierarchy("", controller, controller, !userns); + if (r < 0) + return r; + + /* When multiple hierarchies are co-mounted, make their + * constituting individual hierarchies a symlink to the + * co-mount. 
+ */ + c = controller; + for (;;) { + _cleanup_free_ char *target = NULL, *tok = NULL; + + r = extract_first_word(&c, &tok, ",", 0); + if (r < 0) + return log_error_errno(r, "Failed to extract co-mounted cgroup controller: %m"); + if (r == 0) + break; + + target = prefix_root("/sys/fs/cgroup", tok); + if (!target) + return log_oom(); + + if (streq(controller, tok)) + break; + + r = symlink_idempotent(controller, target); + if (r == -EINVAL) + return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m"); + if (r < 0) + return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m"); + } + } + +skip_controllers: + r = mount_legacy_cgroup_hierarchy("", "none,name=systemd,xattr", "systemd", false); + if (r < 0) + return r; + + if (!userns) { + if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0) + return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root); + } + + return 0; +} + +/* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. 
*/ +static int mount_legacy_cgns_unsupported( + const char *dest, + bool userns, uid_t uid_shift, uid_t uid_range, + const char *selinux_apifs_context) { _cleanup_set_free_free_ Set *controllers = NULL; const char *cgroup_root; int r; @@ -648,7 +805,7 @@ static int mount_legacy_cgroups( if (r == 0) { _cleanup_free_ char *options = NULL; - r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options); + r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, false, selinux_apifs_context, &options); if (r < 0) return log_oom(); @@ -707,10 +864,8 @@ static int mount_legacy_cgroups( return r; r = symlink_idempotent(combined, target); - if (r == -EINVAL) { - log_error("Invalid existing symlink for combined hierarchy"); - return r; - } + if (r == -EINVAL) + return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m"); if (r < 0) return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m"); } @@ -761,12 +916,15 @@ int mount_cgroups( const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, - const char *selinux_apifs_context) { + const char *selinux_apifs_context, + bool use_cgns) { if (unified_requested) return mount_unified_cgroups(dest); - else - return mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context); + else if (use_cgns && cg_ns_supported()) + return mount_legacy_cgns_supported(userns, uid_shift, uid_range, selinux_apifs_context); + + return mount_legacy_cgns_unsupported(dest, userns, uid_shift, uid_range, selinux_apifs_context); } int mount_systemd_cgroup_writable( @@ -834,7 +992,7 @@ int setup_volatile_state( return log_error_errno(errno, "Failed to create %s: %m", directory); options = "mode=755"; - r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf); + r = tmpfs_patch_options(options, userns, uid_shift, uid_range, false, selinux_apifs_context, &buf); if (r < 0) return 
log_oom(); if (r > 0) @@ -870,7 +1028,7 @@ int setup_volatile( return log_error_errno(errno, "Failed to create temporary directory: %m"); options = "mode=755"; - r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf); + r = tmpfs_patch_options(options, userns, uid_shift, uid_range, false, selinux_apifs_context, &buf); if (r < 0) return log_oom(); if (r > 0) diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 0daf145412..0eff8e1006 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -58,7 +58,7 @@ int custom_mount_compare(const void *a, const void *b); int mount_all(const char *dest, bool use_userns, bool in_userns, bool use_netns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); int mount_sysfs(const char *dest); -int mount_cgroups(const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); +int mount_cgroups(const char *dest, bool unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool use_cgns); int mount_systemd_cgroup_writable(const char *dest, bool unified_requested); int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c index 7fd711b8a4..e5b76a0c5d 100644 --- a/src/nspawn/nspawn-register.c +++ b/src/nspawn/nspawn-register.c @@ -104,7 +104,7 @@ int register_machine( return bus_log_create_error(r); } - r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict"); + r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "closed"); if (r < 0) return bus_log_create_error(r); @@ -112,31 +112,20 @@ int register_machine( * systemd-nspawn@.service, to keep the device * policies in sync regardless if we are run with or * without the --keep-unit switch. 
*/ - r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 11, + r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 2, /* Allow the container to * access and create the API * device nodes, so that * PrivateDevices= in the * container can work * fine */ - "/dev/null", "rwm", - "/dev/zero", "rwm", - "/dev/full", "rwm", - "/dev/random", "rwm", - "/dev/urandom", "rwm", - "/dev/tty", "rwm", "/dev/net/tun", "rwm", /* Allow the container * access to ptys. However, * do not permit the * container to ever create * these device nodes. */ - "/dev/pts/ptmx", "rw", - "char-pts", "rw", - /* Allow /run/systemd/inaccessible/{chr,blk} - * devices inside the container */ - "/run/systemd/inaccessible/chr", "rwm", - "/run/systemd/inaccessible/blk", "rwm"); + "char-pts", "rw"); if (r < 0) return bus_log_create_error(r); diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c index 54db1b47f8..3ab7160ebe 100644 --- a/src/nspawn/nspawn-seccomp.c +++ b/src/nspawn/nspawn-seccomp.c @@ -119,10 +119,8 @@ static int seccomp_add_default_syscall_filter(scmp_filter_ctx ctx, r = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0); if (r == -EFAULT) continue; /* unknown syscall */ - if (r < 0) { - log_error_errno(r, "Failed to block syscall: %m"); - return r; - } + if (r < 0) + return log_error_errno(r, "Failed to block syscall: %m"); } return 0; diff --git a/src/nspawn/nspawn-setuid.c b/src/nspawn/nspawn-setuid.c index ee15a47e93..b8e8e091c8 100644 --- a/src/nspawn/nspawn-setuid.c +++ b/src/nspawn/nspawn-setuid.c @@ -124,14 +124,12 @@ int change_uid_gid(const char *user, char **_home) { fd = -1; if (!fgets(line, sizeof(line), f)) { - if (!ferror(f)) { log_error("Failed to resolve user %s.", user); return -ESRCH; } - log_error_errno(errno, "Failed to read from getent: %m"); - return -errno; + return log_error_errno(errno, "Failed to read from getent: %m"); } truncate_nl(line); @@ -214,8 +212,7 @@ int change_uid_gid(const char *user, char 
**_home) { return -ESRCH; } - log_error_errno(errno, "Failed to read from getent: %m"); - return -errno; + return log_error_errno(errno, "Failed to read from getent: %m"); } truncate_nl(line); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index e4be0a2251..6cc1b9177d 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -61,9 +61,9 @@ #include "fs-util.h" #include "gpt.h" #include "hostname-util.h" +#include "id128-util.h" #include "log.h" #include "loopback-setup.h" -#include "machine-id-setup.h" #include "machine-image.h" #include "macro.h" #include "missing.h" @@ -76,10 +76,10 @@ #include "nspawn-network.h" #include "nspawn-patch-uid.h" #include "nspawn-register.h" +#include "nspawn-seccomp.h" #include "nspawn-settings.h" #include "nspawn-setuid.h" #include "nspawn-stub-pid1.h" -#include "nspawn-seccomp.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -101,9 +101,11 @@ #include "util.h" /* Note that devpts's gid= parameter parses GIDs as signed values, hence we stay away from the upper half of the 32bit - * UID range here */ + * UID range here. We leave a bit of room at the lower end and a lot of room at the upper end, so that other subsystems + * may have their own allocation ranges too. */ #define UID_SHIFT_PICK_MIN ((uid_t) UINT32_C(0x00080000)) #define UID_SHIFT_PICK_MAX ((uid_t) UINT32_C(0x6FFF0000)) + /* nspawn is listening on the socket at the path in the constant nspawn_notify_socket_path * nspawn_notify_socket_path is relative to the container * the init process in the container pid can send messages to nspawn following the sd_notify(3) protocol */ @@ -192,6 +194,7 @@ static int arg_settings_trusted = -1; static char **arg_parameters = NULL; static const char *arg_container_service_name = "systemd-nspawn"; static bool arg_notify_ready = false; +static bool arg_use_cgns = true; static void help(void) { printf("%s [OPTIONS...] 
[PATH] [ARGUMENTS...]\n\n" @@ -277,7 +280,6 @@ static void help(void) { , program_invocation_short_name); } - static int custom_mounts_prepare(void) { unsigned i; int r; @@ -593,9 +595,12 @@ static int parse_argv(int argc, char *argv[]) { case ARG_UUID: r = sd_id128_from_string(optarg, &arg_uuid); - if (r < 0) { - log_error("Invalid UUID: %s", optarg); - return r; + if (r < 0) + return log_error_errno(r, "Invalid UUID: %s", optarg); + + if (sd_id128_is_null(arg_uuid)) { + log_error("Machine UUID may not be all zeroes."); + return -EINVAL; } arg_settings_mask |= SETTING_MACHINE_ID; @@ -1100,6 +1105,12 @@ static int parse_argv(int argc, char *argv[]) { if (e) arg_container_service_name = e; + r = getenv_bool("SYSTEMD_NSPAWN_USE_CGNS"); + if (r < 0) + arg_use_cgns = cg_ns_supported(); + else + arg_use_cgns = r; + return 1; } @@ -1265,20 +1276,9 @@ static int setup_resolv_conf(const char *dest) { return 0; } -static char* id128_format_as_uuid(sd_id128_t id, char s[37]) { - assert(s); - - snprintf(s, 37, - "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", - SD_ID128_FORMAT_VAL(id)); - - return s; -} - static int setup_boot_id(const char *dest) { + sd_id128_t rnd = SD_ID128_NULL; const char *from, *to; - sd_id128_t rnd = {}; - char as_uuid[37]; int r; if (arg_share_system) @@ -1294,18 +1294,16 @@ static int setup_boot_id(const char *dest) { if (r < 0) return log_error_errno(r, "Failed to generate random boot id: %m"); - id128_format_as_uuid(rnd, as_uuid); - - r = write_string_file(from, as_uuid, WRITE_STRING_FILE_CREATE); + r = id128_write(from, ID128_UUID, rnd, false); if (r < 0) return log_error_errno(r, "Failed to write boot id: %m"); if (mount(from, to, NULL, MS_BIND, NULL) < 0) r = log_error_errno(errno, "Failed to bind mount boot id: %m"); else if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NODEV, NULL) < 0) - log_warning_errno(errno, "Failed to make boot id read-only: %m"); + log_warning_errno(errno, "Failed to make boot 
id read-only, ignoring: %m"); - unlink(from); + (void) unlink(from); return r; } @@ -1803,17 +1801,18 @@ static int dissect_image( char **root_device, bool *root_device_rw, char **home_device, bool *home_device_rw, char **srv_device, bool *srv_device_rw, + char **esp_device, bool *secondary) { #ifdef HAVE_BLKID - int home_nr = -1, srv_nr = -1; + int home_nr = -1, srv_nr = -1, esp_nr = -1; #ifdef GPT_ROOT_NATIVE int root_nr = -1; #endif #ifdef GPT_ROOT_SECONDARY int secondary_root_nr = -1; #endif - _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL, *generic = NULL; + _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL, *esp = NULL, *generic = NULL; _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL; _cleanup_udev_device_unref_ struct udev_device *d = NULL; _cleanup_blkid_free_probe_ blkid_probe b = NULL; @@ -1831,6 +1830,7 @@ static int dissect_image( assert(root_device); assert(home_device); assert(srv_device); + assert(esp_device); assert(secondary); assert(arg_image); @@ -2044,6 +2044,16 @@ static int dissect_image( r = free_and_strdup(&srv, node); if (r < 0) return log_oom(); + } else if (sd_id128_equal(type_id, GPT_ESP)) { + + if (esp && nr >= esp_nr) + continue; + + esp_nr = nr; + + r = free_and_strdup(&esp, node); + if (r < 0) + return log_oom(); } #ifdef GPT_ROOT_NATIVE else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) { @@ -2161,6 +2171,11 @@ static int dissect_image( *srv_device_rw = srv_rw; } + if (esp) { + *esp_device = esp; + esp = NULL; + } + return 0; #else log_error("--image= is not supported, compiled without blkid support."); @@ -2231,33 +2246,37 @@ static int mount_device(const char *what, const char *where, const char *directo } static int setup_machine_id(const char *directory) { + const char *etc_machine_id; + sd_id128_t id; int r; - const char *etc_machine_id, *t; - _cleanup_free_ char *s = NULL; - etc_machine_id = prefix_roota(directory, "/etc/machine-id"); + /* If 
the UUID in the container is already set, then that's what counts, and we use it. If it isn't set, and the + * caller passed --uuid=, then we'll pass it in the $container_uuid env var to PID 1 of the container. The + * assumption is that PID 1 will then write it to /etc/machine-id to make it persistent. If --uuid= is not + * passed we generate a random UUID, and pass it via $container_uuid. In effect this means that /etc/machine-id + * in the container and our idea of the container UUID will always be in sync (at least if PID 1 in the + * container behaves nicely). */ - r = read_one_line_file(etc_machine_id, &s); - if (r < 0) - return log_error_errno(r, "Failed to read machine ID from %s: %m", etc_machine_id); + etc_machine_id = prefix_roota(directory, "/etc/machine-id"); - t = strstrip(s); + r = id128_read(etc_machine_id, ID128_PLAIN, &id); + if (r < 0) { + if (!IN_SET(r, -ENOENT, -ENOMEDIUM)) /* If the file is missing or empty, we don't mind */ + return log_error_errno(r, "Failed to read machine ID from container image: %m"); - if (!isempty(t)) { - r = sd_id128_from_string(t, &arg_uuid); - if (r < 0) - return log_error_errno(r, "Failed to parse machine ID from %s: %m", etc_machine_id); - } else { if (sd_id128_is_null(arg_uuid)) { r = sd_id128_randomize(&arg_uuid); if (r < 0) - return log_error_errno(r, "Failed to generate random machine ID: %m"); + return log_error_errno(r, "Failed to acquire randomized machine UUID: %m"); + } + } else { + if (sd_id128_is_null(id)) { + log_error("Machine ID in container image is zero, refusing."); + return -EINVAL; } - } - r = machine_id_setup(directory, arg_uuid); - if (r < 0) - return log_error_errno(r, "Failed to setup machine ID: %m"); + arg_uuid = id; + } return 0; } @@ -2289,7 +2308,8 @@ static int mount_devices( const char *where, const char *root_device, bool root_device_rw, const char *home_device, bool home_device_rw, - const char *srv_device, bool srv_device_rw) { + const char *srv_device, bool srv_device_rw, + const char 
*esp_device) { int r; assert(where); @@ -2312,6 +2332,27 @@ static int mount_devices( return log_error_errno(r, "Failed to mount server data directory: %m"); } + if (esp_device) { + const char *mp, *x; + + /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */ + + mp = "/efi"; + x = strjoina(arg_directory, mp); + r = dir_is_empty(x); + if (r == -ENOENT) { + mp = "/boot"; + x = strjoina(arg_directory, mp); + r = dir_is_empty(x); + } + + if (r > 0) { + r = mount_device(esp_device, arg_directory, mp, true); + if (r < 0) + return log_error_errno(r, "Failed to mount ESP: %m"); + } + } + return 0; } @@ -2594,9 +2635,25 @@ static int inner_child( return -ESRCH; } - r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy); - if (r < 0) - return r; + if (arg_use_cgns && cg_ns_supported()) { + r = unshare(CLONE_NEWCGROUP); + if (r < 0) + return log_error_errno(errno, "Failed to unshare cgroup namespace"); + r = mount_cgroups( + "", + arg_unified_cgroup_hierarchy, + arg_userns_mode != USER_NAMESPACE_NO, + arg_uid_shift, + arg_uid_range, + arg_selinux_apifs_context, + arg_use_cgns); + if (r < 0) + return r; + } else { + r = mount_systemd_cgroup_writable("", arg_unified_cgroup_hierarchy); + if (r < 0) + return r; + } r = reset_uid_gid(); if (r < 0) @@ -2662,9 +2719,9 @@ static int inner_child( (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? 
arg_user : "root") < 0)) return log_oom(); - assert(!sd_id128_equal(arg_uuid, SD_ID128_NULL)); + assert(!sd_id128_is_null(arg_uuid)); - if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_format_as_uuid(arg_uuid, as_uuid)) < 0) + if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_to_uuid_string(arg_uuid, as_uuid)) < 0) return log_oom(); if (fdset_size(fds) > 0) { @@ -2785,6 +2842,7 @@ static int outer_child( const char *root_device, bool root_device_rw, const char *home_device, bool home_device_rw, const char *srv_device, bool srv_device_rw, + const char *esp_device, bool interactive, bool secondary, int pid_socket, @@ -2846,7 +2904,8 @@ static int outer_child( r = mount_devices(directory, root_device, root_device_rw, home_device, home_device_rw, - srv_device, srv_device_rw); + srv_device, srv_device_rw, + esp_device); if (r < 0) return r; @@ -2978,15 +3037,18 @@ static int outer_child( if (r < 0) return r; - r = mount_cgroups( - directory, - arg_unified_cgroup_hierarchy, - arg_userns_mode != USER_NAMESPACE_NO, - arg_uid_shift, - arg_uid_range, - arg_selinux_apifs_context); - if (r < 0) - return r; + if (!arg_use_cgns || !cg_ns_supported()) { + r = mount_cgroups( + directory, + arg_unified_cgroup_hierarchy, + arg_userns_mode != USER_NAMESPACE_NO, + arg_uid_shift, + arg_uid_range, + arg_selinux_apifs_context, + arg_use_cgns); + if (r < 0) + return r; + } r = mount_move_root(directory); if (r < 0) @@ -3449,7 +3511,7 @@ static int load_settings(void) { int main(int argc, char *argv[]) { - _cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL, *console = NULL; + _cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL, *esp_device = NULL, *console = NULL; bool root_device_rw = true, home_device_rw = true, srv_device_rw = true; _cleanup_close_ int master = -1, image_fd = -1; _cleanup_fdset_free_ FDSet *fds = NULL; @@ -3558,7 +3620,7 @@ int main(int 
argc, char *argv[]) { } if (r < 0) { log_error_errno(r, "Failed to lock %s: %m", arg_directory); - return r; + goto finish; } if (arg_template) { @@ -3631,6 +3693,7 @@ int main(int argc, char *argv[]) { &root_device, &root_device_rw, &home_device, &home_device_rw, &srv_device, &srv_device_rw, + &esp_device, &secondary); if (r < 0) goto finish; @@ -3805,6 +3868,7 @@ int main(int argc, char *argv[]) { root_device, root_device_rw, home_device, home_device_rw, srv_device, srv_device_rw, + esp_device, interactive, secondary, pid_socket_pair[1], diff --git a/src/nss-systemd/Makefile b/src/nss-systemd/Makefile new file mode 120000 index 0000000000..d0b0e8e008 --- /dev/null +++ b/src/nss-systemd/Makefile @@ -0,0 +1 @@ +../Makefile
\ No newline at end of file diff --git a/src/nss-systemd/nss-systemd.c b/src/nss-systemd/nss-systemd.c new file mode 100644 index 0000000000..e7a4393bb0 --- /dev/null +++ b/src/nss-systemd/nss-systemd.c @@ -0,0 +1,332 @@ +/*** + This file is part of systemd. + + Copyright 2016 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <nss.h> + +#include "sd-bus.h" + +#include "bus-common-errors.h" +#include "env-util.h" +#include "macro.h" +#include "nss-util.h" +#include "signal-util.h" +#include "user-util.h" +#include "util.h" + +NSS_GETPW_PROTOTYPES(systemd); +NSS_GETGR_PROTOTYPES(systemd); + +enum nss_status _nss_systemd_getpwnam_r( + const char *name, + struct passwd *pwd, + char *buffer, size_t buflen, + int *errnop) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + uint32_t translated; + size_t l; + int r; + + BLOCK_SIGNALS(NSS_SIGNALS_BLOCK); + + assert(name); + assert(pwd); + + /* Make sure that we don't go in circles when allocating a dynamic UID by checking our own database */ + if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0) + goto not_found; + + r = sd_bus_open_system(&bus); + if (r < 0) + goto fail; + + r = sd_bus_call_method(bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", 
+ "org.freedesktop.systemd1.Manager", + "LookupDynamicUserByName", + &error, + &reply, + "s", + name); + if (r < 0) { + if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER)) + goto not_found; + + goto fail; + } + + r = sd_bus_message_read(reply, "u", &translated); + if (r < 0) + goto fail; + + l = strlen(name); + if (buflen < l+1) { + *errnop = ENOMEM; + return NSS_STATUS_TRYAGAIN; + } + + memcpy(buffer, name, l+1); + + pwd->pw_name = buffer; + pwd->pw_uid = (uid_t) translated; + pwd->pw_gid = (uid_t) translated; + pwd->pw_gecos = (char*) "Dynamic User"; + pwd->pw_passwd = (char*) "*"; /* locked */ + pwd->pw_dir = (char*) "/"; + pwd->pw_shell = (char*) "/sbin/nologin"; + + *errnop = 0; + return NSS_STATUS_SUCCESS; + +not_found: + *errnop = 0; + return NSS_STATUS_NOTFOUND; + +fail: + *errnop = -r; + return NSS_STATUS_UNAVAIL; +} + +enum nss_status _nss_systemd_getpwuid_r( + uid_t uid, + struct passwd *pwd, + char *buffer, size_t buflen, + int *errnop) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + const char *translated; + size_t l; + int r; + + BLOCK_SIGNALS(NSS_SIGNALS_BLOCK); + + if (!uid_is_valid(uid)) { + r = -EINVAL; + goto fail; + } + + if (uid <= SYSTEM_UID_MAX) + goto not_found; + + if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0) + goto not_found; + + r = sd_bus_open_system(&bus); + if (r < 0) + goto fail; + + r = sd_bus_call_method(bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "LookupDynamicUserByUID", + &error, + &reply, + "u", + (uint32_t) uid); + if (r < 0) { + if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER)) + goto not_found; + + goto fail; + } + + r = sd_bus_message_read(reply, "s", &translated); + if (r < 0) + goto fail; + + l = strlen(translated) + 1; + if (buflen < l) { + *errnop = ENOMEM; + return 
NSS_STATUS_TRYAGAIN; + } + + memcpy(buffer, translated, l); + + pwd->pw_name = buffer; + pwd->pw_uid = uid; + pwd->pw_gid = uid; + pwd->pw_gecos = (char*) "Dynamic User"; + pwd->pw_passwd = (char*) "*"; /* locked */ + pwd->pw_dir = (char*) "/"; + pwd->pw_shell = (char*) "/sbin/nologin"; + + *errnop = 0; + return NSS_STATUS_SUCCESS; + +not_found: + *errnop = 0; + return NSS_STATUS_NOTFOUND; + +fail: + *errnop = -r; + return NSS_STATUS_UNAVAIL; +} + +enum nss_status _nss_systemd_getgrnam_r( + const char *name, + struct group *gr, + char *buffer, size_t buflen, + int *errnop) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + uint32_t translated; + size_t l; + int r; + + BLOCK_SIGNALS(NSS_SIGNALS_BLOCK); + + assert(name); + assert(gr); + + if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0) + goto not_found; + + r = sd_bus_open_system(&bus); + if (r < 0) + goto fail; + + r = sd_bus_call_method(bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "LookupDynamicUserByName", + &error, + &reply, + "s", + name); + if (r < 0) { + if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER)) + goto not_found; + + goto fail; + } + + r = sd_bus_message_read(reply, "u", &translated); + if (r < 0) + goto fail; + + l = sizeof(char*) + strlen(name) + 1; + if (buflen < l) { + *errnop = ENOMEM; + return NSS_STATUS_TRYAGAIN; + } + + memzero(buffer, sizeof(char*)); + strcpy(buffer + sizeof(char*), name); + + gr->gr_name = buffer + sizeof(char*); + gr->gr_gid = (gid_t) translated; + gr->gr_passwd = (char*) "*"; /* locked */ + gr->gr_mem = (char**) buffer; + + *errnop = 0; + return NSS_STATUS_SUCCESS; + +not_found: + *errnop = 0; + return NSS_STATUS_NOTFOUND; + +fail: + *errnop = -r; + return NSS_STATUS_UNAVAIL; +} + +enum nss_status _nss_systemd_getgrgid_r( + gid_t gid, + struct 
group *gr, + char *buffer, size_t buflen, + int *errnop) { + + _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; + _cleanup_(sd_bus_message_unrefp) sd_bus_message* reply = NULL; + _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; + const char *translated; + size_t l; + int r; + + BLOCK_SIGNALS(NSS_SIGNALS_BLOCK); + + if (!gid_is_valid(gid)) { + r = -EINVAL; + goto fail; + } + + if (gid <= SYSTEM_GID_MAX) + goto not_found; + + if (getenv_bool("SYSTEMD_NSS_DYNAMIC_BYPASS") > 0) + goto not_found; + + r = sd_bus_open_system(&bus); + if (r < 0) + goto fail; + + r = sd_bus_call_method(bus, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "LookupDynamicUserByUID", + &error, + &reply, + "u", + (uint32_t) gid); + if (r < 0) { + if (sd_bus_error_has_name(&error, BUS_ERROR_NO_SUCH_DYNAMIC_USER)) + goto not_found; + + goto fail; + } + + r = sd_bus_message_read(reply, "s", &translated); + if (r < 0) + goto fail; + + l = sizeof(char*) + strlen(translated) + 1; + if (buflen < l) { + *errnop = ENOMEM; + return NSS_STATUS_TRYAGAIN; + } + + memzero(buffer, sizeof(char*)); + strcpy(buffer + sizeof(char*), translated); + + gr->gr_name = buffer + sizeof(char*); + gr->gr_gid = gid; + gr->gr_passwd = (char*) "*"; /* locked */ + gr->gr_mem = (char**) buffer; + + *errnop = 0; + return NSS_STATUS_SUCCESS; + +not_found: + *errnop = 0; + return NSS_STATUS_NOTFOUND; + +fail: + *errnop = -r; + return NSS_STATUS_UNAVAIL; +} diff --git a/src/nss-systemd/nss-systemd.sym b/src/nss-systemd/nss-systemd.sym new file mode 100644 index 0000000000..955078788a --- /dev/null +++ b/src/nss-systemd/nss-systemd.sym @@ -0,0 +1,17 @@ +/*** + This file is part of systemd. + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. 
+***/ + +{ +global: + _nss_systemd_getpwnam_r; + _nss_systemd_getpwuid_r; + _nss_systemd_getgrnam_r; + _nss_systemd_getgrgid_r; +local: *; +}; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 94ffa8af87..14bf8ad627 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -132,10 +132,10 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen char *n; /* When this is a percentage we'll convert this into a relative value in the range - * 0…UINT32_MAX and pass it in the MemoryLowByPhysicalMemory property (and related + * 0…UINT32_MAX and pass it in the MemoryLowScale property (and related * ones). This way the physical memory size can be determined server-side */ - n = strjoina(field, "ByPhysicalMemory"); + n = strjoina(field, "Scale"); r = sd_bus_message_append(m, "sv", n, "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U)); goto finish; @@ -148,6 +148,26 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "sv", field, "t", bytes); goto finish; + } else if (streq(field, "TasksMax")) { + uint64_t t; + + if (isempty(eq) || streq(eq, "infinity")) + t = (uint64_t) -1; + else { + r = parse_percent(eq); + if (r >= 0) { + r = sd_bus_message_append(m, "sv", "TasksMaxScale", "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U)); + goto finish; + } else { + r = safe_atou64(eq, &t); + if (r < 0) + return log_error_errno(r, "Failed to parse maximum tasks specification %s", assignment); + } + + } + + r = sd_bus_message_append(m, "sv", "TasksMax", "t", t); + goto finish; } r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); @@ -179,11 +199,12 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur); } else if (STR_IN_SET(field, - "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting", - "SendSIGHUP", "SendSIGKILL", 
"WakeSystem", "DefaultDependencies", - "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit", - "PrivateTmp", "PrivateDevices", "PrivateNetwork", "NoNewPrivileges", - "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute")) { + "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting", + "SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies", + "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "RemainAfterExit", + "PrivateTmp", "PrivateDevices", "PrivateNetwork", "NoNewPrivileges", + "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute", + "RestrictRealtime", "DynamicUser")) { r = parse_boolean(eq); if (r < 0) @@ -191,21 +212,6 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen r = sd_bus_message_append(m, "v", "b", r); - } else if (streq(field, "TasksMax")) { - uint64_t n; - - if (isempty(eq) || streq(eq, "infinity")) - n = (uint64_t) -1; - else { - r = safe_atou64(eq, &n); - if (r < 0) { - log_error("Failed to parse maximum tasks specification %s", assignment); - return -EINVAL; - } - } - - r = sd_bus_message_append(m, "v", "t", n); - } else if (STR_IN_SET(field, "CPUShares", "StartupCPUShares")) { uint64_t u; diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c index 83be79a4f5..7cf222e4d2 100644 --- a/src/shared/conf-parser.c +++ b/src/shared/conf-parser.c @@ -323,8 +323,7 @@ int config_parse(const char *unit, if (feof(f)) break; - log_error_errno(errno, "Failed to read configuration file '%s': %m", filename); - return -errno; + return log_error_errno(errno, "Failed to read configuration file '%s': %m", filename); } l = buf; @@ -708,6 +707,7 @@ int config_parse_strv(const char *unit, void *userdata) { char ***sv = data; + int r; assert(filename); assert(lvalue); @@ -721,18 +721,19 @@ int config_parse_strv(const char *unit, * we actually fill in a real empty array here rather * than NULL, since some code wants to know if * 
something was set at all... */ - empty = strv_new(NULL, NULL); + empty = new0(char*, 1); if (!empty) return log_oom(); strv_free(*sv); *sv = empty; + return 0; } for (;;) { char *word = NULL; - int r; + r = extract_first_word(&rvalue, &word, WHITESPACE, EXTRACT_QUOTES|EXTRACT_RETAIN_ESCAPE); if (r == 0) break; diff --git a/src/shared/install.c b/src/shared/install.c index 23cab96c50..7b49e1ece9 100644 --- a/src/shared/install.c +++ b/src/shared/install.c @@ -2215,7 +2215,7 @@ int unit_file_enable( config_path = runtime ? paths.runtime_config : paths.persistent_config; STRV_FOREACH(f, files) { - r = install_info_discover(scope, &c, &paths, *f, SEARCH_LOAD, &i); + r = install_info_discover(scope, &c, &paths, *f, SEARCH_LOAD|SEARCH_FOLLOW_CONFIG_SYMLINKS, &i); if (r < 0) return r; r = install_info_may_process(i, &paths, changes, n_changes); diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index d3f437411a..782824ff38 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -224,6 +224,21 @@ static void release_busses(void) { busses[w] = sd_bus_flush_close_unref(busses[w]); } +static int map_string_no_copy(sd_bus *bus, const char *member, sd_bus_message *m, sd_bus_error *error, void *userdata) { + char *s; + const char **p = userdata; + int r; + + r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &s); + if (r < 0) + return r; + + if (!isempty(s)) + *p = s; + + return 0; +} + static void ask_password_agent_open_if_enabled(void) { /* Open the password agent as a child process if necessary */ @@ -1820,12 +1835,12 @@ static const struct bus_properties_map machine_info_property_map[] = { }; static void machine_info_clear(struct machine_info *info) { - if (info) { - free(info->name); - free(info->state); - free(info->control_group); - zero(*info); - } + assert(info); + + free(info->name); + free(info->state); + free(info->control_group); + zero(*info); } static void free_machines_list(struct machine_info *machine_infos, int n) { @@ 
-3033,6 +3048,9 @@ static int logind_check_inhibitors(enum action a) { if (!on_tty()) return 0; + if (arg_transport != BUS_TRANSPORT_LOCAL) + return 0; + r = acquire_bus(BUS_FULL, &bus); if (r < 0) return r; @@ -3455,6 +3473,27 @@ static int exec_status_info_deserialize(sd_bus_message *m, ExecStatusInfo *i) { return 1; } +typedef struct UnitCondition { + char *name; + char *param; + bool trigger; + bool negate; + int tristate; + + LIST_FIELDS(struct UnitCondition, conditions); +} UnitCondition; + +static void unit_condition_free(UnitCondition *c) { + if (!c) + return; + + free(c->name); + free(c->param); + free(c); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(UnitCondition*, unit_condition_free); + typedef struct UnitStatusInfo { const char *id; const char *load_state; @@ -3501,10 +3540,7 @@ typedef struct UnitStatusInfo { usec_t condition_timestamp; bool condition_result; - bool failed_condition_trigger; - bool failed_condition_negate; - const char *failed_condition; - const char *failed_condition_parameter; + LIST_HEAD(UnitCondition, conditions); usec_t assert_timestamp; bool assert_result; @@ -3543,6 +3579,25 @@ typedef struct UnitStatusInfo { LIST_HEAD(ExecStatusInfo, exec); } UnitStatusInfo; +static void unit_status_info_free(UnitStatusInfo *info) { + ExecStatusInfo *p; + UnitCondition *c; + + strv_free(info->documentation); + strv_free(info->dropin_paths); + strv_free(info->listen); + + while ((c = info->conditions)) { + LIST_REMOVE(conditions, info->conditions, c); + unit_condition_free(c); + } + + while ((p = info->exec)) { + LIST_REMOVE(exec, info->exec, p); + exec_status_info_free(p); + } +} + static void print_status_info( sd_bus *bus, UnitStatusInfo *i, @@ -3664,19 +3719,28 @@ static void print_status_info( printf("\n"); if (!i->condition_result && i->condition_timestamp > 0) { + UnitCondition *c; + int n = 0; + s1 = format_timestamp_relative(since1, sizeof(since1), i->condition_timestamp); s2 = format_timestamp(since2, sizeof(since2), i->condition_timestamp); 
printf("Condition: start %scondition failed%s at %s%s%s\n", ansi_highlight_yellow(), ansi_normal(), s2, s1 ? "; " : "", strempty(s1)); - if (i->failed_condition_trigger) - printf(" none of the trigger conditions were met\n"); - else if (i->failed_condition) - printf(" %s=%s%s was not met\n", - i->failed_condition, - i->failed_condition_negate ? "!" : "", - i->failed_condition_parameter); + + LIST_FOREACH(conditions, c, i->conditions) + if (c->tristate < 0) + n++; + + LIST_FOREACH(conditions, c, i->conditions) + if (c->tristate < 0) + printf(" %s %s=%s%s%s was not met\n", + --n ? special_glyph(TREE_BRANCH) : special_glyph(TREE_RIGHT), + c->name, + c->trigger ? "|" : "", + c->negate ? "!" : "", + c->param); } if (!i->assert_result && i->assert_timestamp > 0) { @@ -3761,7 +3825,7 @@ static void print_status_info( if (i->running) { _cleanup_free_ char *comm = NULL; - get_process_comm(i->main_pid, &comm); + (void) get_process_comm(i->main_pid, &comm); if (comm) printf(" (%s)", comm); } else if (i->exit_code > 0) { @@ -3780,17 +3844,19 @@ static void print_status_info( printf("signal=%s", signal_to_string(i->exit_status)); printf(")"); } - - if (i->control_pid > 0) - printf(";"); } if (i->control_pid > 0) { _cleanup_free_ char *c = NULL; - printf(" %8s: "PID_FMT, i->main_pid ?
"" : " Control", i->control_pid); + if (i->main_pid > 0) + fputs("; Control PID: ", stdout); + else + fputs("Cntrl PID: ", stdout); /* if first in column, abbreviated so it fits alignment */ + + printf(PID_FMT, i->control_pid); - get_process_comm(i->control_pid, &c); + (void) get_process_comm(i->control_pid, &c); if (c) printf(" (%s)", c); } @@ -3807,7 +3873,7 @@ static void print_status_info( printf(" Tasks: %" PRIu64, i->tasks_current); if (i->tasks_max != (uint64_t) -1) - printf(" (limit: %" PRIi64 ")\n", i->tasks_max); + printf(" (limit: %" PRIu64 ")\n", i->tasks_max); else printf("\n"); } @@ -4169,13 +4235,25 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo * return bus_log_parse_error(r); while ((r = sd_bus_message_read(m, "(sbbsi)", &cond, &trigger, &negate, &param, &state)) > 0) { - log_debug("%s %d %d %s %d", cond, trigger, negate, param, state); - if (state < 0 && (!trigger || !i->failed_condition)) { - i->failed_condition = cond; - i->failed_condition_trigger = trigger; - i->failed_condition_negate = negate; - i->failed_condition_parameter = param; - } + _cleanup_(unit_condition_freep) UnitCondition *c = NULL; + + log_debug("%s trigger=%d negate=%d %s →%d", cond, trigger, negate, param, state); + + c = new0(UnitCondition, 1); + if (!c) + return log_oom(); + + c->name = strdup(cond); + c->param = strdup(param); + if (!c->name || !c->param) + return log_oom(); + + c->trigger = trigger; + c->negate = negate; + c->tristate = state; + + LIST_PREPEND(conditions, i->conditions, c); + c = NULL; } if (r < 0) return bus_log_parse_error(r); @@ -4565,15 +4643,15 @@ static int show_one( bool *ellipsized) { static const struct bus_properties_map property_map[] = { - { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) }, - { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state) }, + { "LoadState", "s", map_string_no_copy, offsetof(UnitStatusInfo, load_state) }, + { "ActiveState", "s", map_string_no_copy,
offsetof(UnitStatusInfo, active_state) }, {} }; _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_set_free_ Set *found_properties = NULL; - UnitStatusInfo info = { + _cleanup_(unit_status_info_free) UnitStatusInfo info = { .memory_current = (uint64_t) -1, .memory_high = CGROUP_LIMIT_MAX, .memory_max = CGROUP_LIMIT_MAX, @@ -4582,7 +4660,6 @@ static int show_one( .tasks_current = (uint64_t) -1, .tasks_max = (uint64_t) -1, }; - ExecStatusInfo *p; int r; assert(path); @@ -4676,16 +4753,15 @@ static int show_one( return bus_log_parse_error(r); r = 0; - if (show_properties) { char **pp; - STRV_FOREACH(pp, arg_properties) { + STRV_FOREACH(pp, arg_properties) if (!set_contains(found_properties, *pp)) { log_warning("Property %s does not exist.", *pp); r = -ENXIO; } - } + } else if (streq(verb, "help")) show_unit_help(&info); else if (streq(verb, "status")) { @@ -4697,15 +4773,6 @@ static int show_one( r = EXIT_PROGRAM_RUNNING_OR_SERVICE_OK; } - strv_free(info.documentation); - strv_free(info.dropin_paths); - strv_free(info.listen); - - while ((p = info.exec)) { - LIST_REMOVE(exec, info.exec, p); - exec_status_info_free(p); - } - return r; } @@ -5499,10 +5566,12 @@ static int enable_sysv_units(const char *verb, char **args) { if (!found_sysv) continue; - if (found_native) - log_info("Synchronizing state of %s with SysV service script with %s.", name, argv[0]); - else - log_info("%s is not a native service, redirecting to systemd-sysv-install.", name); + if (!arg_quiet) { + if (found_native) + log_info("Synchronizing state of %s with SysV service script with %s.", name, argv[0]); + else + log_info("%s is not a native service, redirecting to systemd-sysv-install.", name); + } if (!isempty(arg_root)) argv[c++] = q = strappend("--root=", arg_root); @@ -5532,10 +5601,8 @@ static int enable_sysv_units(const char *verb, char **args) { } j = wait_for_terminate(pid, &status); - if (j < 0) { - 
log_error_errno(j, "Failed to wait for child: %m"); - return j; - } + if (j < 0) + return log_error_errno(j, "Failed to wait for child: %m"); if (status.si_code == CLD_EXITED) { if (streq(verb, "is-enabled")) { @@ -5605,13 +5672,36 @@ static int mangle_names(char **original_names, char ***mangled_names) { return 0; } +static int normalize_names(char **names, bool warn_if_path) { + char **u; + bool was_path = false; + + STRV_FOREACH(u, names) { + int r; + + if (!is_path(*u)) + continue; + + r = free_and_strdup(u, basename(*u)); + if (r < 0) + return log_error_errno(r, "Failed to normalize unit file path: %m"); + + was_path = true; + } + + if (warn_if_path && was_path) + log_warning("Warning: Can't execute disable on the unit file path. Proceeding with the unit name."); + + return 0; +} + static int unit_exists(const char *unit) { _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_free_ char *path = NULL; static const struct bus_properties_map property_map[] = { - { "LoadState", "s", NULL, offsetof(UnitStatusInfo, load_state) }, - { "ActiveState", "s", NULL, offsetof(UnitStatusInfo, active_state)}, + { "LoadState", "s", map_string_no_copy, offsetof(UnitStatusInfo, load_state) }, + { "ActiveState", "s", map_string_no_copy, offsetof(UnitStatusInfo, active_state)}, {}, }; UnitStatusInfo info = {}; @@ -5672,6 +5762,12 @@ static int enable_unit(int argc, char *argv[], void *userdata) { return daemon_reload(argc, argv, userdata); } + if (streq(verb, "disable")) { + r = normalize_names(names, true); + if (r < 0) + return r; + } + if (install_client_side()) { if (streq(verb, "enable")) { r = unit_file_enable(arg_scope, arg_runtime, arg_root, names, arg_force, &changes, &n_changes); @@ -6530,7 +6626,7 @@ static void systemctl_help(void) { " unit is required or wanted\n\n" "Unit File Commands:\n" " list-unit-files [PATTERN...] List installed unit files\n" - " enable NAME... 
Enable one or more unit files\n" + " enable [NAME...|PATH...] Enable one or more unit files\n" " disable NAME... Disable one or more unit files\n" " reenable NAME... Reenable one or more unit files\n" " preset NAME... Enable/disable one or more unit files\n" diff --git a/src/sysusers/sysusers.c b/src/sysusers/sysusers.c index 787d68a009..5d72493725 100644 --- a/src/sysusers/sysusers.c +++ b/src/sysusers/sysusers.c @@ -1299,81 +1299,6 @@ static bool item_equal(Item *a, Item *b) { return true; } -static bool valid_user_group_name(const char *u) { - const char *i; - long sz; - - if (isempty(u)) - return false; - - if (!(u[0] >= 'a' && u[0] <= 'z') && - !(u[0] >= 'A' && u[0] <= 'Z') && - u[0] != '_') - return false; - - for (i = u+1; *i; i++) { - if (!(*i >= 'a' && *i <= 'z') && - !(*i >= 'A' && *i <= 'Z') && - !(*i >= '0' && *i <= '9') && - *i != '_' && - *i != '-') - return false; - } - - sz = sysconf(_SC_LOGIN_NAME_MAX); - assert_se(sz > 0); - - if ((size_t) (i-u) > (size_t) sz) - return false; - - if ((size_t) (i-u) > UT_NAMESIZE - 1) - return false; - - return true; -} - -static bool valid_gecos(const char *d) { - - if (!d) - return false; - - if (!utf8_is_valid(d)) - return false; - - if (string_has_cc(d, NULL)) - return false; - - /* Colons are used as field separators, and hence not OK */ - if (strchr(d, ':')) - return false; - - return true; -} - -static bool valid_home(const char *p) { - - if (isempty(p)) - return false; - - if (!utf8_is_valid(p)) - return false; - - if (string_has_cc(p, NULL)) - return false; - - if (!path_is_absolute(p)) - return false; - - if (!path_is_safe(p)) - return false; - - /* Colons are used as field separators, and hence not OK */ - if (strchr(p, ':')) - return false; - - return true; -} - static int parse_line(const char *fname, unsigned line, const char *buffer) { static const Specifier specifier_table[] = { diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c index 72c32d9c8f..5336c19652 100644 --- 
a/src/test/test-cgroup.c +++ b/src/test/test-cgroup.c @@ -60,16 +60,16 @@ int main(int argc, char*argv[]) { assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a") > 0); assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b") == 0); - assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, false, false, false, NULL) == 0); - assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, false, false, false, NULL) > 0); + assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, 0, NULL, NULL, NULL) == 0); + assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, 0, NULL, NULL, NULL) > 0); - assert_se(cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", SYSTEMD_CGROUP_CONTROLLER, "/test-a", false, false) > 0); + assert_se(cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0) > 0); assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0); assert_se(cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b") > 0); - assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, false, false, false, NULL) > 0); - assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, false, false, false, NULL) == 0); + assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-a", 0, 0, NULL, NULL, NULL) > 0); + assert_se(cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0, 0, NULL, NULL, NULL) == 0); cg_trim(SYSTEMD_CGROUP_CONTROLLER, "/", false); diff --git a/src/test/test-id128.c b/src/test/test-id128.c index 96aa008c06..f01fbdd6b2 100644 --- a/src/test/test-id128.c +++ b/src/test/test-id128.c @@ -23,6 +23,9 @@ #include "sd-id128.h" #include "alloc-util.h" +#include "fd-util.h" +#include "fileio.h" +#include "id128-util.h" #include "macro.h" #include "string-util.h" #include "util.h" @@ -33,8 +36,9 @@ int main(int argc, char *argv[]) { sd_id128_t id, id2; - char t[33]; + char t[33], q[37]; _cleanup_free_ char *b = 
NULL; + _cleanup_close_ int fd = -1; assert_se(sd_id128_randomize(&id) == 0); printf("random: %s\n", sd_id128_to_string(id, t)); @@ -57,6 +61,17 @@ int main(int argc, char *argv[]) { printf("waldi2: %s\n", b); assert_se(streq(t, b)); + printf("waldi3: %s\n", id128_to_uuid_string(ID128_WALDI, q)); + assert_se(streq(q, UUID_WALDI)); + + b = mfree(b); + assert_se(asprintf(&b, ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(ID128_WALDI)) == 36); + printf("waldi4: %s\n", b); + assert_se(streq(q, b)); + + assert_se(sd_id128_from_string(STR_WALDI, &id) >= 0); + assert_se(sd_id128_equal(id, ID128_WALDI)); + assert_se(sd_id128_from_string(UUID_WALDI, &id) >= 0); assert_se(sd_id128_equal(id, ID128_WALDI)); @@ -74,5 +89,69 @@ int main(int argc, char *argv[]) { assert_se(!id128_is_valid("01020304-0506-0708-090a0b0c0d0e0f10")); assert_se(!id128_is_valid("010203040506-0708-090a-0b0c0d0e0f10")); + fd = open_tmpfile_unlinkable(NULL, O_RDWR|O_CLOEXEC); + assert_se(fd >= 0); + + /* First, write as UUID */ + assert_se(sd_id128_randomize(&id) >= 0); + assert_se(id128_write_fd(fd, ID128_UUID, id, false) >= 0); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) == -EINVAL); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_UUID, &id2) >= 0); + assert_se(sd_id128_equal(id, id2)); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_ANY, &id2) >= 0); + assert_se(sd_id128_equal(id, id2)); + + /* Second, write as plain */ + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(ftruncate(fd, 0) >= 0); + + assert_se(sd_id128_randomize(&id) >= 0); + assert_se(id128_write_fd(fd, ID128_PLAIN, id, false) >= 0); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_UUID, &id2) == -EINVAL); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) >= 0); + assert_se(sd_id128_equal(id, id2)); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + 
assert_se(id128_read_fd(fd, ID128_ANY, &id2) >= 0); + assert_se(sd_id128_equal(id, id2)); + + /* Third, write plain without trailing newline */ + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(ftruncate(fd, 0) >= 0); + + assert_se(sd_id128_randomize(&id) >= 0); + assert_se(write(fd, sd_id128_to_string(id, t), 32) == 32); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_UUID, &id2) == -EINVAL); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) >= 0); + assert_se(sd_id128_equal(id, id2)); + + /* Fourth, write UUID without trailing newline */ + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(ftruncate(fd, 0) >= 0); + + assert_se(sd_id128_randomize(&id) >= 0); + assert_se(write(fd, id128_to_uuid_string(id, t), 36) == 36); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_PLAIN, &id2) == -EINVAL); + + assert_se(lseek(fd, 0, SEEK_SET) == 0); + assert_se(id128_read_fd(fd, ID128_UUID, &id2) >= 0); + assert_se(sd_id128_equal(id, id2)); + return 0; } diff --git a/src/test/test-install-root.c b/src/test/test-install-root.c index 4b9a74fca4..db1c928660 100644 --- a/src/test/test-install-root.c +++ b/src/test/test-install-root.c @@ -301,7 +301,12 @@ static void test_linked_units(const char *root) { unit_file_changes_free(changes, n_changes); changes = NULL; n_changes = 0; - assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("linked3.service"), false, &changes, &n_changes) == -ELOOP); + assert_se(unit_file_enable(UNIT_FILE_SYSTEM, false, root, STRV_MAKE("linked3.service"), false, &changes, &n_changes) >= 0); + assert_se(n_changes == 1); + assert_se(changes[0].type == UNIT_FILE_SYMLINK); + assert_se(startswith(changes[0].path, root)); + assert_se(endswith(changes[0].path, "linked3.service")); + assert_se(streq(changes[0].source, "/opt/linked3.service")); unit_file_changes_free(changes, n_changes); changes = NULL; n_changes = 0; } diff --git
a/src/test/test-user-util.c b/src/test/test-user-util.c index 8d1ec19f17..2a344a9f93 100644 --- a/src/test/test-user-util.c +++ b/src/test/test-user-util.c @@ -61,6 +61,88 @@ static void test_uid_ptr(void) { assert_se(PTR_TO_UID(UID_TO_PTR(1000)) == 1000); } +static void test_valid_user_group_name(void) { + assert_se(!valid_user_group_name(NULL)); + assert_se(!valid_user_group_name("")); + assert_se(!valid_user_group_name("1")); + assert_se(!valid_user_group_name("65535")); + assert_se(!valid_user_group_name("-1")); + assert_se(!valid_user_group_name("-kkk")); + assert_se(!valid_user_group_name("rööt")); + assert_se(!valid_user_group_name(".")); + assert_se(!valid_user_group_name("eff.eff")); + assert_se(!valid_user_group_name("foo\nbar")); + assert_se(!valid_user_group_name("0123456789012345678901234567890123456789")); + assert_se(!valid_user_group_name_or_id("aaa:bbb")); + + assert_se(valid_user_group_name("root")); + assert_se(valid_user_group_name("lennart")); + assert_se(valid_user_group_name("LENNART")); + assert_se(valid_user_group_name("_kkk")); + assert_se(valid_user_group_name("kkk-")); + assert_se(valid_user_group_name("kk-k")); + + assert_se(valid_user_group_name("some5")); + assert_se(!valid_user_group_name("5some")); + assert_se(valid_user_group_name("INNER5NUMBER")); +} + +static void test_valid_user_group_name_or_id(void) { + assert_se(!valid_user_group_name_or_id(NULL)); + assert_se(!valid_user_group_name_or_id("")); + assert_se(valid_user_group_name_or_id("0")); + assert_se(valid_user_group_name_or_id("1")); + assert_se(valid_user_group_name_or_id("65534")); + assert_se(!valid_user_group_name_or_id("65535")); + assert_se(valid_user_group_name_or_id("65536")); + assert_se(!valid_user_group_name_or_id("-1")); + assert_se(!valid_user_group_name_or_id("-kkk")); + assert_se(!valid_user_group_name_or_id("rööt")); + assert_se(!valid_user_group_name_or_id(".")); + assert_se(!valid_user_group_name_or_id("eff.eff")); + 
assert_se(!valid_user_group_name_or_id("foo\nbar")); + assert_se(!valid_user_group_name_or_id("0123456789012345678901234567890123456789")); + assert_se(!valid_user_group_name_or_id("aaa:bbb")); + + assert_se(valid_user_group_name_or_id("root")); + assert_se(valid_user_group_name_or_id("lennart")); + assert_se(valid_user_group_name_or_id("LENNART")); + assert_se(valid_user_group_name_or_id("_kkk")); + assert_se(valid_user_group_name_or_id("kkk-")); + assert_se(valid_user_group_name_or_id("kk-k")); + + assert_se(valid_user_group_name_or_id("some5")); + assert_se(!valid_user_group_name_or_id("5some")); + assert_se(valid_user_group_name_or_id("INNER5NUMBER")); +} + +static void test_valid_gecos(void) { + + assert_se(!valid_gecos(NULL)); + assert_se(valid_gecos("")); + assert_se(valid_gecos("test")); + assert_se(valid_gecos("Ümläüt")); + assert_se(!valid_gecos("In\nvalid")); + assert_se(!valid_gecos("In:valid")); +} + +static void test_valid_home(void) { + + assert_se(!valid_home(NULL)); + assert_se(!valid_home("")); + assert_se(!valid_home(".")); + assert_se(!valid_home("/home/..")); + assert_se(!valid_home("/home/../")); + assert_se(!valid_home("/home\n/foo")); + assert_se(!valid_home("./piep")); + assert_se(!valid_home("piep")); + assert_se(!valid_home("/home/user:lennart")); + + assert_se(valid_home("/")); + assert_se(valid_home("/home")); + assert_se(valid_home("/home/foo")); +} + int main(int argc, char*argv[]) { test_uid_to_name_one(0, "root"); @@ -75,5 +157,10 @@ int main(int argc, char*argv[]) { test_parse_uid(); test_uid_ptr(); + test_valid_user_group_name(); + test_valid_user_group_name_or_id(); + test_valid_gecos(); + test_valid_home(); + return 0; } diff --git a/src/test/test-util.c b/src/test/test-util.c index e177612a9f..1b5cba86c1 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -308,7 +308,43 @@ static void test_physical_memory_scale(void) { /* overflow */ assert_se(physical_memory_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX); +} + 
+static void test_system_tasks_max(void) { + uint64_t t; + + t = system_tasks_max(); + assert_se(t > 0); + assert_se(t < UINT64_MAX); + + log_info("Max tasks: %" PRIu64, t); +} + +static void test_system_tasks_max_scale(void) { + uint64_t t; + + t = system_tasks_max(); + + assert_se(system_tasks_max_scale(0, 100) == 0); + assert_se(system_tasks_max_scale(100, 100) == t); + + assert_se(system_tasks_max_scale(0, 1) == 0); + assert_se(system_tasks_max_scale(1, 1) == t); + assert_se(system_tasks_max_scale(2, 1) == 2*t); + + assert_se(system_tasks_max_scale(0, 2) == 0); + assert_se(system_tasks_max_scale(1, 2) == t/2); + assert_se(system_tasks_max_scale(2, 2) == t); + assert_se(system_tasks_max_scale(3, 2) == (3*t)/2); + assert_se(system_tasks_max_scale(4, 2) == t*2); + + assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0); + assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2); + assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t); + + /* overflow */ + assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX); } int main(int argc, char *argv[]) { @@ -327,6 +363,8 @@ int main(int argc, char *argv[]) { test_raw_clone(); test_physical_memory(); test_physical_memory_scale(); + test_system_tasks_max(); + test_system_tasks_max_scale(); return 0; } diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c index bfb6293b3d..954f4aa985 100644 --- a/src/tmpfiles/tmpfiles.c +++ b/src/tmpfiles/tmpfiles.c @@ -1575,13 +1575,12 @@ static int clean_item_instance(Item *i, const char* instance) { d = opendir_nomod(instance); if (!d) { - if (errno == ENOENT || errno == ENOTDIR) { + if (IN_SET(errno, ENOENT, ENOTDIR)) { log_debug_errno(errno, "Directory \"%s\": %m", instance); return 0; } - log_error_errno(errno, "Failed to open directory %s: %m", instance); - return -errno; + return log_error_errno(errno, "Failed to open directory %s: %m", instance); } if (fstat(dirfd(d), &s) < 0) diff --git a/src/udev/udevd.c b/src/udev/udevd.c index 
a8ab208816..a893a2b3d9 100644 --- a/src/udev/udevd.c +++ b/src/udev/udevd.c @@ -1256,7 +1256,7 @@ static int on_post(sd_event_source *s, void *userdata) { return r; } else if (manager->cgroup) /* cleanup possible left-over processes in our cgroup */ - cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, false, true, NULL); + cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, CGROUP_IGNORE_SELF, NULL, NULL, NULL); } } |