diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/basic/audit.c | 9 | ||||
-rw-r--r-- | src/basic/cgroup-util.c | 65 | ||||
-rw-r--r-- | src/basic/macro.h | 3 | ||||
-rw-r--r-- | src/basic/util.c | 25 | ||||
-rw-r--r-- | src/basic/util.h | 5 | ||||
-rw-r--r-- | src/core/cgroup.c | 41 | ||||
-rw-r--r-- | src/core/cgroup.h | 1 | ||||
-rw-r--r-- | src/core/killall.c | 6 | ||||
-rw-r--r-- | src/core/manager.c | 12 | ||||
-rw-r--r-- | src/core/unit.c | 20 | ||||
-rw-r--r-- | src/libsystemd/libsystemd.sym | 6 | ||||
-rw-r--r-- | src/libsystemd/sd-bus/bus-creds.c | 8 | ||||
-rw-r--r-- | src/libsystemd/sd-event/sd-event.c | 430 | ||||
-rw-r--r-- | src/libsystemd/sd-event/test-event.c | 66 | ||||
-rw-r--r-- | src/libsystemd/sd-login/sd-login.c | 213 | ||||
-rw-r--r-- | src/libsystemd/sd-login/test-login.c | 14 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 7 | ||||
-rw-r--r-- | src/systemd/sd-login.h | 36 | ||||
-rw-r--r-- | src/test/test-cgroup-util.c | 12 | ||||
-rw-r--r-- | src/test/test-util.c | 6 |
20 files changed, 679 insertions, 306 deletions
diff --git a/src/basic/audit.c b/src/basic/audit.c index 54148fcf18..1f593aa813 100644 --- a/src/basic/audit.c +++ b/src/basic/audit.c @@ -36,6 +36,11 @@ int audit_session_from_pid(pid_t pid, uint32_t *id) { assert(id); + /* We don't convert ENOENT to ESRCH here, since we can't + * really distuingish between "audit is not available in the + * kernel" and "the process does not exist", both which will + * result in ENOENT. */ + p = procfs_file_alloca(pid, "sessionid"); r = read_one_line_file(p, &s); @@ -47,7 +52,7 @@ int audit_session_from_pid(pid_t pid, uint32_t *id) { return r; if (u == AUDIT_SESSION_INVALID || u <= 0) - return -ENXIO; + return -ENODATA; *id = u; return 0; @@ -68,6 +73,8 @@ int audit_loginuid_from_pid(pid_t pid, uid_t *uid) { return r; r = parse_uid(s, &u); + if (r == -ENXIO) /* the UID was -1 */ + return -ENODATA; if (r < 0) return r; diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 0ebe570bb8..a298b29382 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -187,7 +187,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo if (ignore_self && pid == my_pid) continue; - if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid)) + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) continue; /* If we haven't killed this process yet, kill @@ -205,7 +205,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo done = false; - r = set_put(s, LONG_TO_PTR(pid)); + r = set_put(s, PID_TO_PTR(pid)); if (r < 0) { if (ret >= 0) return r; @@ -318,7 +318,7 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char if (ignore_self && pid == my_pid) continue; - if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid)) + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) continue; /* Ignore kernel threads. Since they can only @@ -338,7 +338,7 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char done = false; - r = set_put(s, LONG_TO_PTR(pid)); + r = set_put(s, PID_TO_PTR(pid)); if (r < 0) { if (ret >= 0) return r; @@ -460,20 +460,23 @@ static const char *controller_to_dirname(const char *controller) { return controller; } -static int join_path_legacy(const char *controller_dn, const char *path, const char *suffix, char **fs) { +static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) { + const char *dn; char *t = NULL; assert(fs); - assert(controller_dn); + assert(controller); + + dn = controller_to_dirname(controller); if (isempty(path) && isempty(suffix)) - t = strappend("/sys/fs/cgroup/", controller_dn); + t = strappend("/sys/fs/cgroup/", dn); else if (isempty(path)) - t = strjoin("/sys/fs/cgroup/", controller_dn, "/", suffix, NULL); + t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL); else if (isempty(suffix)) - t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, NULL); + t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL); else - t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, "/", suffix, NULL); + t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL); if (!t) return -ENOMEM; @@ -509,15 +512,15 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (!controller) { char *t; - /* If no controller is specified, we assume only the - * path below the controller matters */ + /* If no controller is specified, we return the path + * *below* the controllers, without any prefix. */ if (!path && !suffix) return -EINVAL; - if (isempty(suffix)) + if (!suffix) t = strdup(path); - else if (isempty(path)) + else if (!path) t = strdup(suffix); else t = strjoin(path, "/", suffix, NULL); @@ -537,14 +540,8 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (unified > 0) r = join_path_unified(path, suffix, fs); - else { - const char *dn; - - dn = controller_to_dirname(controller); - - r = join_path_legacy(dn, path, suffix, fs); - } - + else + r = join_path_legacy(controller, path, suffix, fs); if (r < 0) return r; @@ -873,7 +870,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { return 0; } - return -ENOENT; + return -ENODATA; } int cg_install_release_agent(const char *controller, const char *agent) { @@ -902,7 +899,7 @@ int cg_install_release_agent(const char *controller, const char *agent) { r = write_string_file(fs, agent, 0); if (r < 0) return r; - } else if (!streq(sc, agent)) + } else if (!path_equal(sc, agent)) return -EEXIST; fs = mfree(fs); @@ -1005,6 +1002,8 @@ int cg_is_empty_recursive(const char *controller, const char *path) { return r; r = read_one_line_file(populated, &t); + if (r == -ENOENT) + return 1; if (r < 0) return r; @@ -1898,7 +1897,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, int r = 0; SET_FOREACH(pidp, pids, i) { - pid_t pid = PTR_TO_LONG(pidp); + pid_t pid = PTR_TO_PID(pidp); int q; q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); @@ -1911,7 +1910,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { CGroupController c; - int r, unified; + int r = 0, unified; if (!path_equal(from, to)) { r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true); @@ -1982,14 +1981,22 @@ int cg_mask_supported(CGroupMask *ret) { if (unified < 0) return unified; if (unified > 0) { - _cleanup_free_ char *controllers = NULL; + _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL; const char *c; /* In the unified hierarchy we can read the supported * and accessible controllers from a the top-level * cgroup attribute */ - r = read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers); + r = cg_get_root_path(&root); + if (r < 0) + return r; + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path); + if (r < 0) + return r; + + r = read_one_line_file(path, &controllers); if (r < 0) return r; @@ -2156,7 +2163,7 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { r = write_string_file(fs, s, 0); if (r < 0) - log_warning_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs); + log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs); } } diff --git a/src/basic/macro.h b/src/basic/macro.h index 627d768b76..cbc3ca97b8 100644 --- a/src/basic/macro.h +++ b/src/basic/macro.h @@ -298,6 +298,9 @@ static inline unsigned long ALIGN_POWER2(unsigned long u) { #define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1)) #define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1)) +#define PTR_TO_PID(p) ((pid_t) ((uintptr_t) p)) +#define PID_TO_PTR(p) ((void*) ((uintptr_t) p)) + #define memzero(x,l) (memset((x), 0, (l))) #define zero(x) (memzero(&(x), sizeof(x))) diff --git a/src/basic/util.c b/src/basic/util.c index f01f5f237b..86aacad307 100644 --- a/src/basic/util.c +++ b/src/basic/util.c @@ -373,6 +373,19 @@ int parse_pid(const char *s, pid_t* ret_pid) { return 0; } +bool uid_is_valid(uid_t uid) { + + /* Some libc APIs use UID_INVALID as special placeholder */ + if (uid == (uid_t) 0xFFFFFFFF) + return false; + + /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */ + if (uid == (uid_t) 0xFFFF) + return false; + + return true; +} + int parse_uid(const char *s, uid_t* ret_uid) { unsigned long ul = 0; uid_t uid; @@ -389,13 +402,11 @@ int parse_uid(const char *s, uid_t* ret_uid) { if ((unsigned long) uid != ul) return -ERANGE; - /* Some libc APIs use UID_INVALID as special placeholder */ - if (uid == (uid_t) 0xFFFFFFFF) - return -ENXIO; - - /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */ - if (uid == (uid_t) 0xFFFF) - return -ENXIO; + if (!uid_is_valid(uid)) + return -ENXIO; /* we return ENXIO instead of EINVAL + * here, to make it easy to distuingish + * invalid numeric uids invalid + * strings. */ if (ret_uid) *ret_uid = uid; diff --git a/src/basic/util.h b/src/basic/util.h index ff7a00e928..f8e32360f0 100644 --- a/src/basic/util.h +++ b/src/basic/util.h @@ -154,7 +154,10 @@ int parse_size(const char *t, off_t base, off_t *size); int parse_boolean(const char *v) _pure_; int parse_pid(const char *s, pid_t* ret_pid); int parse_uid(const char *s, uid_t* ret_uid); -#define parse_gid(s, ret_uid) parse_uid(s, ret_uid) +#define parse_gid(s, ret_gid) parse_uid(s, ret_gid) + +bool uid_is_valid(uid_t uid); +#define gid_is_valid(gid) uid_is_valid(gid) int safe_atou(const char *s, unsigned *ret_u); int safe_atoi(const char *s, int *ret_i); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 1e78f871c7..9a025cf929 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -507,15 +507,20 @@ CGroupMask unit_get_own_mask(Unit *u) { return 0; /* If delegation is turned on, then turn on all cgroups, - * unless the process we fork into it is known to drop - * privileges anyway, and shouldn't get access to the - * controllers anyway. */ + * unless we are on the legacy hierarchy and the process we + * fork into it is known to drop privileges, and hence + * shouldn't get access to the controllers. + * + * Note that on the unified hierarchy it is safe to delegate + * controllers to unprivileged services. */ if (c->delegate) { ExecContext *e; e = unit_get_exec_context(u); - if (!e || exec_context_maintains_privileges(e)) + if (!e || + exec_context_maintains_privileges(e) || + cg_unified() > 0) return _CGROUP_MASK_ALL; } @@ -1378,9 +1383,8 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) { } } -Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { +Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) { _cleanup_free_ char *cgroup = NULL; - Unit *u; int r; assert(m); @@ -1388,22 +1392,33 @@ Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { if (pid <= 0) return NULL; + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); + if (r < 0) + return NULL; + + return manager_get_unit_by_cgroup(m, cgroup); +} + +Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { + Unit *u; + + assert(m); + + if (pid <= 0) + return NULL; + if (pid == 1) return hashmap_get(m->units, SPECIAL_INIT_SCOPE); - u = hashmap_get(m->watch_pids1, LONG_TO_PTR(pid)); + u = hashmap_get(m->watch_pids1, PID_TO_PTR(pid)); if (u) return u; - u = hashmap_get(m->watch_pids2, LONG_TO_PTR(pid)); + u = hashmap_get(m->watch_pids2, PID_TO_PTR(pid)); if (u) return u; - r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); - if (r < 0) - return NULL; - - return manager_get_unit_by_cgroup(m, cgroup); + return manager_get_unit_by_pid_cgroup(m, pid); } int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 1ce21f43f2..438f5bf50f 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -130,6 +130,7 @@ void manager_shutdown_cgroup(Manager *m, bool delete); unsigned manager_dispatch_cgroup_queue(Manager *m); Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup); +Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid); Unit* manager_get_unit_by_pid(Manager *m, pid_t pid); int unit_search_main_pid(Unit *u, pid_t *ret); diff --git a/src/core/killall.c b/src/core/killall.c index 2a9d72c901..ee5d388560 100644 --- a/src/core/killall.c +++ b/src/core/killall.c @@ -108,7 +108,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) { return; } - set_remove(pids, ULONG_TO_PTR(pid)); + (void) set_remove(pids, PID_TO_PTR(pid)); } /* Now explicitly check who might be remaining, who @@ -117,7 +117,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) { /* We misuse getpgid as a check whether a * process still exists. */ - if (getpgid((pid_t) PTR_TO_ULONG(p)) >= 0) + if (getpgid(PTR_TO_PID(p)) >= 0) continue; if (errno != ESRCH) @@ -179,7 +179,7 @@ static int killall(int sig, Set *pids, bool send_sighup) { if (kill(pid, sig) >= 0) { if (pids) { - r = set_put(pids, ULONG_TO_PTR(pid)); + r = set_put(pids, PID_TO_PTR(pid)); if (r < 0) log_oom(); } diff --git a/src/core/manager.c b/src/core/manager.c index c3327e37f5..fc10ddb5d9 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1585,19 +1585,19 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t /* Notify every unit that might be interested, but try * to avoid notifying the same one multiple times. */ - u1 = manager_get_unit_by_pid(m, ucred->pid); + u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid); if (u1) { manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds); found = true; } - u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(ucred->pid)); + u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid)); if (u2 && u2 != u1) { manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds); found = true; } - u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(ucred->pid)); + u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid)); if (u3 && u3 != u2 && u3 != u1) { manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds); found = true; @@ -1663,13 +1663,13 @@ static int manager_dispatch_sigchld(Manager *m) { /* And now figure out the unit this belongs * to, it might be multiple... */ - u1 = manager_get_unit_by_pid(m, si.si_pid); + u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid); if (u1) invoke_sigchld_event(m, u1, &si); - u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(si.si_pid)); + u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid)); if (u2 && u2 != u1) invoke_sigchld_event(m, u2, &si); - u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(si.si_pid)); + u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(si.si_pid)); if (u3 && u3 != u2 && u3 != u1) invoke_sigchld_event(m, u3, &si); } diff --git a/src/core/unit.c b/src/core/unit.c index 8c07c6140d..a5714adf38 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -1995,16 +1995,16 @@ int unit_watch_pid(Unit *u, pid_t pid) { if (r < 0) return r; - r = hashmap_put(u->manager->watch_pids1, LONG_TO_PTR(pid), u); + r = hashmap_put(u->manager->watch_pids1, PID_TO_PTR(pid), u); if (r == -EEXIST) { r = hashmap_ensure_allocated(&u->manager->watch_pids2, NULL); if (r < 0) return r; - r = hashmap_put(u->manager->watch_pids2, LONG_TO_PTR(pid), u); + r = hashmap_put(u->manager->watch_pids2, PID_TO_PTR(pid), u); } - q = set_put(u->pids, LONG_TO_PTR(pid)); + q = set_put(u->pids, PID_TO_PTR(pid)); if (q < 0) return q; @@ -2015,16 +2015,16 @@ void unit_unwatch_pid(Unit *u, pid_t pid) { assert(u); assert(pid >= 1); - (void) hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u); - (void) hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u); - (void) set_remove(u->pids, LONG_TO_PTR(pid)); + (void) hashmap_remove_value(u->manager->watch_pids1, PID_TO_PTR(pid), u); + (void) hashmap_remove_value(u->manager->watch_pids2, PID_TO_PTR(pid), u); + (void) set_remove(u->pids, PID_TO_PTR(pid)); } void unit_unwatch_all_pids(Unit *u) { assert(u); while (!set_isempty(u->pids)) - unit_unwatch_pid(u, PTR_TO_LONG(set_first(u->pids))); + unit_unwatch_pid(u, PTR_TO_PID(set_first(u->pids))); u->pids = set_free(u->pids); } @@ -2038,7 +2038,7 @@ void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) { /* Cleans dead PIDs from our list */ SET_FOREACH(e, u->pids, i) { - pid_t pid = PTR_TO_LONG(e); + pid_t pid = PTR_TO_PID(e); if (pid == except1 || pid == except2) continue; @@ -2993,13 +2993,13 @@ static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) { /* Exclude the main/control pids from being killed via the cgroup */ if (main_pid > 0) { - r = set_put(pid_set, LONG_TO_PTR(main_pid)); + r = set_put(pid_set, PID_TO_PTR(main_pid)); if (r < 0) goto fail; } if (control_pid > 0) { - r = set_put(pid_set, LONG_TO_PTR(control_pid)); + r = set_put(pid_set, PID_TO_PTR(control_pid)); if (r < 0) goto fail; } diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym index 7bf1d66dde..d5ad127bcb 100644 --- a/src/libsystemd/libsystemd.sym +++ b/src/libsystemd/libsystemd.sym @@ -467,3 +467,9 @@ global: sd_bus_emit_object_removed; sd_bus_flush_close_unref; } LIBSYSTEMD_221; + +LIBSYSTEMD_226 { +global: + sd_pid_get_cgroup; + sd_peer_get_cgroup; +} LIBSYSTEMD_222; diff --git a/src/libsystemd/sd-bus/bus-creds.c b/src/libsystemd/sd-bus/bus-creds.c index 1c365b7fcd..c3cc2b7212 100644 --- a/src/libsystemd/sd-bus/bus-creds.c +++ b/src/libsystemd/sd-bus/bus-creds.c @@ -1062,8 +1062,8 @@ int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) { if (missing & SD_BUS_CREDS_AUDIT_SESSION_ID) { r = audit_session_from_pid(pid, &c->audit_session_id); - if (r == -ENXIO) { - /* ENXIO means: no audit session id assigned */ + if (r == -ENODATA) { + /* ENODATA means: no audit session id assigned */ c->audit_session_id = AUDIT_SESSION_INVALID; c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID; } else if (r < 0) { @@ -1075,8 +1075,8 @@ int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) { if (missing & SD_BUS_CREDS_AUDIT_LOGIN_UID) { r = audit_loginuid_from_pid(pid, &c->audit_login_uid); - if (r == -ENXIO) { - /* ENXIO means: no audit login uid assigned */ + if (r == -ENODATA) { + /* ENODATA means: no audit login uid assigned */ c->audit_login_uid = UID_INVALID; c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID; } else if (r < 0) { diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index c419be820a..838ee4d454 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -56,9 +56,22 @@ typedef enum EventSourceType { _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1 } EventSourceType; +/* All objects we use in epoll events start with this value, so that + * we know how to dispatch it */ +typedef enum WakeupType { + WAKEUP_NONE, + WAKEUP_EVENT_SOURCE, + WAKEUP_CLOCK_DATA, + WAKEUP_SIGNAL_DATA, + _WAKEUP_TYPE_MAX, + _WAKEUP_TYPE_INVALID = -1, +} WakeupType; + #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM) struct sd_event_source { + WakeupType wakeup; + unsigned n_ref; sd_event *event; @@ -120,6 +133,7 @@ struct sd_event_source { }; struct clock_data { + WakeupType wakeup; int fd; /* For all clocks we maintain two priority queues each, one @@ -136,11 +150,23 @@ struct clock_data { bool needs_rearm:1; }; +struct signal_data { + WakeupType wakeup; + + /* For each priority we maintain one signal fd, so that we + * only have to dequeue a single event per priority at a + * time. */ + + int fd; + int64_t priority; + sigset_t sigset; + sd_event_source *current; +}; + struct sd_event { unsigned n_ref; int epoll_fd; - int signal_fd; int watchdog_fd; Prioq *pending; @@ -157,8 +183,8 @@ struct sd_event { usec_t perturb; - sigset_t sigset; - sd_event_source **signal_sources; + sd_event_source **signal_sources; /* indexed by signal number */ + Hashmap *signal_data; /* indexed by priority */ Hashmap *child_sources; unsigned n_enabled_child_sources; @@ -355,6 +381,7 @@ static int exit_prioq_compare(const void *a, const void *b) { static void free_clock_data(struct clock_data *d) { assert(d); + assert(d->wakeup == WAKEUP_CLOCK_DATA); safe_close(d->fd); prioq_free(d->earliest); @@ -378,7 +405,6 @@ static void event_free(sd_event *e) { *(e->default_event_ptr) = NULL; safe_close(e->epoll_fd); - safe_close(e->signal_fd); safe_close(e->watchdog_fd); free_clock_data(&e->realtime); @@ -392,6 +418,7 @@ static void event_free(sd_event *e) { prioq_free(e->exit); free(e->signal_sources); + hashmap_free(e->signal_data); hashmap_free(e->child_sources); set_free(e->post_sources); @@ -409,13 +436,12 @@ _public_ int sd_event_new(sd_event** ret) { return -ENOMEM; e->n_ref = 1; - e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1; + e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1; e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY; + e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA; e->original_pid = getpid(); e->perturb = USEC_INFINITY; - assert_se(sigemptyset(&e->sigset) == 0); - e->pending = prioq_new(pending_prioq_compare); if (!e->pending) { r = -ENOMEM; @@ -509,7 +535,6 @@ static int source_io_register( r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev); else r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev); - if (r < 0) return -errno; @@ -591,45 +616,171 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) { } } -static bool need_signal(sd_event *e, int signal) { - return (e->signal_sources && e->signal_sources[signal] && - e->signal_sources[signal]->enabled != SD_EVENT_OFF) - || - (signal == SIGCHLD && - e->n_enabled_child_sources > 0); -} +static int event_make_signal_data( + sd_event *e, + int sig, + struct signal_data **ret) { -static int event_update_signal_fd(sd_event *e) { struct epoll_event ev = {}; - bool add_to_epoll; + struct signal_data *d; + bool added = false; + sigset_t ss_copy; + int64_t priority; int r; assert(e); if (event_pid_changed(e)) - return 0; + return -ECHILD; - add_to_epoll = e->signal_fd < 0; + if (e->signal_sources && e->signal_sources[sig]) + priority = e->signal_sources[sig]->priority; + else + priority = 0; - r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC); - if (r < 0) - return -errno; + d = hashmap_get(e->signal_data, &priority); + if (d) { + if (sigismember(&d->sigset, sig) > 0) { + if (ret) + *ret = d; + return 0; + } + } else { + r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops); + if (r < 0) + return r; + + d = new0(struct signal_data, 1); + if (!d) + return -ENOMEM; + + d->wakeup = WAKEUP_SIGNAL_DATA; + d->fd = -1; + d->priority = priority; + + r = hashmap_put(e->signal_data, &d->priority, d); + if (r < 0) + return r; - e->signal_fd = r; + added = true; + } + + ss_copy = d->sigset; + assert_se(sigaddset(&ss_copy, sig) >= 0); + + r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC); + if (r < 0) { + r = -errno; + goto fail; + } + + d->sigset = ss_copy; - if (!add_to_epoll) + if (d->fd >= 0) { + if (ret) + *ret = d; return 0; + } + + d->fd = r; ev.events = EPOLLIN; - ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL); + ev.data.ptr = d; - r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev); - if (r < 0) { - e->signal_fd = safe_close(e->signal_fd); - return -errno; + r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev); + if (r < 0) { + r = -errno; + goto fail; } + if (ret) + *ret = d; + return 0; + +fail: + if (added) { + d->fd = safe_close(d->fd); + hashmap_remove(e->signal_data, &d->priority); + free(d); + } + + return r; +} + +static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) { + assert(e); + assert(d); + + /* Turns off the specified signal in the signal data + * object. If the signal mask of the object becomes empty that + * way removes it. */ + + if (sigismember(&d->sigset, sig) == 0) + return; + + assert_se(sigdelset(&d->sigset, sig) >= 0); + + if (sigisemptyset(&d->sigset)) { + + /* If all the mask is all-zero we can get rid of the structure */ + hashmap_remove(e->signal_data, &d->priority); + assert(!d->current); + safe_close(d->fd); + free(d); + return; + } + + assert(d->fd >= 0); + + if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0) + log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m"); +} + +static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) { + struct signal_data *d; + static const int64_t zero_priority = 0; + + assert(e); + + /* Rechecks if the specified signal is still something we are + * interested in. If not, we'll unmask it, and possibly drop + * the signalfd for it. */ + + if (sig == SIGCHLD && + e->n_enabled_child_sources > 0) + return; + + if (e->signal_sources && + e->signal_sources[sig] && + e->signal_sources[sig]->enabled != SD_EVENT_OFF) + return; + + /* + * The specified signal might be enabled in three different queues: + * + * 1) the one that belongs to the priority passed (if it is non-NULL) + * 2) the one that belongs to the priority of the event source of the signal (if there is one) + * 3) the 0 priority (to cover the SIGCHLD case) + * + * Hence, let's remove it from all three here. + */ + + if (priority) { + d = hashmap_get(e->signal_data, priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + if (e->signal_sources && e->signal_sources[sig]) { + d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority); + if (d) + event_unmask_signal_data(e, d, sig); + } + + d = hashmap_get(e->signal_data, &zero_priority); + if (d) + event_unmask_signal_data(e, d, sig); } static void source_disconnect(sd_event_source *s) { @@ -668,17 +819,11 @@ static void source_disconnect(sd_event_source *s) { case SOURCE_SIGNAL: if (s->signal.sig > 0) { + if (s->event->signal_sources) s->event->signal_sources[s->signal.sig] = NULL; - /* If the signal was on and now it is off... */ - if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) { - assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } + event_gc_signal_data(s->event, &s->priority, s->signal.sig); } break; @@ -688,18 +833,10 @@ static void source_disconnect(sd_event_source *s) { if (s->enabled != SD_EVENT_OFF) { assert(s->event->n_enabled_child_sources > 0); s->event->n_enabled_child_sources--; - - /* We know the signal was on, if it is off now... */ - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } } - hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid)); + (void) hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid)); + event_gc_signal_data(s->event, &s->priority, SIGCHLD); } break; @@ -778,6 +915,14 @@ static int source_set_pending(sd_event_source *s, bool b) { d->needs_rearm = true; } + if (s->type == SOURCE_SIGNAL && !b) { + struct signal_data *d; + + d = hashmap_get(s->event->signal_data, &s->priority); + if (d && d->current == s) + d->current = NULL; + } + return 0; } @@ -827,6 +972,7 @@ _public_ int sd_event_add_io( if (!s) return -ENOMEM; + s->wakeup = WAKEUP_EVENT_SOURCE; s->io.fd = fd; s->io.events = events; s->io.callback = callback; @@ -883,7 +1029,7 @@ static int event_setup_timer_fd( return -errno; ev.events = EPOLLIN; - ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock)); + ev.data.ptr = d; r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev); if (r < 0) { @@ -993,9 +1139,9 @@ _public_ int sd_event_add_signal( void *userdata) { sd_event_source *s; + struct signal_data *d; sigset_t ss; int r; - bool previous; assert_return(e, -EINVAL); assert_return(sig > 0, -EINVAL); @@ -1020,8 +1166,6 @@ _public_ int sd_event_add_signal( } else if (e->signal_sources[sig]) return -EBUSY; - previous = need_signal(e, sig); - s = source_new(e, !ret, SOURCE_SIGNAL); if (!s) return -ENOMEM; @@ -1033,14 +1177,10 @@ _public_ int sd_event_add_signal( e->signal_sources[sig] = s; - if (!previous) { - assert_se(sigaddset(&e->sigset, sig) == 0); - - r = event_update_signal_fd(e); - if (r < 0) { - source_free(s); - return r; - } + r = event_make_signal_data(e, sig, &d); + if (r < 0) { + source_free(s); + return r; } /* Use the signal name as description for the event source by default */ @@ -1062,7 +1202,6 @@ _public_ int sd_event_add_child( sd_event_source *s; int r; - bool previous; assert_return(e, -EINVAL); assert_return(pid > 1, -EINVAL); @@ -1079,8 +1218,6 @@ _public_ int sd_event_add_child( if (hashmap_contains(e->child_sources, INT_TO_PTR(pid))) return -EBUSY; - previous = need_signal(e, SIGCHLD); - s = source_new(e, !ret, SOURCE_CHILD); if (!s) return -ENOMEM; @@ -1099,14 +1236,11 @@ _public_ int sd_event_add_child( e->n_enabled_child_sources ++; - if (!previous) { - assert_se(sigaddset(&e->sigset, SIGCHLD) == 0); - - r = event_update_signal_fd(e); - if (r < 0) { - source_free(s); - return r; - } + r = event_make_signal_data(e, SIGCHLD, NULL); + if (r < 0) { + e->n_enabled_child_sources--; + source_free(s); + return r; } e->need_process_child = true; @@ -1406,6 +1540,8 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) } _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) { + int r; + assert_return(s, -EINVAL); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1413,7 +1549,25 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) if (s->priority == priority) return 0; - s->priority = priority; + if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) { + struct signal_data *old, *d; + + /* Move us from the signalfd belonging to the old + * priority to the signalfd of the new priority */ + + assert_se(old = hashmap_get(s->event->signal_data, &s->priority)); + + s->priority = priority; + + r = event_make_signal_data(s->event, s->signal.sig, &d); + if (r < 0) { + s->priority = old->priority; + return r; + } + + event_unmask_signal_data(s->event, old, s->signal.sig); + } else + s->priority = priority; if (s->pending) prioq_reshuffle(s->event->pending, s, &s->pending_index); @@ -1478,34 +1632,18 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { } case SOURCE_SIGNAL: - assert(need_signal(s->event, s->signal.sig)); - s->enabled = m; - if (!need_signal(s->event, s->signal.sig)) { - assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0); - - (void) event_update_signal_fd(s->event); - /* If disabling failed, we might get a spurious event, - * but otherwise nothing bad should happen. */ - } - + event_gc_signal_data(s->event, &s->priority, s->signal.sig); break; case SOURCE_CHILD: - assert(need_signal(s->event, SIGCHLD)); - s->enabled = m; assert(s->event->n_enabled_child_sources > 0); s->event->n_enabled_child_sources--; - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0); - - (void) event_update_signal_fd(s->event); - } - + event_gc_signal_data(s->event, &s->priority, SIGCHLD); break; case SOURCE_EXIT: @@ -1551,37 +1689,33 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { } case SOURCE_SIGNAL: - /* Check status before enabling. */ - if (!need_signal(s->event, s->signal.sig)) { - assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0); - - r = event_update_signal_fd(s->event); - if (r < 0) { - s->enabled = SD_EVENT_OFF; - return r; - } - } s->enabled = m; + + r = event_make_signal_data(s->event, s->signal.sig, NULL); + if (r < 0) { + s->enabled = SD_EVENT_OFF; + event_gc_signal_data(s->event, &s->priority, s->signal.sig); + return r; + } + break; case SOURCE_CHILD: - /* Check status before enabling. */ - if (s->enabled == SD_EVENT_OFF) { - if (!need_signal(s->event, SIGCHLD)) { - assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0); - - r = event_update_signal_fd(s->event); - if (r < 0) { - s->enabled = SD_EVENT_OFF; - return r; - } - } + if (s->enabled == SD_EVENT_OFF) s->event->n_enabled_child_sources++; - } s->enabled = m; + + r = event_make_signal_data(s->event, s->signal.sig, SIGCHLD); + if (r < 0) { + s->enabled = SD_EVENT_OFF; + s->event->n_enabled_child_sources--; + event_gc_signal_data(s->event, &s->priority, SIGCHLD); + return r; + } + break; case SOURCE_EXIT: @@ -2025,20 +2159,35 @@ static int process_child(sd_event *e) { return 0; } -static int process_signal(sd_event *e, uint32_t events) { +static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) { bool read_one = false; int r; assert(e); - assert_return(events == EPOLLIN, -EIO); + /* If there's a signal queued on this priority and SIGCHLD is + on this priority too, then make sure to recheck the + children we watch. This is because we only ever dequeue + the first signal per priority, and if we dequeue one, and + SIGCHLD might be enqueued later we wouldn't know, but we + might have higher priority children we care about hence we + need to check that explicitly. */ + + if (sigismember(&d->sigset, SIGCHLD)) + e->need_process_child = true; + + /* If there's already an event source pending for this + * priority we don't read another */ + if (d->current) + return 0; + for (;;) { struct signalfd_siginfo si; ssize_t n; sd_event_source *s = NULL; - n = read(e->signal_fd, &si, sizeof(si)); + n = read(d->fd, &si, sizeof(si)); if (n < 0) { if (errno == EAGAIN || errno == EINTR) return read_one; @@ -2053,24 +2202,21 @@ static int process_signal(sd_event *e, uint32_t events) { read_one = true; - if (si.ssi_signo == SIGCHLD) { - r = process_child(e); - if (r < 0) - return r; - if (r > 0) - continue; - } - if (e->signal_sources) s = e->signal_sources[si.ssi_signo]; - if (!s) continue; + if (s->pending) + continue; s->signal.siginfo = si; + d->current = s; + r = source_set_pending(s, true); if (r < 0) return r; + + return 1; } } @@ -2388,23 +2534,31 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) { for (i = 0; i < m; i++) { - if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME)) - r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME)) - r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC)) - r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM)) - r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM)) - r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL)) - r = process_signal(e, ev_queue[i].events); - else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) + if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL); - else - r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); + else { + WakeupType *t = ev_queue[i].data.ptr; + + switch (*t) { + + case WAKEUP_EVENT_SOURCE: + r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); + break; + case WAKEUP_CLOCK_DATA: { + struct clock_data *d = ev_queue[i].data.ptr; + r = flush_timer(e, d->fd, ev_queue[i].events, &d->next); + break; + } + + case WAKEUP_SIGNAL_DATA: + r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events); + break; + + default: + assert_not_reached("Invalid wake-up pointer"); + } + } if (r < 0) goto finish; } diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c index 408e1679a2..c092e56b7a 100644 --- a/src/libsystemd/sd-event/test-event.c +++ b/src/libsystemd/sd-event/test-event.c @@ -156,7 +156,7 @@ static int exit_handler(sd_event_source *s, void *userdata) { return 3; } -int main(int argc, char *argv[]) { +static void test_basic(void) { sd_event *e = NULL; sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL; static const char ch = 'x'; @@ -244,6 +244,70 @@ int main(int argc, char *argv[]) { safe_close_pair(b); safe_close_pair(d); safe_close_pair(k); +} + +static int last_rtqueue_sigval = 0; +static int n_rtqueue = 0; + +static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) { + last_rtqueue_sigval = si->ssi_int; + n_rtqueue ++; + return 0; +} + +static void test_rtqueue(void) { + sd_event_source *u = NULL, *v = NULL, *s = NULL; + sd_event *e = NULL; + + assert_se(sd_event_default(&e) >= 0); + + assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0); + assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0); + assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0); + assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0); + + assert_se(sd_event_source_set_priority(v, -10) >= 0); + + assert(sigqueue(getpid(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0); + assert(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0); + assert(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0); + assert(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0); + assert(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0); + + assert_se(n_rtqueue == 0); + assert_se(last_rtqueue_sigval == 0); + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 1); + assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */ + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 2); + assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */ + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 3); + assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */ + + assert_se(sd_event_run(e, (uint64_t) -1) >= 1); + assert_se(n_rtqueue == 4); + assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */ + + assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */ + assert_se(n_rtqueue == 4); + assert_se(last_rtqueue_sigval == 1); + + sd_event_source_unref(u); + sd_event_source_unref(v); + sd_event_source_unref(s); + + sd_event_unref(e); +} + +int main(int argc, char *argv[]) { + + test_basic(); + test_rtqueue(); return 0; } diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c index 7d6a4b78cf..55da26e9d9 100644 --- a/src/libsystemd/sd-login/sd-login.c +++ b/src/libsystemd/sd-login/sd-login.c @@ -35,6 +35,16 @@ #include "hostname-util.h" #include "sd-login.h" +/* Error codes: + * + * invalid input parameters → -EINVAL + * invalid fd → -EBADF + * process does not exist → -ESRCH + * cgroup does not exist → -ENOENT + * machine, session does not exist → -ENXIO + * requested metadata on object is missing → -ENODATA + */ + _public_ int sd_pid_get_session(pid_t pid, char **session) { assert_return(pid >= 0, -EINVAL); @@ -91,6 +101,32 @@ _public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) { return cg_pid_get_owner_uid(pid, uid); } +_public_ int sd_pid_get_cgroup(pid_t pid, char **cgroup) { + char *c; + int r; + + assert_return(pid >= 0, -EINVAL); + assert_return(cgroup, -EINVAL); + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &c); + if (r < 0) + return r; + + /* The internal APIs return the empty string for the root + * cgroup, let's return the "/" in the public APIs instead, as + * that's easier and less ambigious for people to grok. */ + if (isempty(c)) { + free(c); + c = strdup("/"); + if (!c) + return -ENOMEM; + + } + + *cgroup = c; + return 0; +} + _public_ int sd_peer_get_session(int fd, char **session) { struct ucred ucred = {}; int r; @@ -189,7 +225,23 @@ _public_ int sd_peer_get_user_slice(int fd, char **slice) { return cg_pid_get_user_slice(ucred.pid, slice); } +_public_ int sd_peer_get_cgroup(int fd, char **cgroup) { + struct ucred ucred; + int r; + + assert_return(fd >= 0, -EBADF); + assert_return(cgroup, -EINVAL); + + r = getpeercred(fd, &ucred); + if (r < 0) + return r; + + return sd_pid_get_cgroup(ucred.pid, cgroup); +} + static int file_of_uid(uid_t uid, char **p) { + + assert_return(uid_is_valid(uid), -EINVAL); assert(p); if (asprintf(p, "/run/systemd/users/" UID_FMT, uid) < 0) @@ -216,11 +268,15 @@ _public_ int sd_uid_get_state(uid_t uid, char**state) { if (!s) return -ENOMEM; - } else if (r < 0) { + } + if (r < 0) { free(s); return r; - } else if (!s) + } + if (isempty(s)) { + free(s); return -EIO; + } *state = s; return 0; @@ -238,12 +294,11 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) { r = parse_env_file(p, NEWLINE, "DISPLAY", &s, NULL); if (r == -ENOENT) - return -ENXIO; + return -ENODATA; if (r < 0) return r; - if (isempty(s)) - return -ENXIO; + return -ENODATA; *session = s; s = NULL; @@ -251,35 +306,63 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) { return 0; } +static int file_of_seat(const char *seat, char **_p) { + char *p; + int r; + + assert(_p); + + if (seat) { + if (!filename_is_valid(seat)) + return -EINVAL; + + p = strappend("/run/systemd/seats/", seat); + } else { + _cleanup_free_ char *buf = NULL; + + r = sd_session_get_seat(NULL, &buf); + if (r < 0) + return r; + + p = strappend("/run/systemd/seats/", buf); + } + + if (!p) + return -ENOMEM; + + *_p = p; + p = NULL; + return 0; +} + _public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) { _cleanup_free_ char *t = NULL, *s = NULL, *p = NULL; size_t l; int r; const char *word, *variable, *state; - assert_return(seat, -EINVAL); + assert_return(uid_is_valid(uid), -EINVAL); - variable = require_active ? "ACTIVE_UID" : "UIDS"; + r = file_of_seat(seat, &p); + if (r < 0) + return r; - p = strappend("/run/systemd/seats/", seat); - if (!p) - return -ENOMEM; + variable = require_active ? "ACTIVE_UID" : "UIDS"; r = parse_env_file(p, NEWLINE, variable, &s, NULL); - + if (r == -ENOENT) + return 0; if (r < 0) return r; - - if (!s) - return -EIO; + if (isempty(s)) + return 0; if (asprintf(&t, UID_FMT, uid) < 0) return -ENOMEM; - FOREACH_WORD(word, l, s, state) { + FOREACH_WORD(word, l, s, state) if (strneq(t, word, l)) return 1; - } return 0; } @@ -289,31 +372,22 @@ static int uid_get_array(uid_t uid, const char *variable, char ***array) { char **a; int r; + assert(variable); + r = file_of_uid(uid, &p); if (r < 0) return r; - r = parse_env_file(p, NEWLINE, - variable, &s, - NULL); - if (r < 0) { - if (r == -ENOENT) { - if (array) - *array = NULL; - return 0; - } - - return r; - } - - if (!s) { + r = parse_env_file(p, NEWLINE, variable, &s, NULL); + if (r == -ENOENT || (r >= 0 && isempty(s))) { if (array) *array = NULL; return 0; } + if (r < 0) + return r; a = strv_split(s, " "); - if (!a) return -ENOMEM; @@ -375,37 +449,39 @@ static int file_of_session(const char *session, char **_p) { } _public_ int sd_session_is_active(const char *session) { - int r; _cleanup_free_ char *p = NULL, *s = NULL; + int r; r = file_of_session(session, &p); if (r < 0) return r; r = parse_env_file(p, NEWLINE, "ACTIVE", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - - if (!s) + if (isempty(s)) return -EIO; return parse_boolean(s); } _public_ int sd_session_is_remote(const char *session) { - int r; _cleanup_free_ char *p = NULL, *s = NULL; + int r; r = file_of_session(session, &p); if (r < 0) return r; r = parse_env_file(p, NEWLINE, "REMOTE", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - - if (!s) - return -EIO; + if (isempty(s)) + return -ENODATA; return parse_boolean(s); } @@ -421,9 +497,11 @@ _public_ int sd_session_get_state(const char *session, char **state) { return r; r = parse_env_file(p, NEWLINE, "STATE", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - else if (!s) + if (isempty(s)) return -EIO; *state = s; @@ -443,10 +521,11 @@ _public_ int sd_session_get_uid(const char *session, uid_t *uid) { return r; r = parse_env_file(p, NEWLINE, "UID", &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - - if (!s) + if (isempty(s)) return -EIO; return parse_uid(s, uid); @@ -457,17 +536,19 @@ static int session_get_string(const char *session, const char *field, char **val int r; assert_return(value, -EINVAL); + assert(field); r = file_of_session(session, &p); if (r < 0) return r; r = parse_env_file(p, NEWLINE, field, &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - if (isempty(s)) - return -ENXIO; + return -ENODATA; *value = s; s = NULL; @@ -487,6 +568,8 @@ _public_ int sd_session_get_vt(const char *session, unsigned *vtnr) { unsigned u; int r; + assert_return(vtnr, -EINVAL); + r = session_get_string(session, "VTNR", &vtnr_string); if (r < 0) return r; @@ -542,32 +625,6 @@ _public_ int sd_session_get_remote_host(const char *session, char **remote_host) return session_get_string(session, "REMOTE_HOST", remote_host); } -static int file_of_seat(const char *seat, char **_p) { - char *p; - int r; - - assert(_p); - - if (seat) - p = strappend("/run/systemd/seats/", seat); - else { - _cleanup_free_ char *buf = NULL; - - r = sd_session_get_seat(NULL, &buf); - if (r < 0) - return r; - - p = strappend("/run/systemd/seats/", buf); - } - - if (!p) - return -ENOMEM; - - *_p = p; - p = NULL; - return 0; -} - _public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) { _cleanup_free_ char *p = NULL, *s = NULL, *t = NULL; int r; @@ -582,6 +639,8 @@ _public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) { "ACTIVE", &s, "ACTIVE_UID", &t, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; @@ -620,7 +679,8 @@ _public_ int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **ui "SESSIONS", &s, "ACTIVE_SESSIONS", &t, NULL); - + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; @@ -652,7 +712,6 @@ _public_ int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **ui return -ENOMEM; r = parse_uid(k, b + i); - if (r < 0) continue; @@ -683,7 +742,7 @@ static int seat_get_can(const char *seat, const char *variable) { _cleanup_free_ char *p = NULL, *s = NULL; int r; - assert_return(variable, -EINVAL); + assert(variable); r = file_of_seat(seat, &p); if (r < 0) @@ -692,10 +751,12 @@ static int seat_get_can(const char *seat, const char *variable) { r = parse_env_file(p, NEWLINE, variable, &s, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; - if (!s) - return 0; + if (isempty(s)) + return -ENODATA; return parse_boolean(s); } @@ -819,6 +880,8 @@ _public_ int sd_machine_get_class(const char *machine, char **class) { p = strjoina("/run/systemd/machines/", machine); r = parse_env_file(p, NEWLINE, "CLASS", &c, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; if (!c) @@ -842,6 +905,8 @@ _public_ int sd_machine_get_ifindices(const char *machine, int **ifindices) { p = strjoina("/run/systemd/machines/", machine); r = parse_env_file(p, NEWLINE, "NETIF", &netif, NULL); + if (r == -ENOENT) + return -ENXIO; if (r < 0) return r; if (!netif) { diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c index ddea7ffa14..f734ce9eee 100644 --- a/src/libsystemd/sd-login/test-login.c +++ b/src/libsystemd/sd-login/test-login.c @@ -33,7 +33,7 @@ static void test_login(void) { _cleanup_free_ char *pp = NULL, *qq = NULL; int r, k; uid_t u, u2; - char *seat, *type, *class, *display, *remote_user, *remote_host, *display_session; + char *seat, *type, *class, *display, *remote_user, *remote_host, *display_session, *cgroup; char *session; char *state; char *session2; @@ -50,9 +50,13 @@ static void test_login(void) { assert_se(sd_pid_get_owner_uid(0, &u2) == 0); printf("user = "UID_FMT"\n", u2); + assert_se(sd_pid_get_cgroup(0, &cgroup) == 0); + printf("cgroup = %s\n", cgroup); + free(cgroup); + display_session = NULL; r = sd_uid_get_display(u2, &display_session); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("user's display session = %s\n", strna(display_session)); free(display_session); @@ -108,19 +112,19 @@ static void test_login(void) { display = NULL; r = sd_session_get_display(session, &display); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("display = %s\n", strna(display)); free(display); remote_user = NULL; r = sd_session_get_remote_user(session, &remote_user); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("remote_user = %s\n", strna(remote_user)); free(remote_user); remote_host = NULL; r = sd_session_get_remote_host(session, &remote_host); - assert_se(r >= 0 || r == -ENXIO); + assert_se(r >= 0 || r == -ENODATA); printf("remote_host = %s\n", strna(remote_host)); free(remote_host); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index a56960506c..1c64c3e771 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -4737,6 +4737,7 @@ static int create_subcgroup(pid_t pid) { _cleanup_free_ char *cgroup = NULL; const char *child; int unified, r; + CGroupMask supported; /* In the unified hierarchy inner nodes may only only contain * subgroups, but not processes. Hence, if we running in the @@ -4756,6 +4757,10 @@ static int create_subcgroup(pid_t pid) { if (unified == 0) return 0; + r = cg_mask_supported(&supported); + if (r < 0) + return log_error_errno(r, "Failed to determine supported controllers: %m"); + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); if (r < 0) return log_error_errno(r, "Failed to get our control group: %m"); @@ -4770,6 +4775,8 @@ static int create_subcgroup(pid_t pid) { if (r < 0) return log_error_errno(r, "Failed to create %s subcgroup: %m", child); + /* Try to enable as many controllers as possible for the new payload. */ + (void) cg_enable_everywhere(supported, supported, cgroup); return 0; } diff --git a/src/systemd/sd-login.h b/src/systemd/sd-login.h index 9260396d5d..59c6eedcda 100644 --- a/src/systemd/sd-login.h +++ b/src/systemd/sd-login.h @@ -81,34 +81,42 @@ int sd_pid_get_user_slice(pid_t pid, char **slice); * container. This will return an error for non-machine processes. */ int sd_pid_get_machine_name(pid_t pid, char **machine); -/* Similar to sd_pid_get_session(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Get the control group from a PID, relative to the root of the + * hierarchy. */ +int sd_pid_get_cgroup(pid_t pid, char **cgroup); + +/* Similar to sd_pid_get_session(), but retrieves data about the peer + * of a connected AF_UNIX socket */ int sd_peer_get_session(int fd, char **session); -/* Similar to sd_pid_get_owner_uid(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_owner_uid(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_owner_uid(int fd, uid_t *uid); -/* Similar to sd_pid_get_unit(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_unit(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_unit(int fd, char **unit); -/* Similar to sd_pid_get_user_unit(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_user_unit(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_user_unit(int fd, char **unit); -/* Similar to sd_pid_get_slice(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_slice(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_slice(int fd, char **slice); -/* Similar to sd_pid_get_user_slice(), but retrieves data about peer of - * connected AF_UNIX socket */ +/* Similar to sd_pid_get_user_slice(), but retrieves data about the peer of + * a connected AF_UNIX socket */ int sd_peer_get_user_slice(int fd, char **slice); -/* Similar to sd_pid_get_machine_name(), but retrieves data about peer - * of connected AF_UNIX socket */ +/* Similar to sd_pid_get_machine_name(), but retrieves data about the + * peer of a a connected AF_UNIX socket */ int sd_peer_get_machine_name(int fd, char **machine); +/* Similar to sd_pid_get_cgroup(), but retrieves data about the peer + * of a connected AF_UNIX socket. */ +int sd_peer_get_cgroup(pid_t pid, char **cgroup); + /* Get state from UID. Possible states: offline, lingering, online, active, closing */ int sd_uid_get_state(uid_t uid, char **state); diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index ecc9d70bf4..ff7e45901c 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -295,6 +295,17 @@ static void test_shift_path(void) { test_shift_path_one("/foobar/waldo", "/fuckfuck", "/foobar/waldo"); } +static void test_mask_supported(void) { + + CGroupMask m; + CGroupController c; + + assert_se(cg_mask_supported(&m) >= 0); + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) + printf("'%s' is supported: %s\n", cgroup_controller_to_string(c), yes_no(m & CGROUP_CONTROLLER_TO_MASK(c))); +} + int main(void) { test_path_decode_unit(); test_path_get_unit(); @@ -309,6 +320,7 @@ int main(void) { test_controller_is_valid(); test_slice_to_path(); test_shift_path(); + test_mask_supported(); return 0; } diff --git a/src/test/test-util.c b/src/test/test-util.c index dff38ab6f6..8ceb71f22a 100644 --- a/src/test/test-util.c +++ b/src/test/test-util.c @@ -270,6 +270,9 @@ static void test_parse_pid(void) { r = parse_pid("0xFFFFFFFFFFFFFFFFF", &pid); assert_se(r == -ERANGE); assert_se(pid == 65); + + r = parse_pid("junk", &pid); + assert_se(r == -EINVAL); } static void test_parse_uid(void) { @@ -282,6 +285,9 @@ static void test_parse_uid(void) { r = parse_uid("65535", &uid); assert_se(r == -ENXIO); + + r = parse_uid("asdsdas", &uid); + assert_se(r == -EINVAL); } static void test_safe_atou16(void) { |