summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/basic/audit.c9
-rw-r--r--src/basic/cgroup-util.c65
-rw-r--r--src/basic/macro.h3
-rw-r--r--src/basic/util.c25
-rw-r--r--src/basic/util.h5
-rw-r--r--src/core/cgroup.c41
-rw-r--r--src/core/cgroup.h1
-rw-r--r--src/core/killall.c6
-rw-r--r--src/core/manager.c12
-rw-r--r--src/core/unit.c20
-rw-r--r--src/libsystemd/libsystemd.sym6
-rw-r--r--src/libsystemd/sd-bus/bus-creds.c8
-rw-r--r--src/libsystemd/sd-event/sd-event.c430
-rw-r--r--src/libsystemd/sd-event/test-event.c66
-rw-r--r--src/libsystemd/sd-login/sd-login.c213
-rw-r--r--src/libsystemd/sd-login/test-login.c14
-rw-r--r--src/nspawn/nspawn.c7
-rw-r--r--src/systemd/sd-login.h36
-rw-r--r--src/test/test-cgroup-util.c12
-rw-r--r--src/test/test-util.c6
20 files changed, 679 insertions, 306 deletions
diff --git a/src/basic/audit.c b/src/basic/audit.c
index 54148fcf18..1f593aa813 100644
--- a/src/basic/audit.c
+++ b/src/basic/audit.c
@@ -36,6 +36,11 @@ int audit_session_from_pid(pid_t pid, uint32_t *id) {
assert(id);
+ /* We don't convert ENOENT to ESRCH here, since we can't
+ * really distuingish between "audit is not available in the
+ * kernel" and "the process does not exist", both which will
+ * result in ENOENT. */
+
p = procfs_file_alloca(pid, "sessionid");
r = read_one_line_file(p, &s);
@@ -47,7 +52,7 @@ int audit_session_from_pid(pid_t pid, uint32_t *id) {
return r;
if (u == AUDIT_SESSION_INVALID || u <= 0)
- return -ENXIO;
+ return -ENODATA;
*id = u;
return 0;
@@ -68,6 +73,8 @@ int audit_loginuid_from_pid(pid_t pid, uid_t *uid) {
return r;
r = parse_uid(s, &u);
+ if (r == -ENXIO) /* the UID was -1 */
+ return -ENODATA;
if (r < 0)
return r;
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 0ebe570bb8..a298b29382 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -187,7 +187,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo
if (ignore_self && pid == my_pid)
continue;
- if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
continue;
/* If we haven't killed this process yet, kill
@@ -205,7 +205,7 @@ int cg_kill(const char *controller, const char *path, int sig, bool sigcont, boo
done = false;
- r = set_put(s, LONG_TO_PTR(pid));
+ r = set_put(s, PID_TO_PTR(pid));
if (r < 0) {
if (ret >= 0)
return r;
@@ -318,7 +318,7 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char
if (ignore_self && pid == my_pid)
continue;
- if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
continue;
/* Ignore kernel threads. Since they can only
@@ -338,7 +338,7 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char
done = false;
- r = set_put(s, LONG_TO_PTR(pid));
+ r = set_put(s, PID_TO_PTR(pid));
if (r < 0) {
if (ret >= 0)
return r;
@@ -460,20 +460,23 @@ static const char *controller_to_dirname(const char *controller) {
return controller;
}
-static int join_path_legacy(const char *controller_dn, const char *path, const char *suffix, char **fs) {
+static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
+ const char *dn;
char *t = NULL;
assert(fs);
- assert(controller_dn);
+ assert(controller);
+
+ dn = controller_to_dirname(controller);
if (isempty(path) && isempty(suffix))
- t = strappend("/sys/fs/cgroup/", controller_dn);
+ t = strappend("/sys/fs/cgroup/", dn);
else if (isempty(path))
- t = strjoin("/sys/fs/cgroup/", controller_dn, "/", suffix, NULL);
+ t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
else if (isempty(suffix))
- t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, NULL);
+ t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
else
- t = strjoin("/sys/fs/cgroup/", controller_dn, "/", path, "/", suffix, NULL);
+ t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
if (!t)
return -ENOMEM;
@@ -509,15 +512,15 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch
if (!controller) {
char *t;
- /* If no controller is specified, we assume only the
- * path below the controller matters */
+ /* If no controller is specified, we return the path
+ * *below* the controllers, without any prefix. */
if (!path && !suffix)
return -EINVAL;
- if (isempty(suffix))
+ if (!suffix)
t = strdup(path);
- else if (isempty(path))
+ else if (!path)
t = strdup(suffix);
else
t = strjoin(path, "/", suffix, NULL);
@@ -537,14 +540,8 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch
if (unified > 0)
r = join_path_unified(path, suffix, fs);
- else {
- const char *dn;
-
- dn = controller_to_dirname(controller);
-
- r = join_path_legacy(dn, path, suffix, fs);
- }
-
+ else
+ r = join_path_legacy(controller, path, suffix, fs);
if (r < 0)
return r;
@@ -873,7 +870,7 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
return 0;
}
- return -ENOENT;
+ return -ENODATA;
}
int cg_install_release_agent(const char *controller, const char *agent) {
@@ -902,7 +899,7 @@ int cg_install_release_agent(const char *controller, const char *agent) {
r = write_string_file(fs, agent, 0);
if (r < 0)
return r;
- } else if (!streq(sc, agent))
+ } else if (!path_equal(sc, agent))
return -EEXIST;
fs = mfree(fs);
@@ -1005,6 +1002,8 @@ int cg_is_empty_recursive(const char *controller, const char *path) {
return r;
r = read_one_line_file(populated, &t);
+ if (r == -ENOENT)
+ return 1;
if (r < 0)
return r;
@@ -1898,7 +1897,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids,
int r = 0;
SET_FOREACH(pidp, pids, i) {
- pid_t pid = PTR_TO_LONG(pidp);
+ pid_t pid = PTR_TO_PID(pidp);
int q;
q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
@@ -1911,7 +1910,7 @@ int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids,
int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
CGroupController c;
- int r, unified;
+ int r = 0, unified;
if (!path_equal(from, to)) {
r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
@@ -1982,14 +1981,22 @@ int cg_mask_supported(CGroupMask *ret) {
if (unified < 0)
return unified;
if (unified > 0) {
- _cleanup_free_ char *controllers = NULL;
+ _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
const char *c;
/* In the unified hierarchy we can read the supported
* and accessible controllers from a the top-level
* cgroup attribute */
- r = read_one_line_file("/sys/fs/cgroup/cgroup.controllers", &controllers);
+ r = cg_get_root_path(&root);
+ if (r < 0)
+ return r;
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
+ if (r < 0)
+ return r;
+
+ r = read_one_line_file(path, &controllers);
if (r < 0)
return r;
@@ -2156,7 +2163,7 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
r = write_string_file(fs, s, 0);
if (r < 0)
- log_warning_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
+ log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
}
}
diff --git a/src/basic/macro.h b/src/basic/macro.h
index 627d768b76..cbc3ca97b8 100644
--- a/src/basic/macro.h
+++ b/src/basic/macro.h
@@ -298,6 +298,9 @@ static inline unsigned long ALIGN_POWER2(unsigned long u) {
#define PTR_TO_GID(p) ((gid_t) (((uintptr_t) (p))-1))
#define GID_TO_PTR(u) ((void*) (((uintptr_t) (u))+1))
+#define PTR_TO_PID(p) ((pid_t) ((uintptr_t) p))
+#define PID_TO_PTR(p) ((void*) ((uintptr_t) p))
+
#define memzero(x,l) (memset((x), 0, (l)))
#define zero(x) (memzero(&(x), sizeof(x)))
diff --git a/src/basic/util.c b/src/basic/util.c
index f01f5f237b..86aacad307 100644
--- a/src/basic/util.c
+++ b/src/basic/util.c
@@ -373,6 +373,19 @@ int parse_pid(const char *s, pid_t* ret_pid) {
return 0;
}
+bool uid_is_valid(uid_t uid) {
+
+ /* Some libc APIs use UID_INVALID as special placeholder */
+ if (uid == (uid_t) 0xFFFFFFFF)
+ return false;
+
+ /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */
+ if (uid == (uid_t) 0xFFFF)
+ return false;
+
+ return true;
+}
+
int parse_uid(const char *s, uid_t* ret_uid) {
unsigned long ul = 0;
uid_t uid;
@@ -389,13 +402,11 @@ int parse_uid(const char *s, uid_t* ret_uid) {
if ((unsigned long) uid != ul)
return -ERANGE;
- /* Some libc APIs use UID_INVALID as special placeholder */
- if (uid == (uid_t) 0xFFFFFFFF)
- return -ENXIO;
-
- /* A long time ago UIDs where 16bit, hence explicitly avoid the 16bit -1 too */
- if (uid == (uid_t) 0xFFFF)
- return -ENXIO;
+ if (!uid_is_valid(uid))
+ return -ENXIO; /* we return ENXIO instead of EINVAL
+ * here, to make it easy to distuingish
+ * invalid numeric uids invalid
+ * strings. */
if (ret_uid)
*ret_uid = uid;
diff --git a/src/basic/util.h b/src/basic/util.h
index ff7a00e928..f8e32360f0 100644
--- a/src/basic/util.h
+++ b/src/basic/util.h
@@ -154,7 +154,10 @@ int parse_size(const char *t, off_t base, off_t *size);
int parse_boolean(const char *v) _pure_;
int parse_pid(const char *s, pid_t* ret_pid);
int parse_uid(const char *s, uid_t* ret_uid);
-#define parse_gid(s, ret_uid) parse_uid(s, ret_uid)
+#define parse_gid(s, ret_gid) parse_uid(s, ret_gid)
+
+bool uid_is_valid(uid_t uid);
+#define gid_is_valid(gid) uid_is_valid(gid)
int safe_atou(const char *s, unsigned *ret_u);
int safe_atoi(const char *s, int *ret_i);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 1e78f871c7..9a025cf929 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -507,15 +507,20 @@ CGroupMask unit_get_own_mask(Unit *u) {
return 0;
/* If delegation is turned on, then turn on all cgroups,
- * unless the process we fork into it is known to drop
- * privileges anyway, and shouldn't get access to the
- * controllers anyway. */
+ * unless we are on the legacy hierarchy and the process we
+ * fork into it is known to drop privileges, and hence
+ * shouldn't get access to the controllers.
+ *
+ * Note that on the unified hierarchy it is safe to delegate
+ * controllers to unprivileged services. */
if (c->delegate) {
ExecContext *e;
e = unit_get_exec_context(u);
- if (!e || exec_context_maintains_privileges(e))
+ if (!e ||
+ exec_context_maintains_privileges(e) ||
+ cg_unified() > 0)
return _CGROUP_MASK_ALL;
}
@@ -1378,9 +1383,8 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
}
}
-Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
+Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) {
_cleanup_free_ char *cgroup = NULL;
- Unit *u;
int r;
assert(m);
@@ -1388,22 +1392,33 @@ Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
if (pid <= 0)
return NULL;
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+ if (r < 0)
+ return NULL;
+
+ return manager_get_unit_by_cgroup(m, cgroup);
+}
+
+Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
+ Unit *u;
+
+ assert(m);
+
+ if (pid <= 0)
+ return NULL;
+
if (pid == 1)
return hashmap_get(m->units, SPECIAL_INIT_SCOPE);
- u = hashmap_get(m->watch_pids1, LONG_TO_PTR(pid));
+ u = hashmap_get(m->watch_pids1, PID_TO_PTR(pid));
if (u)
return u;
- u = hashmap_get(m->watch_pids2, LONG_TO_PTR(pid));
+ u = hashmap_get(m->watch_pids2, PID_TO_PTR(pid));
if (u)
return u;
- r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
- if (r < 0)
- return NULL;
-
- return manager_get_unit_by_cgroup(m, cgroup);
+ return manager_get_unit_by_pid_cgroup(m, pid);
}
int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 1ce21f43f2..438f5bf50f 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -130,6 +130,7 @@ void manager_shutdown_cgroup(Manager *m, bool delete);
unsigned manager_dispatch_cgroup_queue(Manager *m);
Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
+Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid);
Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
int unit_search_main_pid(Unit *u, pid_t *ret);
diff --git a/src/core/killall.c b/src/core/killall.c
index 2a9d72c901..ee5d388560 100644
--- a/src/core/killall.c
+++ b/src/core/killall.c
@@ -108,7 +108,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) {
return;
}
- set_remove(pids, ULONG_TO_PTR(pid));
+ (void) set_remove(pids, PID_TO_PTR(pid));
}
/* Now explicitly check who might be remaining, who
@@ -117,7 +117,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) {
/* We misuse getpgid as a check whether a
* process still exists. */
- if (getpgid((pid_t) PTR_TO_ULONG(p)) >= 0)
+ if (getpgid(PTR_TO_PID(p)) >= 0)
continue;
if (errno != ESRCH)
@@ -179,7 +179,7 @@ static int killall(int sig, Set *pids, bool send_sighup) {
if (kill(pid, sig) >= 0) {
if (pids) {
- r = set_put(pids, ULONG_TO_PTR(pid));
+ r = set_put(pids, PID_TO_PTR(pid));
if (r < 0)
log_oom();
}
diff --git a/src/core/manager.c b/src/core/manager.c
index c3327e37f5..fc10ddb5d9 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1585,19 +1585,19 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
/* Notify every unit that might be interested, but try
* to avoid notifying the same one multiple times. */
- u1 = manager_get_unit_by_pid(m, ucred->pid);
+ u1 = manager_get_unit_by_pid_cgroup(m, ucred->pid);
if (u1) {
manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds);
found = true;
}
- u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(ucred->pid));
+ u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(ucred->pid));
if (u2 && u2 != u1) {
manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds);
found = true;
}
- u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(ucred->pid));
+ u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(ucred->pid));
if (u3 && u3 != u2 && u3 != u1) {
manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds);
found = true;
@@ -1663,13 +1663,13 @@ static int manager_dispatch_sigchld(Manager *m) {
/* And now figure out the unit this belongs
* to, it might be multiple... */
- u1 = manager_get_unit_by_pid(m, si.si_pid);
+ u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid);
if (u1)
invoke_sigchld_event(m, u1, &si);
- u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(si.si_pid));
+ u2 = hashmap_get(m->watch_pids1, PID_TO_PTR(si.si_pid));
if (u2 && u2 != u1)
invoke_sigchld_event(m, u2, &si);
- u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(si.si_pid));
+ u3 = hashmap_get(m->watch_pids2, PID_TO_PTR(si.si_pid));
if (u3 && u3 != u2 && u3 != u1)
invoke_sigchld_event(m, u3, &si);
}
diff --git a/src/core/unit.c b/src/core/unit.c
index 8c07c6140d..a5714adf38 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -1995,16 +1995,16 @@ int unit_watch_pid(Unit *u, pid_t pid) {
if (r < 0)
return r;
- r = hashmap_put(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
+ r = hashmap_put(u->manager->watch_pids1, PID_TO_PTR(pid), u);
if (r == -EEXIST) {
r = hashmap_ensure_allocated(&u->manager->watch_pids2, NULL);
if (r < 0)
return r;
- r = hashmap_put(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
+ r = hashmap_put(u->manager->watch_pids2, PID_TO_PTR(pid), u);
}
- q = set_put(u->pids, LONG_TO_PTR(pid));
+ q = set_put(u->pids, PID_TO_PTR(pid));
if (q < 0)
return q;
@@ -2015,16 +2015,16 @@ void unit_unwatch_pid(Unit *u, pid_t pid) {
assert(u);
assert(pid >= 1);
- (void) hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
- (void) hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
- (void) set_remove(u->pids, LONG_TO_PTR(pid));
+ (void) hashmap_remove_value(u->manager->watch_pids1, PID_TO_PTR(pid), u);
+ (void) hashmap_remove_value(u->manager->watch_pids2, PID_TO_PTR(pid), u);
+ (void) set_remove(u->pids, PID_TO_PTR(pid));
}
void unit_unwatch_all_pids(Unit *u) {
assert(u);
while (!set_isempty(u->pids))
- unit_unwatch_pid(u, PTR_TO_LONG(set_first(u->pids)));
+ unit_unwatch_pid(u, PTR_TO_PID(set_first(u->pids)));
u->pids = set_free(u->pids);
}
@@ -2038,7 +2038,7 @@ void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) {
/* Cleans dead PIDs from our list */
SET_FOREACH(e, u->pids, i) {
- pid_t pid = PTR_TO_LONG(e);
+ pid_t pid = PTR_TO_PID(e);
if (pid == except1 || pid == except2)
continue;
@@ -2993,13 +2993,13 @@ static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) {
/* Exclude the main/control pids from being killed via the cgroup */
if (main_pid > 0) {
- r = set_put(pid_set, LONG_TO_PTR(main_pid));
+ r = set_put(pid_set, PID_TO_PTR(main_pid));
if (r < 0)
goto fail;
}
if (control_pid > 0) {
- r = set_put(pid_set, LONG_TO_PTR(control_pid));
+ r = set_put(pid_set, PID_TO_PTR(control_pid));
if (r < 0)
goto fail;
}
diff --git a/src/libsystemd/libsystemd.sym b/src/libsystemd/libsystemd.sym
index 7bf1d66dde..d5ad127bcb 100644
--- a/src/libsystemd/libsystemd.sym
+++ b/src/libsystemd/libsystemd.sym
@@ -467,3 +467,9 @@ global:
sd_bus_emit_object_removed;
sd_bus_flush_close_unref;
} LIBSYSTEMD_221;
+
+LIBSYSTEMD_226 {
+global:
+ sd_pid_get_cgroup;
+ sd_peer_get_cgroup;
+} LIBSYSTEMD_222;
diff --git a/src/libsystemd/sd-bus/bus-creds.c b/src/libsystemd/sd-bus/bus-creds.c
index 1c365b7fcd..c3cc2b7212 100644
--- a/src/libsystemd/sd-bus/bus-creds.c
+++ b/src/libsystemd/sd-bus/bus-creds.c
@@ -1062,8 +1062,8 @@ int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) {
if (missing & SD_BUS_CREDS_AUDIT_SESSION_ID) {
r = audit_session_from_pid(pid, &c->audit_session_id);
- if (r == -ENXIO) {
- /* ENXIO means: no audit session id assigned */
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit session id assigned */
c->audit_session_id = AUDIT_SESSION_INVALID;
c->mask |= SD_BUS_CREDS_AUDIT_SESSION_ID;
} else if (r < 0) {
@@ -1075,8 +1075,8 @@ int bus_creds_add_more(sd_bus_creds *c, uint64_t mask, pid_t pid, pid_t tid) {
if (missing & SD_BUS_CREDS_AUDIT_LOGIN_UID) {
r = audit_loginuid_from_pid(pid, &c->audit_login_uid);
- if (r == -ENXIO) {
- /* ENXIO means: no audit login uid assigned */
+ if (r == -ENODATA) {
+ /* ENODATA means: no audit login uid assigned */
c->audit_login_uid = UID_INVALID;
c->mask |= SD_BUS_CREDS_AUDIT_LOGIN_UID;
} else if (r < 0) {
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
index c419be820a..838ee4d454 100644
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -56,9 +56,22 @@ typedef enum EventSourceType {
_SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
+/* All objects we use in epoll events start with this value, so that
+ * we know how to dispatch it */
+typedef enum WakeupType {
+ WAKEUP_NONE,
+ WAKEUP_EVENT_SOURCE,
+ WAKEUP_CLOCK_DATA,
+ WAKEUP_SIGNAL_DATA,
+ _WAKEUP_TYPE_MAX,
+ _WAKEUP_TYPE_INVALID = -1,
+} WakeupType;
+
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
+ WakeupType wakeup;
+
unsigned n_ref;
sd_event *event;
@@ -120,6 +133,7 @@ struct sd_event_source {
};
struct clock_data {
+ WakeupType wakeup;
int fd;
/* For all clocks we maintain two priority queues each, one
@@ -136,11 +150,23 @@ struct clock_data {
bool needs_rearm:1;
};
+struct signal_data {
+ WakeupType wakeup;
+
+ /* For each priority we maintain one signal fd, so that we
+ * only have to dequeue a single event per priority at a
+ * time. */
+
+ int fd;
+ int64_t priority;
+ sigset_t sigset;
+ sd_event_source *current;
+};
+
struct sd_event {
unsigned n_ref;
int epoll_fd;
- int signal_fd;
int watchdog_fd;
Prioq *pending;
@@ -157,8 +183,8 @@ struct sd_event {
usec_t perturb;
- sigset_t sigset;
- sd_event_source **signal_sources;
+ sd_event_source **signal_sources; /* indexed by signal number */
+ Hashmap *signal_data; /* indexed by priority */
Hashmap *child_sources;
unsigned n_enabled_child_sources;
@@ -355,6 +381,7 @@ static int exit_prioq_compare(const void *a, const void *b) {
static void free_clock_data(struct clock_data *d) {
assert(d);
+ assert(d->wakeup == WAKEUP_CLOCK_DATA);
safe_close(d->fd);
prioq_free(d->earliest);
@@ -378,7 +405,6 @@ static void event_free(sd_event *e) {
*(e->default_event_ptr) = NULL;
safe_close(e->epoll_fd);
- safe_close(e->signal_fd);
safe_close(e->watchdog_fd);
free_clock_data(&e->realtime);
@@ -392,6 +418,7 @@ static void event_free(sd_event *e) {
prioq_free(e->exit);
free(e->signal_sources);
+ hashmap_free(e->signal_data);
hashmap_free(e->child_sources);
set_free(e->post_sources);
@@ -409,13 +436,12 @@ _public_ int sd_event_new(sd_event** ret) {
return -ENOMEM;
e->n_ref = 1;
- e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
+ e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
+ e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
e->original_pid = getpid();
e->perturb = USEC_INFINITY;
- assert_se(sigemptyset(&e->sigset) == 0);
-
e->pending = prioq_new(pending_prioq_compare);
if (!e->pending) {
r = -ENOMEM;
@@ -509,7 +535,6 @@ static int source_io_register(
r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
else
r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
-
if (r < 0)
return -errno;
@@ -591,45 +616,171 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
}
}
-static bool need_signal(sd_event *e, int signal) {
- return (e->signal_sources && e->signal_sources[signal] &&
- e->signal_sources[signal]->enabled != SD_EVENT_OFF)
- ||
- (signal == SIGCHLD &&
- e->n_enabled_child_sources > 0);
-}
+static int event_make_signal_data(
+ sd_event *e,
+ int sig,
+ struct signal_data **ret) {
-static int event_update_signal_fd(sd_event *e) {
struct epoll_event ev = {};
- bool add_to_epoll;
+ struct signal_data *d;
+ bool added = false;
+ sigset_t ss_copy;
+ int64_t priority;
int r;
assert(e);
if (event_pid_changed(e))
- return 0;
+ return -ECHILD;
- add_to_epoll = e->signal_fd < 0;
+ if (e->signal_sources && e->signal_sources[sig])
+ priority = e->signal_sources[sig]->priority;
+ else
+ priority = 0;
- r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
- if (r < 0)
- return -errno;
+ d = hashmap_get(e->signal_data, &priority);
+ if (d) {
+ if (sigismember(&d->sigset, sig) > 0) {
+ if (ret)
+ *ret = d;
+ return 0;
+ }
+ } else {
+ r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
+ if (r < 0)
+ return r;
+
+ d = new0(struct signal_data, 1);
+ if (!d)
+ return -ENOMEM;
+
+ d->wakeup = WAKEUP_SIGNAL_DATA;
+ d->fd = -1;
+ d->priority = priority;
+
+ r = hashmap_put(e->signal_data, &d->priority, d);
+ if (r < 0)
+ return r;
- e->signal_fd = r;
+ added = true;
+ }
+
+ ss_copy = d->sigset;
+ assert_se(sigaddset(&ss_copy, sig) >= 0);
+
+ r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ d->sigset = ss_copy;
- if (!add_to_epoll)
+ if (d->fd >= 0) {
+ if (ret)
+ *ret = d;
return 0;
+ }
+
+ d->fd = r;
ev.events = EPOLLIN;
- ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
+ ev.data.ptr = d;
- r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
- if (r < 0) {
- e->signal_fd = safe_close(e->signal_fd);
- return -errno;
+ r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
}
+ if (ret)
+ *ret = d;
+
return 0;
+
+fail:
+ if (added) {
+ d->fd = safe_close(d->fd);
+ hashmap_remove(e->signal_data, &d->priority);
+ free(d);
+ }
+
+ return r;
+}
+
+static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
+ assert(e);
+ assert(d);
+
+ /* Turns off the specified signal in the signal data
+ * object. If the signal mask of the object becomes empty that
+ * way removes it. */
+
+ if (sigismember(&d->sigset, sig) == 0)
+ return;
+
+ assert_se(sigdelset(&d->sigset, sig) >= 0);
+
+ if (sigisemptyset(&d->sigset)) {
+
+ /* If all the mask is all-zero we can get rid of the structure */
+ hashmap_remove(e->signal_data, &d->priority);
+ assert(!d->current);
+ safe_close(d->fd);
+ free(d);
+ return;
+ }
+
+ assert(d->fd >= 0);
+
+ if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
+ log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
+}
+
+static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
+ struct signal_data *d;
+ static const int64_t zero_priority = 0;
+
+ assert(e);
+
+ /* Rechecks if the specified signal is still something we are
+ * interested in. If not, we'll unmask it, and possibly drop
+ * the signalfd for it. */
+
+ if (sig == SIGCHLD &&
+ e->n_enabled_child_sources > 0)
+ return;
+
+ if (e->signal_sources &&
+ e->signal_sources[sig] &&
+ e->signal_sources[sig]->enabled != SD_EVENT_OFF)
+ return;
+
+ /*
+ * The specified signal might be enabled in three different queues:
+ *
+ * 1) the one that belongs to the priority passed (if it is non-NULL)
+ * 2) the one that belongs to the priority of the event source of the signal (if there is one)
+ * 3) the 0 priority (to cover the SIGCHLD case)
+ *
+ * Hence, let's remove it from all three here.
+ */
+
+ if (priority) {
+ d = hashmap_get(e->signal_data, priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ if (e->signal_sources && e->signal_sources[sig]) {
+ d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
+ }
+
+ d = hashmap_get(e->signal_data, &zero_priority);
+ if (d)
+ event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
@@ -668,17 +819,11 @@ static void source_disconnect(sd_event_source *s) {
case SOURCE_SIGNAL:
if (s->signal.sig > 0) {
+
if (s->event->signal_sources)
s->event->signal_sources[s->signal.sig] = NULL;
- /* If the signal was on and now it is off... */
- if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
- assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
-
- (void) event_update_signal_fd(s->event);
- /* If disabling failed, we might get a spurious event,
- * but otherwise nothing bad should happen. */
- }
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
}
break;
@@ -688,18 +833,10 @@ static void source_disconnect(sd_event_source *s) {
if (s->enabled != SD_EVENT_OFF) {
assert(s->event->n_enabled_child_sources > 0);
s->event->n_enabled_child_sources--;
-
- /* We know the signal was on, if it is off now... */
- if (!need_signal(s->event, SIGCHLD)) {
- assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
-
- (void) event_update_signal_fd(s->event);
- /* If disabling failed, we might get a spurious event,
- * but otherwise nothing bad should happen. */
- }
}
- hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
+ (void) hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
}
break;
@@ -778,6 +915,14 @@ static int source_set_pending(sd_event_source *s, bool b) {
d->needs_rearm = true;
}
+ if (s->type == SOURCE_SIGNAL && !b) {
+ struct signal_data *d;
+
+ d = hashmap_get(s->event->signal_data, &s->priority);
+ if (d && d->current == s)
+ d->current = NULL;
+ }
+
return 0;
}
@@ -827,6 +972,7 @@ _public_ int sd_event_add_io(
if (!s)
return -ENOMEM;
+ s->wakeup = WAKEUP_EVENT_SOURCE;
s->io.fd = fd;
s->io.events = events;
s->io.callback = callback;
@@ -883,7 +1029,7 @@ static int event_setup_timer_fd(
return -errno;
ev.events = EPOLLIN;
- ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
+ ev.data.ptr = d;
r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
if (r < 0) {
@@ -993,9 +1139,9 @@ _public_ int sd_event_add_signal(
void *userdata) {
sd_event_source *s;
+ struct signal_data *d;
sigset_t ss;
int r;
- bool previous;
assert_return(e, -EINVAL);
assert_return(sig > 0, -EINVAL);
@@ -1020,8 +1166,6 @@ _public_ int sd_event_add_signal(
} else if (e->signal_sources[sig])
return -EBUSY;
- previous = need_signal(e, sig);
-
s = source_new(e, !ret, SOURCE_SIGNAL);
if (!s)
return -ENOMEM;
@@ -1033,14 +1177,10 @@ _public_ int sd_event_add_signal(
e->signal_sources[sig] = s;
- if (!previous) {
- assert_se(sigaddset(&e->sigset, sig) == 0);
-
- r = event_update_signal_fd(e);
- if (r < 0) {
- source_free(s);
- return r;
- }
+ r = event_make_signal_data(e, sig, &d);
+ if (r < 0) {
+ source_free(s);
+ return r;
}
/* Use the signal name as description for the event source by default */
@@ -1062,7 +1202,6 @@ _public_ int sd_event_add_child(
sd_event_source *s;
int r;
- bool previous;
assert_return(e, -EINVAL);
assert_return(pid > 1, -EINVAL);
@@ -1079,8 +1218,6 @@ _public_ int sd_event_add_child(
if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
return -EBUSY;
- previous = need_signal(e, SIGCHLD);
-
s = source_new(e, !ret, SOURCE_CHILD);
if (!s)
return -ENOMEM;
@@ -1099,14 +1236,11 @@ _public_ int sd_event_add_child(
e->n_enabled_child_sources ++;
- if (!previous) {
- assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
-
- r = event_update_signal_fd(e);
- if (r < 0) {
- source_free(s);
- return r;
- }
+ r = event_make_signal_data(e, SIGCHLD, NULL);
+ if (r < 0) {
+ e->n_enabled_child_sources--;
+ source_free(s);
+ return r;
}
e->need_process_child = true;
@@ -1406,6 +1540,8 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority)
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
+ int r;
+
assert_return(s, -EINVAL);
assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
assert_return(!event_pid_changed(s->event), -ECHILD);
@@ -1413,7 +1549,25 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
if (s->priority == priority)
return 0;
- s->priority = priority;
+ if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
+ struct signal_data *old, *d;
+
+ /* Move us from the signalfd belonging to the old
+ * priority to the signalfd of the new priority */
+
+ assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
+
+ s->priority = priority;
+
+ r = event_make_signal_data(s->event, s->signal.sig, &d);
+ if (r < 0) {
+ s->priority = old->priority;
+ return r;
+ }
+
+ event_unmask_signal_data(s->event, old, s->signal.sig);
+ } else
+ s->priority = priority;
if (s->pending)
prioq_reshuffle(s->event->pending, s, &s->pending_index);
@@ -1478,34 +1632,18 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
}
case SOURCE_SIGNAL:
- assert(need_signal(s->event, s->signal.sig));
-
s->enabled = m;
- if (!need_signal(s->event, s->signal.sig)) {
- assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
-
- (void) event_update_signal_fd(s->event);
- /* If disabling failed, we might get a spurious event,
- * but otherwise nothing bad should happen. */
- }
-
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
break;
case SOURCE_CHILD:
- assert(need_signal(s->event, SIGCHLD));
-
s->enabled = m;
assert(s->event->n_enabled_child_sources > 0);
s->event->n_enabled_child_sources--;
- if (!need_signal(s->event, SIGCHLD)) {
- assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
-
- (void) event_update_signal_fd(s->event);
- }
-
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
break;
case SOURCE_EXIT:
@@ -1551,37 +1689,33 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
}
case SOURCE_SIGNAL:
- /* Check status before enabling. */
- if (!need_signal(s->event, s->signal.sig)) {
- assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
-
- r = event_update_signal_fd(s->event);
- if (r < 0) {
- s->enabled = SD_EVENT_OFF;
- return r;
- }
- }
s->enabled = m;
+
+ r = event_make_signal_data(s->event, s->signal.sig, NULL);
+ if (r < 0) {
+ s->enabled = SD_EVENT_OFF;
+ event_gc_signal_data(s->event, &s->priority, s->signal.sig);
+ return r;
+ }
+
break;
case SOURCE_CHILD:
- /* Check status before enabling. */
- if (s->enabled == SD_EVENT_OFF) {
- if (!need_signal(s->event, SIGCHLD)) {
- assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
-
- r = event_update_signal_fd(s->event);
- if (r < 0) {
- s->enabled = SD_EVENT_OFF;
- return r;
- }
- }
+ if (s->enabled == SD_EVENT_OFF)
s->event->n_enabled_child_sources++;
- }
s->enabled = m;
+
+ r = event_make_signal_data(s->event, s->signal.sig, SIGCHLD);
+ if (r < 0) {
+ s->enabled = SD_EVENT_OFF;
+ s->event->n_enabled_child_sources--;
+ event_gc_signal_data(s->event, &s->priority, SIGCHLD);
+ return r;
+ }
+
break;
case SOURCE_EXIT:
@@ -2025,20 +2159,35 @@ static int process_child(sd_event *e) {
return 0;
}
-static int process_signal(sd_event *e, uint32_t events) {
+static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
bool read_one = false;
int r;
assert(e);
-
assert_return(events == EPOLLIN, -EIO);
+ /* If there's a signal queued on this priority and SIGCHLD is
+ on this priority too, then make sure to recheck the
+ children we watch. This is because we only ever dequeue
+ the first signal per priority, and if we dequeue one, and
+ SIGCHLD might be enqueued later we wouldn't know, but we
+ might have higher priority children we care about hence we
+ need to check that explicitly. */
+
+ if (sigismember(&d->sigset, SIGCHLD))
+ e->need_process_child = true;
+
+ /* If there's already an event source pending for this
+ * priority we don't read another */
+ if (d->current)
+ return 0;
+
for (;;) {
struct signalfd_siginfo si;
ssize_t n;
sd_event_source *s = NULL;
- n = read(e->signal_fd, &si, sizeof(si));
+ n = read(d->fd, &si, sizeof(si));
if (n < 0) {
if (errno == EAGAIN || errno == EINTR)
return read_one;
@@ -2053,24 +2202,21 @@ static int process_signal(sd_event *e, uint32_t events) {
read_one = true;
- if (si.ssi_signo == SIGCHLD) {
- r = process_child(e);
- if (r < 0)
- return r;
- if (r > 0)
- continue;
- }
-
if (e->signal_sources)
s = e->signal_sources[si.ssi_signo];
-
if (!s)
continue;
+ if (s->pending)
+ continue;
s->signal.siginfo = si;
+ d->current = s;
+
r = source_set_pending(s, true);
if (r < 0)
return r;
+
+ return 1;
}
}
@@ -2388,23 +2534,31 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
for (i = 0; i < m; i++) {
- if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
- r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
- r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
- r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
- r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
- r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
- r = process_signal(e, ev_queue[i].events);
- else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+ if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
- else
- r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
+ else {
+ WakeupType *t = ev_queue[i].data.ptr;
+
+ switch (*t) {
+
+ case WAKEUP_EVENT_SOURCE:
+ r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
+ break;
+ case WAKEUP_CLOCK_DATA: {
+ struct clock_data *d = ev_queue[i].data.ptr;
+ r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
+ break;
+ }
+
+ case WAKEUP_SIGNAL_DATA:
+ r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
+ break;
+
+ default:
+ assert_not_reached("Invalid wake-up pointer");
+ }
+ }
if (r < 0)
goto finish;
}
diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c
index 408e1679a2..c092e56b7a 100644
--- a/src/libsystemd/sd-event/test-event.c
+++ b/src/libsystemd/sd-event/test-event.c
@@ -156,7 +156,7 @@ static int exit_handler(sd_event_source *s, void *userdata) {
return 3;
}
-int main(int argc, char *argv[]) {
+static void test_basic(void) {
sd_event *e = NULL;
sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL;
static const char ch = 'x';
@@ -244,6 +244,70 @@ int main(int argc, char *argv[]) {
safe_close_pair(b);
safe_close_pair(d);
safe_close_pair(k);
+}
+
+static int last_rtqueue_sigval = 0;
+static int n_rtqueue = 0;
+
+static int rtqueue_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+ last_rtqueue_sigval = si->ssi_int;
+ n_rtqueue ++;
+ return 0;
+}
+
+static void test_rtqueue(void) {
+ sd_event_source *u = NULL, *v = NULL, *s = NULL;
+ sd_event *e = NULL;
+
+ assert_se(sd_event_default(&e) >= 0);
+
+ assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGRTMIN+2, SIGRTMIN+3, SIGUSR2, -1) >= 0);
+ assert_se(sd_event_add_signal(e, &u, SIGRTMIN+2, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &v, SIGRTMIN+3, rtqueue_handler, NULL) >= 0);
+ assert_se(sd_event_add_signal(e, &s, SIGUSR2, rtqueue_handler, NULL) >= 0);
+
+ assert_se(sd_event_source_set_priority(v, -10) >= 0);
+
+ assert(sigqueue(getpid(), SIGRTMIN+2, (union sigval) { .sival_int = 1 }) >= 0);
+ assert(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 2 }) >= 0);
+ assert(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 3 }) >= 0);
+ assert(sigqueue(getpid(), SIGRTMIN+3, (union sigval) { .sival_int = 4 }) >= 0);
+ assert(sigqueue(getpid(), SIGUSR2, (union sigval) { .sival_int = 5 }) >= 0);
+
+ assert_se(n_rtqueue == 0);
+ assert_se(last_rtqueue_sigval == 0);
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 1);
+ assert_se(last_rtqueue_sigval == 2); /* first SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 2);
+ assert_se(last_rtqueue_sigval == 4); /* second SIGRTMIN+3 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 3);
+ assert_se(last_rtqueue_sigval == 3); /* first SIGUSR2 */
+
+ assert_se(sd_event_run(e, (uint64_t) -1) >= 1);
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1); /* SIGRTMIN+2 */
+
+ assert_se(sd_event_run(e, 0) == 0); /* the other SIGUSR2 is dropped, because the first one was still queued */
+ assert_se(n_rtqueue == 4);
+ assert_se(last_rtqueue_sigval == 1);
+
+ sd_event_source_unref(u);
+ sd_event_source_unref(v);
+ sd_event_source_unref(s);
+
+ sd_event_unref(e);
+}
+
+int main(int argc, char *argv[]) {
+
+ test_basic();
+ test_rtqueue();
return 0;
}
diff --git a/src/libsystemd/sd-login/sd-login.c b/src/libsystemd/sd-login/sd-login.c
index 7d6a4b78cf..55da26e9d9 100644
--- a/src/libsystemd/sd-login/sd-login.c
+++ b/src/libsystemd/sd-login/sd-login.c
@@ -35,6 +35,16 @@
#include "hostname-util.h"
#include "sd-login.h"
+/* Error codes:
+ *
+ * invalid input parameters → -EINVAL
+ * invalid fd → -EBADF
+ * process does not exist → -ESRCH
+ * cgroup does not exist → -ENOENT
+ * machine, session does not exist → -ENXIO
+ * requested metadata on object is missing → -ENODATA
+ */
+
_public_ int sd_pid_get_session(pid_t pid, char **session) {
assert_return(pid >= 0, -EINVAL);
@@ -91,6 +101,32 @@ _public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) {
return cg_pid_get_owner_uid(pid, uid);
}
+_public_ int sd_pid_get_cgroup(pid_t pid, char **cgroup) {
+ char *c;
+ int r;
+
+ assert_return(pid >= 0, -EINVAL);
+ assert_return(cgroup, -EINVAL);
+
+ r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &c);
+ if (r < 0)
+ return r;
+
+ /* The internal APIs return the empty string for the root
+ * cgroup, let's return the "/" in the public APIs instead, as
+ * that's easier and less ambigious for people to grok. */
+ if (isempty(c)) {
+ free(c);
+ c = strdup("/");
+ if (!c)
+ return -ENOMEM;
+
+ }
+
+ *cgroup = c;
+ return 0;
+}
+
_public_ int sd_peer_get_session(int fd, char **session) {
struct ucred ucred = {};
int r;
@@ -189,7 +225,23 @@ _public_ int sd_peer_get_user_slice(int fd, char **slice) {
return cg_pid_get_user_slice(ucred.pid, slice);
}
+_public_ int sd_peer_get_cgroup(int fd, char **cgroup) {
+ struct ucred ucred;
+ int r;
+
+ assert_return(fd >= 0, -EBADF);
+ assert_return(cgroup, -EINVAL);
+
+ r = getpeercred(fd, &ucred);
+ if (r < 0)
+ return r;
+
+ return sd_pid_get_cgroup(ucred.pid, cgroup);
+}
+
static int file_of_uid(uid_t uid, char **p) {
+
+ assert_return(uid_is_valid(uid), -EINVAL);
assert(p);
if (asprintf(p, "/run/systemd/users/" UID_FMT, uid) < 0)
@@ -216,11 +268,15 @@ _public_ int sd_uid_get_state(uid_t uid, char**state) {
if (!s)
return -ENOMEM;
- } else if (r < 0) {
+ }
+ if (r < 0) {
free(s);
return r;
- } else if (!s)
+ }
+ if (isempty(s)) {
+ free(s);
return -EIO;
+ }
*state = s;
return 0;
@@ -238,12 +294,11 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) {
r = parse_env_file(p, NEWLINE, "DISPLAY", &s, NULL);
if (r == -ENOENT)
- return -ENXIO;
+ return -ENODATA;
if (r < 0)
return r;
-
if (isempty(s))
- return -ENXIO;
+ return -ENODATA;
*session = s;
s = NULL;
@@ -251,35 +306,63 @@ _public_ int sd_uid_get_display(uid_t uid, char **session) {
return 0;
}
+static int file_of_seat(const char *seat, char **_p) {
+ char *p;
+ int r;
+
+ assert(_p);
+
+ if (seat) {
+ if (!filename_is_valid(seat))
+ return -EINVAL;
+
+ p = strappend("/run/systemd/seats/", seat);
+ } else {
+ _cleanup_free_ char *buf = NULL;
+
+ r = sd_session_get_seat(NULL, &buf);
+ if (r < 0)
+ return r;
+
+ p = strappend("/run/systemd/seats/", buf);
+ }
+
+ if (!p)
+ return -ENOMEM;
+
+ *_p = p;
+ p = NULL;
+ return 0;
+}
+
_public_ int sd_uid_is_on_seat(uid_t uid, int require_active, const char *seat) {
_cleanup_free_ char *t = NULL, *s = NULL, *p = NULL;
size_t l;
int r;
const char *word, *variable, *state;
- assert_return(seat, -EINVAL);
+ assert_return(uid_is_valid(uid), -EINVAL);
- variable = require_active ? "ACTIVE_UID" : "UIDS";
+ r = file_of_seat(seat, &p);
+ if (r < 0)
+ return r;
- p = strappend("/run/systemd/seats/", seat);
- if (!p)
- return -ENOMEM;
+ variable = require_active ? "ACTIVE_UID" : "UIDS";
r = parse_env_file(p, NEWLINE, variable, &s, NULL);
-
+ if (r == -ENOENT)
+ return 0;
if (r < 0)
return r;
-
- if (!s)
- return -EIO;
+ if (isempty(s))
+ return 0;
if (asprintf(&t, UID_FMT, uid) < 0)
return -ENOMEM;
- FOREACH_WORD(word, l, s, state) {
+ FOREACH_WORD(word, l, s, state)
if (strneq(t, word, l))
return 1;
- }
return 0;
}
@@ -289,31 +372,22 @@ static int uid_get_array(uid_t uid, const char *variable, char ***array) {
char **a;
int r;
+ assert(variable);
+
r = file_of_uid(uid, &p);
if (r < 0)
return r;
- r = parse_env_file(p, NEWLINE,
- variable, &s,
- NULL);
- if (r < 0) {
- if (r == -ENOENT) {
- if (array)
- *array = NULL;
- return 0;
- }
-
- return r;
- }
-
- if (!s) {
+ r = parse_env_file(p, NEWLINE, variable, &s, NULL);
+ if (r == -ENOENT || (r >= 0 && isempty(s))) {
if (array)
*array = NULL;
return 0;
}
+ if (r < 0)
+ return r;
a = strv_split(s, " ");
-
if (!a)
return -ENOMEM;
@@ -375,37 +449,39 @@ static int file_of_session(const char *session, char **_p) {
}
_public_ int sd_session_is_active(const char *session) {
- int r;
_cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
r = file_of_session(session, &p);
if (r < 0)
return r;
r = parse_env_file(p, NEWLINE, "ACTIVE", &s, NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
-
- if (!s)
+ if (isempty(s))
return -EIO;
return parse_boolean(s);
}
_public_ int sd_session_is_remote(const char *session) {
- int r;
_cleanup_free_ char *p = NULL, *s = NULL;
+ int r;
r = file_of_session(session, &p);
if (r < 0)
return r;
r = parse_env_file(p, NEWLINE, "REMOTE", &s, NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
-
- if (!s)
- return -EIO;
+ if (isempty(s))
+ return -ENODATA;
return parse_boolean(s);
}
@@ -421,9 +497,11 @@ _public_ int sd_session_get_state(const char *session, char **state) {
return r;
r = parse_env_file(p, NEWLINE, "STATE", &s, NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
- else if (!s)
+ if (isempty(s))
return -EIO;
*state = s;
@@ -443,10 +521,11 @@ _public_ int sd_session_get_uid(const char *session, uid_t *uid) {
return r;
r = parse_env_file(p, NEWLINE, "UID", &s, NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
-
- if (!s)
+ if (isempty(s))
return -EIO;
return parse_uid(s, uid);
@@ -457,17 +536,19 @@ static int session_get_string(const char *session, const char *field, char **val
int r;
assert_return(value, -EINVAL);
+ assert(field);
r = file_of_session(session, &p);
if (r < 0)
return r;
r = parse_env_file(p, NEWLINE, field, &s, NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
-
if (isempty(s))
- return -ENXIO;
+ return -ENODATA;
*value = s;
s = NULL;
@@ -487,6 +568,8 @@ _public_ int sd_session_get_vt(const char *session, unsigned *vtnr) {
unsigned u;
int r;
+ assert_return(vtnr, -EINVAL);
+
r = session_get_string(session, "VTNR", &vtnr_string);
if (r < 0)
return r;
@@ -542,32 +625,6 @@ _public_ int sd_session_get_remote_host(const char *session, char **remote_host)
return session_get_string(session, "REMOTE_HOST", remote_host);
}
-static int file_of_seat(const char *seat, char **_p) {
- char *p;
- int r;
-
- assert(_p);
-
- if (seat)
- p = strappend("/run/systemd/seats/", seat);
- else {
- _cleanup_free_ char *buf = NULL;
-
- r = sd_session_get_seat(NULL, &buf);
- if (r < 0)
- return r;
-
- p = strappend("/run/systemd/seats/", buf);
- }
-
- if (!p)
- return -ENOMEM;
-
- *_p = p;
- p = NULL;
- return 0;
-}
-
_public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) {
_cleanup_free_ char *p = NULL, *s = NULL, *t = NULL;
int r;
@@ -582,6 +639,8 @@ _public_ int sd_seat_get_active(const char *seat, char **session, uid_t *uid) {
"ACTIVE", &s,
"ACTIVE_UID", &t,
NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
@@ -620,7 +679,8 @@ _public_ int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **ui
"SESSIONS", &s,
"ACTIVE_SESSIONS", &t,
NULL);
-
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
@@ -652,7 +712,6 @@ _public_ int sd_seat_get_sessions(const char *seat, char ***sessions, uid_t **ui
return -ENOMEM;
r = parse_uid(k, b + i);
-
if (r < 0)
continue;
@@ -683,7 +742,7 @@ static int seat_get_can(const char *seat, const char *variable) {
_cleanup_free_ char *p = NULL, *s = NULL;
int r;
- assert_return(variable, -EINVAL);
+ assert(variable);
r = file_of_seat(seat, &p);
if (r < 0)
@@ -692,10 +751,12 @@ static int seat_get_can(const char *seat, const char *variable) {
r = parse_env_file(p, NEWLINE,
variable, &s,
NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
- if (!s)
- return 0;
+ if (isempty(s))
+ return -ENODATA;
return parse_boolean(s);
}
@@ -819,6 +880,8 @@ _public_ int sd_machine_get_class(const char *machine, char **class) {
p = strjoina("/run/systemd/machines/", machine);
r = parse_env_file(p, NEWLINE, "CLASS", &c, NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
if (!c)
@@ -842,6 +905,8 @@ _public_ int sd_machine_get_ifindices(const char *machine, int **ifindices) {
p = strjoina("/run/systemd/machines/", machine);
r = parse_env_file(p, NEWLINE, "NETIF", &netif, NULL);
+ if (r == -ENOENT)
+ return -ENXIO;
if (r < 0)
return r;
if (!netif) {
diff --git a/src/libsystemd/sd-login/test-login.c b/src/libsystemd/sd-login/test-login.c
index ddea7ffa14..f734ce9eee 100644
--- a/src/libsystemd/sd-login/test-login.c
+++ b/src/libsystemd/sd-login/test-login.c
@@ -33,7 +33,7 @@ static void test_login(void) {
_cleanup_free_ char *pp = NULL, *qq = NULL;
int r, k;
uid_t u, u2;
- char *seat, *type, *class, *display, *remote_user, *remote_host, *display_session;
+ char *seat, *type, *class, *display, *remote_user, *remote_host, *display_session, *cgroup;
char *session;
char *state;
char *session2;
@@ -50,9 +50,13 @@ static void test_login(void) {
assert_se(sd_pid_get_owner_uid(0, &u2) == 0);
printf("user = "UID_FMT"\n", u2);
+ assert_se(sd_pid_get_cgroup(0, &cgroup) == 0);
+ printf("cgroup = %s\n", cgroup);
+ free(cgroup);
+
display_session = NULL;
r = sd_uid_get_display(u2, &display_session);
- assert_se(r >= 0 || r == -ENXIO);
+ assert_se(r >= 0 || r == -ENODATA);
printf("user's display session = %s\n", strna(display_session));
free(display_session);
@@ -108,19 +112,19 @@ static void test_login(void) {
display = NULL;
r = sd_session_get_display(session, &display);
- assert_se(r >= 0 || r == -ENXIO);
+ assert_se(r >= 0 || r == -ENODATA);
printf("display = %s\n", strna(display));
free(display);
remote_user = NULL;
r = sd_session_get_remote_user(session, &remote_user);
- assert_se(r >= 0 || r == -ENXIO);
+ assert_se(r >= 0 || r == -ENODATA);
printf("remote_user = %s\n", strna(remote_user));
free(remote_user);
remote_host = NULL;
r = sd_session_get_remote_host(session, &remote_host);
- assert_se(r >= 0 || r == -ENXIO);
+ assert_se(r >= 0 || r == -ENODATA);
printf("remote_host = %s\n", strna(remote_host));
free(remote_host);
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index a56960506c..1c64c3e771 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -4737,6 +4737,7 @@ static int create_subcgroup(pid_t pid) {
_cleanup_free_ char *cgroup = NULL;
const char *child;
int unified, r;
+ CGroupMask supported;
/* In the unified hierarchy inner nodes may only only contain
* subgroups, but not processes. Hence, if we running in the
@@ -4756,6 +4757,10 @@ static int create_subcgroup(pid_t pid) {
if (unified == 0)
return 0;
+ r = cg_mask_supported(&supported);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine supported controllers: %m");
+
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
if (r < 0)
return log_error_errno(r, "Failed to get our control group: %m");
@@ -4770,6 +4775,8 @@ static int create_subcgroup(pid_t pid) {
if (r < 0)
return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+ /* Try to enable as many controllers as possible for the new payload. */
+ (void) cg_enable_everywhere(supported, supported, cgroup);
return 0;
}
diff --git a/src/systemd/sd-login.h b/src/systemd/sd-login.h
index 9260396d5d..59c6eedcda 100644
--- a/src/systemd/sd-login.h
+++ b/src/systemd/sd-login.h
@@ -81,34 +81,42 @@ int sd_pid_get_user_slice(pid_t pid, char **slice);
* container. This will return an error for non-machine processes. */
int sd_pid_get_machine_name(pid_t pid, char **machine);
-/* Similar to sd_pid_get_session(), but retrieves data about peer of
- * connected AF_UNIX socket */
+/* Get the control group from a PID, relative to the root of the
+ * hierarchy. */
+int sd_pid_get_cgroup(pid_t pid, char **cgroup);
+
+/* Similar to sd_pid_get_session(), but retrieves data about the peer
+ * of a connected AF_UNIX socket */
int sd_peer_get_session(int fd, char **session);
-/* Similar to sd_pid_get_owner_uid(), but retrieves data about peer of
- * connected AF_UNIX socket */
+/* Similar to sd_pid_get_owner_uid(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
int sd_peer_get_owner_uid(int fd, uid_t *uid);
-/* Similar to sd_pid_get_unit(), but retrieves data about peer of
- * connected AF_UNIX socket */
+/* Similar to sd_pid_get_unit(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
int sd_peer_get_unit(int fd, char **unit);
-/* Similar to sd_pid_get_user_unit(), but retrieves data about peer of
- * connected AF_UNIX socket */
+/* Similar to sd_pid_get_user_unit(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
int sd_peer_get_user_unit(int fd, char **unit);
-/* Similar to sd_pid_get_slice(), but retrieves data about peer of
- * connected AF_UNIX socket */
+/* Similar to sd_pid_get_slice(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
int sd_peer_get_slice(int fd, char **slice);
-/* Similar to sd_pid_get_user_slice(), but retrieves data about peer of
- * connected AF_UNIX socket */
+/* Similar to sd_pid_get_user_slice(), but retrieves data about the peer of
+ * a connected AF_UNIX socket */
int sd_peer_get_user_slice(int fd, char **slice);
-/* Similar to sd_pid_get_machine_name(), but retrieves data about peer
- * of connected AF_UNIX socket */
+/* Similar to sd_pid_get_machine_name(), but retrieves data about the
+ * peer of a a connected AF_UNIX socket */
int sd_peer_get_machine_name(int fd, char **machine);
+/* Similar to sd_pid_get_cgroup(), but retrieves data about the peer
+ * of a connected AF_UNIX socket. */
+int sd_peer_get_cgroup(pid_t pid, char **cgroup);
+
/* Get state from UID. Possible states: offline, lingering, online, active, closing */
int sd_uid_get_state(uid_t uid, char **state);
diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c
index ecc9d70bf4..ff7e45901c 100644
--- a/src/test/test-cgroup-util.c
+++ b/src/test/test-cgroup-util.c
@@ -295,6 +295,17 @@ static void test_shift_path(void) {
test_shift_path_one("/foobar/waldo", "/fuckfuck", "/foobar/waldo");
}
+static void test_mask_supported(void) {
+
+ CGroupMask m;
+ CGroupController c;
+
+ assert_se(cg_mask_supported(&m) >= 0);
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
+ printf("'%s' is supported: %s\n", cgroup_controller_to_string(c), yes_no(m & CGROUP_CONTROLLER_TO_MASK(c)));
+}
+
int main(void) {
test_path_decode_unit();
test_path_get_unit();
@@ -309,6 +320,7 @@ int main(void) {
test_controller_is_valid();
test_slice_to_path();
test_shift_path();
+ test_mask_supported();
return 0;
}
diff --git a/src/test/test-util.c b/src/test/test-util.c
index dff38ab6f6..8ceb71f22a 100644
--- a/src/test/test-util.c
+++ b/src/test/test-util.c
@@ -270,6 +270,9 @@ static void test_parse_pid(void) {
r = parse_pid("0xFFFFFFFFFFFFFFFFF", &pid);
assert_se(r == -ERANGE);
assert_se(pid == 65);
+
+ r = parse_pid("junk", &pid);
+ assert_se(r == -EINVAL);
}
static void test_parse_uid(void) {
@@ -282,6 +285,9 @@ static void test_parse_uid(void) {
r = parse_uid("65535", &uid);
assert_se(r == -ENXIO);
+
+ r = parse_uid("asdsdas", &uid);
+ assert_se(r == -EINVAL);
}
static void test_safe_atou16(void) {