summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorEvgeny Vereshchagin <evvers@ya.ru>2016-05-07 19:17:44 +0300
committerEvgeny Vereshchagin <evvers@ya.ru>2016-05-07 19:17:44 +0300
commit5ab42bc85a11a5250dcdf8e86291d3da90aa84bd (patch)
tree1493f86c5031484546ab98d242303101d3b43017 /src/core
parentd2cc96a8e134e8166acb917f67b3e8b308e09370 (diff)
parent23be5709e10b3b88a9908f3005351ccba9e5d48b (diff)
Merge pull request #3191 from poettering/cgroups-agent-dgram
core: use an AF_UNIX/SOCK_DGRAM socket for cgroup agent notification
Diffstat (limited to 'src/core')
-rw-r--r--src/core/cgroup.c6
-rw-r--r--src/core/dbus.c75
-rw-r--r--src/core/dbus.h2
-rw-r--r--src/core/execute.c2
-rw-r--r--src/core/manager.c164
-rw-r--r--src/core/manager.h3
6 files changed, 190 insertions, 62 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 25cc6962f9..1a94b188cb 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -1312,7 +1312,9 @@ int manager_setup_cgroup(Manager *m) {
if (r < 0)
return log_error_errno(r, "Failed to watch control group inotify object: %m");
- r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_IDLE - 5);
+ /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
+ * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
+ r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
if (r < 0)
return log_error_errno(r, "Failed to set priority of inotify event source: %m");
@@ -1458,6 +1460,8 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
assert(m);
assert(cgroup);
+ log_debug("Got cgroup empty notification for: %s", cgroup);
+
u = manager_get_unit_by_cgroup(m, cgroup);
if (!u)
return 0;
diff --git a/src/core/dbus.c b/src/core/dbus.c
index 263955d874..3422a02d68 100644
--- a/src/core/dbus.c
+++ b/src/core/dbus.c
@@ -71,28 +71,42 @@ int bus_send_queued_message(Manager *m) {
return 0;
}
+int bus_forward_agent_released(Manager *m, const char *path) {
+ int r;
+
+ assert(m);
+ assert(path);
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ if (!m->system_bus)
+ return 0;
+
+ /* If we are running a system instance we forward the agent message on the system bus, so that the user
+ * instances get notified about this, too */
+
+ r = sd_bus_emit_signal(m->system_bus,
+ "/org/freedesktop/systemd1/agent",
+ "org.freedesktop.systemd1.Agent",
+ "Released",
+ "s", path);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to propagate agent release message: %m");
+
+ return 1;
+}
+
static int signal_agent_released(sd_bus_message *message, void *userdata, sd_bus_error *error) {
_cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
- const char *cgroup, *me;
Manager *m = userdata;
+ const char *cgroup;
uid_t sender_uid;
- sd_bus *bus;
int r;
assert(message);
assert(m);
- /* ignore recursive events sent by us on the system/user bus */
- bus = sd_bus_message_get_bus(message);
- if (!sd_bus_is_server(bus)) {
- r = sd_bus_get_unique_name(bus, &me);
- if (r < 0)
- return r;
-
- if (streq_ptr(sd_bus_message_get_sender(message), me))
- return 0;
- }
-
/* only accept org.freedesktop.systemd1.Agent from UID=0 */
r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds);
if (r < 0)
@@ -110,16 +124,6 @@ static int signal_agent_released(sd_bus_message *message, void *userdata, sd_bus
}
manager_notify_cgroup_empty(m, cgroup);
-
- /* if running as system-instance, forward under our name */
- if (MANAGER_IS_SYSTEM(m) && m->system_bus) {
- r = sd_bus_message_rewind(message, 1);
- if (r >= 0)
- r = sd_bus_send(m->system_bus, message, NULL);
- if (r < 0)
- log_warning_errno(r, "Failed to forward Released message: %m");
- }
-
return 0;
}
@@ -690,25 +694,6 @@ static int bus_on_connection(sd_event_source *s, int fd, uint32_t revents, void
return 0;
}
- if (MANAGER_IS_SYSTEM(m)) {
- /* When we run as system instance we get the Released
- * signal via a direct connection */
-
- r = sd_bus_add_match(
- bus,
- NULL,
- "type='signal',"
- "interface='org.freedesktop.systemd1.Agent',"
- "member='Released',"
- "path='/org/freedesktop/systemd1/agent'",
- signal_agent_released, m);
-
- if (r < 0) {
- log_warning_errno(r, "Failed to register Released match on new connection bus: %m");
- return 0;
- }
- }
-
r = bus_setup_disconnected_match(m, bus);
if (r < 0)
return 0;
@@ -906,8 +891,8 @@ static int bus_setup_system(Manager *m, sd_bus *bus) {
assert(m);
assert(bus);
- /* On kdbus or if we are a user instance we get the Released message via the system bus */
- if (MANAGER_IS_USER(m) || m->kdbus_fd >= 0) {
+ /* if we are a user instance we get the Released message via the system bus */
+ if (MANAGER_IS_USER(m)) {
r = sd_bus_add_match(
bus,
NULL,
@@ -990,7 +975,7 @@ static int bus_init_private(Manager *m) {
return 0;
strcpy(sa.un.sun_path, "/run/systemd/private");
- salen = offsetof(union sockaddr_union, un.sun_path) + strlen("/run/systemd/private");
+ salen = SOCKADDR_UN_LEN(sa.un);
} else {
size_t left = sizeof(sa.un.sun_path);
char *p = sa.un.sun_path;
diff --git a/src/core/dbus.h b/src/core/dbus.h
index e16a84fbb8..6baaffbd75 100644
--- a/src/core/dbus.h
+++ b/src/core/dbus.h
@@ -40,3 +40,5 @@ int bus_verify_manage_units_async(Manager *m, sd_bus_message *call, sd_bus_error
int bus_verify_manage_unit_files_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error);
+
+int bus_forward_agent_released(Manager *m, const char *path);
diff --git a/src/core/execute.c b/src/core/execute.c
index ac2ac39892..5eb3f13695 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -271,7 +271,7 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
}
}
- r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
+ r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
if (r < 0)
r = -errno;
diff --git a/src/core/manager.c b/src/core/manager.c
index bd00c224f4..e192cd475d 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -87,6 +87,7 @@
#include "watchdog.h"
#define NOTIFY_RCVBUF_SIZE (8*1024*1024)
+#define CGROUPS_AGENT_RCVBUF_SIZE (8*1024*1024)
/* Initial delay and the interval for printing status messages about running jobs */
#define JOBS_IN_PROGRESS_WAIT_USEC (5*USEC_PER_SEC)
@@ -94,6 +95,7 @@
#define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
@@ -484,11 +486,11 @@ static int manager_setup_signals(Manager *m) {
(void) sd_event_source_set_description(m->signal_event_source, "manager-signal");
- /* Process signals a bit earlier than the rest of things, but
- * later than notify_fd processing, so that the notify
- * processing can still figure out to which process/service a
- * message belongs, before we reap the process. */
- r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-5);
+ /* Process signals a bit earlier than the rest of things, but later than notify_fd processing, so that the
+ * notify processing can still figure out to which process/service a message belongs, before we reap the
+ * process. Also, process this before handling cgroup notifications, so that we always collect child exit
+ * status information before detecting that there's no process in a cgroup. */
+ r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-6);
if (r < 0)
return r;
@@ -581,12 +583,12 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
- m->pin_cgroupfs_fd = m->notify_fd = m->signal_fd = m->time_change_fd =
- m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd = -1;
+ m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd =
+ m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd =
+ m->ask_password_inotify_fd = -1;
m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
- m->ask_password_inotify_fd = -1;
m->have_ask_password = -EINVAL; /* we don't know */
m->first_boot = -1;
@@ -703,7 +705,7 @@ static int manager_setup_notify(Manager *m) {
(void) unlink(m->notify_socket);
strncpy(sa.un.sun_path, m->notify_socket, sizeof(sa.un.sun_path)-1);
- r = bind(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
+ r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
if (r < 0)
return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
@@ -722,8 +724,8 @@ static int manager_setup_notify(Manager *m) {
if (r < 0)
return log_error_errno(r, "Failed to allocate notify event source: %m");
- /* Process signals a bit earlier than SIGCHLD, so that we can
- * still identify to which service an exit message belongs */
+ /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which
+ * service an exit message belongs. */
r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-7);
if (r < 0)
return log_error_errno(r, "Failed to set priority of notify event source: %m");
@@ -734,6 +736,79 @@ static int manager_setup_notify(Manager *m) {
return 0;
}
+static int manager_setup_cgroups_agent(Manager *m) {
+
+ static const union sockaddr_union sa = {
+ .un.sun_family = AF_UNIX,
+ .un.sun_path = "/run/systemd/cgroups-agent",
+ };
+ int r;
+
+ /* This creates a listening socket we receive cgroups agent messages on. We do not use D-Bus for delivering
+ * these messages from the cgroups agent binary to PID 1, as the cgroups agent binary is very short-living, and
+ * each instance of it needs a new D-Bus connection. Since D-Bus connections are SOCK_STREAM/AF_UNIX, on
+ * overloaded systems the backlog of the D-Bus socket becomes relevant, as not more than the configured number
+ * of D-Bus connections may be queued until the kernel will start dropping further incoming connections,
+ * possibly resulting in lost cgroups agent messages. To avoid this, we'll use a private SOCK_DGRAM/AF_UNIX
+ * socket, where no backlog is relevant as communication may take place without an actual connect() cycle, and
+ * we thus won't lose messages.
+ *
+ * Note that PID 1 will forward the agent message to system bus, so that the user systemd instance may listen
+ * to it. The system instance hence listens on this special socket, but the user instances listen on the system
+ * bus for these messages. */
+
+ if (m->test_run)
+ return 0;
+
+ if (!MANAGER_IS_SYSTEM(m))
+ return 0;
+
+ if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */
+ return 0;
+
+ if (m->cgroups_agent_fd < 0) {
+ _cleanup_close_ int fd = -1;
+
+ /* First free all secondary fields */
+ m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
+
+ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to allocate cgroups agent socket: %m");
+
+ fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE);
+
+ (void) unlink(sa.un.sun_path);
+
+ /* Only allow root to connect to this socket */
+ RUN_WITH_UMASK(0077)
+ r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
+ if (r < 0)
+ return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
+
+ m->cgroups_agent_fd = fd;
+ fd = -1;
+ }
+
+ if (!m->cgroups_agent_event_source) {
+ r = sd_event_add_io(m->event, &m->cgroups_agent_event_source, m->cgroups_agent_fd, EPOLLIN, manager_dispatch_cgroups_agent_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate cgroups agent event source: %m");
+
+ /* Process cgroups notifications early, but after having processed service notification messages or
+ * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of notification,
+ * and we collected the metadata the notification and SIGCHLD stuff offers first. Also see handling of
+ * cgroup inotify for the unified cgroup stuff. */
+ r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-5);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m");
+
+ (void) sd_event_source_set_description(m->cgroups_agent_event_source, "manager-cgroups-agent");
+ }
+
+ return 0;
+}
+
static int manager_setup_kdbus(Manager *m) {
_cleanup_free_ char *p = NULL;
@@ -944,12 +1019,14 @@ Manager* manager_free(Manager *m) {
sd_event_source_unref(m->signal_event_source);
sd_event_source_unref(m->notify_event_source);
+ sd_event_source_unref(m->cgroups_agent_event_source);
sd_event_source_unref(m->time_change_event_source);
sd_event_source_unref(m->jobs_in_progress_event_source);
sd_event_source_unref(m->run_queue_event_source);
safe_close(m->signal_fd);
safe_close(m->notify_fd);
+ safe_close(m->cgroups_agent_fd);
safe_close(m->time_change_fd);
safe_close(m->kdbus_fd);
@@ -1142,6 +1219,10 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
if (q < 0 && r == 0)
r = q;
+ q = manager_setup_cgroups_agent(m);
+ if (q < 0 && r == 0)
+ r = q;
+
/* We might have deserialized the kdbus control fd, but if we
* didn't, then let's create the bus now. */
manager_setup_kdbus(m);
@@ -1479,6 +1560,35 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) {
return n;
}
+static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = userdata;
+ char buf[PATH_MAX+1];
+ ssize_t n;
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (n < 0)
+ return log_error_errno(errno, "Failed to read cgroups agent message: %m");
+ if (n == 0) {
+ log_error("Got zero-length cgroups agent message, ignoring.");
+ return 0;
+ }
+ if ((size_t) n >= sizeof(buf)) {
+ log_error("Got overly long cgroups agent message, ignoring.");
+ return 0;
+ }
+
+ if (memchr(buf, 0, n)) {
+ log_error("Got cgroups agent message with embedded NUL byte, ignoring.");
+ return 0;
+ }
+ buf[n] = 0;
+
+ manager_notify_cgroup_empty(m, buf);
+ bus_forward_agent_released(m, buf);
+
+ return 0;
+}
+
static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, size_t n, FDSet *fds) {
_cleanup_strv_free_ char **tags = NULL;
@@ -2135,11 +2245,10 @@ void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
}
void manager_send_unit_plymouth(Manager *m, Unit *u) {
- union sockaddr_union sa = PLYMOUTH_SOCKET;
-
- int n = 0;
+ static const union sockaddr_union sa = PLYMOUTH_SOCKET;
_cleanup_free_ char *message = NULL;
_cleanup_close_ int fd = -1;
+ int n = 0;
/* Don't generate plymouth events if the service was already
* started and we're just deserializing */
@@ -2165,7 +2274,7 @@ void manager_send_unit_plymouth(Manager *m, Unit *u) {
return;
}
- if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1)) < 0) {
+ if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) {
if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED))
log_error_errno(errno, "connect() failed: %m");
@@ -2265,6 +2374,16 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) {
fprintf(f, "notify-socket=%s\n", m->notify_socket);
}
+ if (m->cgroups_agent_fd >= 0) {
+ int copy;
+
+ copy = fdset_put_dup(fds, m->cgroups_agent_fd);
+ if (copy < 0)
+ return copy;
+
+ fprintf(f, "cgroups-agent-fd=%i\n", copy);
+ }
+
if (m->kdbus_fd >= 0) {
int copy;
@@ -2432,6 +2551,17 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
free(m->notify_socket);
m->notify_socket = n;
+ } else if (startswith(l, "cgroups-agent-fd=")) {
+ int fd;
+
+ if (safe_atoi(l + 17, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd))
+ log_debug("Failed to parse cgroups agent fd: %s", l + 10);
+ else {
+ m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source);
+ safe_close(m->cgroups_agent_fd);
+ m->cgroups_agent_fd = fdset_remove(fds, fd);
+ }
+
} else if (startswith(l, "kdbus-fd=")) {
int fd;
@@ -2552,6 +2682,10 @@ int manager_reload(Manager *m) {
if (q < 0 && r >= 0)
r = q;
+ q = manager_setup_cgroups_agent(m);
+ if (q < 0 && r >= 0)
+ r = q;
+
/* Third, fire things up! */
manager_coldplug(m);
diff --git a/src/core/manager.h b/src/core/manager.h
index 17f84e6963..4bccca75cb 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -132,6 +132,9 @@ struct Manager {
int notify_fd;
sd_event_source *notify_event_source;
+ int cgroups_agent_fd;
+ sd_event_source *cgroups_agent_event_source;
+
int signal_fd;
sd_event_source *signal_event_source;