diff options
author | Evgeny Vereshchagin <evvers@ya.ru> | 2016-05-07 19:17:44 +0300 |
---|---|---|
committer | Evgeny Vereshchagin <evvers@ya.ru> | 2016-05-07 19:17:44 +0300 |
commit | 5ab42bc85a11a5250dcdf8e86291d3da90aa84bd (patch) | |
tree | 1493f86c5031484546ab98d242303101d3b43017 /src/core | |
parent | d2cc96a8e134e8166acb917f67b3e8b308e09370 (diff) | |
parent | 23be5709e10b3b88a9908f3005351ccba9e5d48b (diff) |
Merge pull request #3191 from poettering/cgroups-agent-dgram
core: use an AF_UNIX/SOCK_DGRAM socket for cgroup agent notification
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/cgroup.c | 6 | ||||
-rw-r--r-- | src/core/dbus.c | 75 | ||||
-rw-r--r-- | src/core/dbus.h | 2 | ||||
-rw-r--r-- | src/core/execute.c | 2 | ||||
-rw-r--r-- | src/core/manager.c | 164 | ||||
-rw-r--r-- | src/core/manager.h | 3 |
6 files changed, 190 insertions, 62 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 25cc6962f9..1a94b188cb 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1312,7 +1312,9 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to watch control group inotify object: %m"); - r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_IDLE - 5); + /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also + * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */ + r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5); if (r < 0) return log_error_errno(r, "Failed to set priority of inotify event source: %m"); @@ -1458,6 +1460,8 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { assert(m); assert(cgroup); + log_debug("Got cgroup empty notification for: %s", cgroup); + u = manager_get_unit_by_cgroup(m, cgroup); if (!u) return 0; diff --git a/src/core/dbus.c b/src/core/dbus.c index 263955d874..3422a02d68 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -71,28 +71,42 @@ int bus_send_queued_message(Manager *m) { return 0; } +int bus_forward_agent_released(Manager *m, const char *path) { + int r; + + assert(m); + assert(path); + + if (!MANAGER_IS_SYSTEM(m)) + return 0; + + if (!m->system_bus) + return 0; + + /* If we are running a system instance we forward the agent message on the system bus, so that the user + * instances get notified about this, too */ + + r = sd_bus_emit_signal(m->system_bus, + "/org/freedesktop/systemd1/agent", + "org.freedesktop.systemd1.Agent", + "Released", + "s", path); + if (r < 0) + return log_warning_errno(r, "Failed to propagate agent release message: %m"); + + return 1; +} + static int signal_agent_released(sd_bus_message *message, void *userdata, sd_bus_error *error) { _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL; - const char *cgroup, *me; Manager *m = userdata; + const char *cgroup; uid_t sender_uid; - sd_bus *bus; int r; assert(message); assert(m); - /* ignore recursive events sent by us on the system/user bus */ - bus = sd_bus_message_get_bus(message); - if (!sd_bus_is_server(bus)) { - r = sd_bus_get_unique_name(bus, &me); - if (r < 0) - return r; - - if (streq_ptr(sd_bus_message_get_sender(message), me)) - return 0; - } - /* only accept org.freedesktop.systemd1.Agent from UID=0 */ r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID, &creds); if (r < 0) @@ -110,16 +124,6 @@ static int signal_agent_released(sd_bus_message *message, void *userdata, sd_bus } manager_notify_cgroup_empty(m, cgroup); - - /* if running as system-instance, forward under our name */ - if (MANAGER_IS_SYSTEM(m) && m->system_bus) { - r = sd_bus_message_rewind(message, 1); - if (r >= 0) - r = sd_bus_send(m->system_bus, message, NULL); - if (r < 0) - log_warning_errno(r, "Failed to forward Released message: %m"); - } - return 0; } @@ -690,25 +694,6 @@ static int bus_on_connection(sd_event_source *s, int fd, uint32_t revents, void return 0; } - if (MANAGER_IS_SYSTEM(m)) { - /* When we run as system instance we get the Released - * signal via a direct connection */ - - r = sd_bus_add_match( - bus, - NULL, - "type='signal'," - "interface='org.freedesktop.systemd1.Agent'," - "member='Released'," - "path='/org/freedesktop/systemd1/agent'", - signal_agent_released, m); - - if (r < 0) { - log_warning_errno(r, "Failed to register Released match on new connection bus: %m"); - return 0; - } - } - r = bus_setup_disconnected_match(m, bus); if (r < 0) return 0; @@ -906,8 +891,8 @@ static int bus_setup_system(Manager *m, sd_bus *bus) { assert(m); assert(bus); - /* On kdbus or if we are a user instance we get the Released message via the system bus */ - if (MANAGER_IS_USER(m) || m->kdbus_fd >= 0) { + /* if we are a user instance we get the Released message via the system bus */ + if (MANAGER_IS_USER(m)) { r = sd_bus_add_match( bus, NULL, @@ -990,7 +975,7 @@ static int bus_init_private(Manager *m) { return 0; strcpy(sa.un.sun_path, "/run/systemd/private"); - salen = offsetof(union sockaddr_union, un.sun_path) + strlen("/run/systemd/private"); + salen = SOCKADDR_UN_LEN(sa.un); } else { size_t left = sizeof(sa.un.sun_path); char *p = sa.un.sun_path; diff --git a/src/core/dbus.h b/src/core/dbus.h index e16a84fbb8..6baaffbd75 100644 --- a/src/core/dbus.h +++ b/src/core/dbus.h @@ -40,3 +40,5 @@ int bus_verify_manage_units_async(Manager *m, sd_bus_message *call, sd_bus_error int bus_verify_manage_unit_files_async(Manager *m, sd_bus_message *call, sd_bus_error *error); int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_error *error); int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error); + +int bus_forward_agent_released(Manager *m, const char *path); diff --git a/src/core/execute.c b/src/core/execute.c index ac2ac39892..5eb3f13695 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -271,7 +271,7 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) { } } - r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path)); + r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)); if (r < 0) r = -errno; diff --git a/src/core/manager.c b/src/core/manager.c index bd00c224f4..e192cd475d 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -87,6 +87,7 @@ #include "watchdog.h" #define NOTIFY_RCVBUF_SIZE (8*1024*1024) +#define CGROUPS_AGENT_RCVBUF_SIZE (8*1024*1024) /* Initial delay and the interval for printing status messages about running jobs */ #define JOBS_IN_PROGRESS_WAIT_USEC (5*USEC_PER_SEC) @@ -94,6 +95,7 @@ #define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); +static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); @@ -484,11 +486,11 @@ static int manager_setup_signals(Manager *m) { (void) sd_event_source_set_description(m->signal_event_source, "manager-signal"); - /* Process signals a bit earlier than the rest of things, but - * later than notify_fd processing, so that the notify - * processing can still figure out to which process/service a - * message belongs, before we reap the process. */ - r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-5); + /* Process signals a bit earlier than the rest of things, but later than notify_fd processing, so that the + * notify processing can still figure out to which process/service a message belongs, before we reap the + * process. Also, process this before handling cgroup notifications, so that we always collect child exit + * status information before detecting that there's no process in a cgroup. */ + r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-6); if (r < 0) return r; @@ -581,12 +583,12 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) { m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; - m->pin_cgroupfs_fd = m->notify_fd = m->signal_fd = m->time_change_fd = - m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd = -1; + m->pin_cgroupfs_fd = m->notify_fd = m->cgroups_agent_fd = m->signal_fd = m->time_change_fd = + m->dev_autofs_fd = m->private_listen_fd = m->kdbus_fd = m->cgroup_inotify_fd = + m->ask_password_inotify_fd = -1; m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */ - m->ask_password_inotify_fd = -1; m->have_ask_password = -EINVAL; /* we don't know */ m->first_boot = -1; @@ -703,7 +705,7 @@ static int manager_setup_notify(Manager *m) { (void) unlink(m->notify_socket); strncpy(sa.un.sun_path, m->notify_socket, sizeof(sa.un.sun_path)-1); - r = bind(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path)); + r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)); if (r < 0) return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path); @@ -722,8 +724,8 @@ static int manager_setup_notify(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to allocate notify event source: %m"); - /* Process signals a bit earlier than SIGCHLD, so that we can - * still identify to which service an exit message belongs */ + /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which + * service an exit message belongs. */ r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-7); if (r < 0) return log_error_errno(r, "Failed to set priority of notify event source: %m"); @@ -734,6 +736,79 @@ static int manager_setup_notify(Manager *m) { return 0; } +static int manager_setup_cgroups_agent(Manager *m) { + + static const union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = "/run/systemd/cgroups-agent", + }; + int r; + + /* This creates a listening socket we receive cgroups agent messages on. We do not use D-Bus for delivering + * these messages from the cgroups agent binary to PID 1, as the cgroups agent binary is very short-living, and + * each instance of it needs a new D-Bus connection. Since D-Bus connections are SOCK_STREAM/AF_UNIX, on + * overloaded systems the backlog of the D-Bus socket becomes relevant, as not more than the configured number + * of D-Bus connections may be queued until the kernel will start dropping further incoming connections, + * possibly resulting in lost cgroups agent messages. To avoid this, we'll use a private SOCK_DGRAM/AF_UNIX + * socket, where no backlog is relevant as communication may take place without an actual connect() cycle, and + * we thus won't lose messages. + * + * Note that PID 1 will forward the agent message to system bus, so that the user systemd instance may listen + * to it. The system instance hence listens on this special socket, but the user instances listen on the system + * bus for these messages. */ + + if (m->test_run) + return 0; + + if (!MANAGER_IS_SYSTEM(m)) + return 0; + + if (cg_unified() > 0) /* We don't need this anymore on the unified hierarchy */ + return 0; + + if (m->cgroups_agent_fd < 0) { + _cleanup_close_ int fd = -1; + + /* First free all secondary fields */ + m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source); + + fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (fd < 0) + return log_error_errno(errno, "Failed to allocate cgroups agent socket: %m"); + + fd_inc_rcvbuf(fd, CGROUPS_AGENT_RCVBUF_SIZE); + + (void) unlink(sa.un.sun_path); + + /* Only allow root to connect to this socket */ + RUN_WITH_UMASK(0077) + r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)); + if (r < 0) + return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path); + + m->cgroups_agent_fd = fd; + fd = -1; + } + + if (!m->cgroups_agent_event_source) { + r = sd_event_add_io(m->event, &m->cgroups_agent_event_source, m->cgroups_agent_fd, EPOLLIN, manager_dispatch_cgroups_agent_fd, m); + if (r < 0) + return log_error_errno(r, "Failed to allocate cgroups agent event source: %m"); + + /* Process cgroups notifications early, but after having processed service notification messages or + * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of notification, + * and we collected the metadata the notification and SIGCHLD stuff offers first. Also see handling of + * cgroup inotify for the unified cgroup stuff. */ + r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-5); + if (r < 0) + return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m"); + + (void) sd_event_source_set_description(m->cgroups_agent_event_source, "manager-cgroups-agent"); + } + + return 0; +} + static int manager_setup_kdbus(Manager *m) { _cleanup_free_ char *p = NULL; @@ -944,12 +1019,14 @@ Manager* manager_free(Manager *m) { sd_event_source_unref(m->signal_event_source); sd_event_source_unref(m->notify_event_source); + sd_event_source_unref(m->cgroups_agent_event_source); sd_event_source_unref(m->time_change_event_source); sd_event_source_unref(m->jobs_in_progress_event_source); sd_event_source_unref(m->run_queue_event_source); safe_close(m->signal_fd); safe_close(m->notify_fd); + safe_close(m->cgroups_agent_fd); safe_close(m->time_change_fd); safe_close(m->kdbus_fd); @@ -1142,6 +1219,10 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) { if (q < 0 && r == 0) r = q; + q = manager_setup_cgroups_agent(m); + if (q < 0 && r == 0) + r = q; + /* We might have deserialized the kdbus control fd, but if we * didn't, then let's create the bus now. */ manager_setup_kdbus(m); @@ -1479,6 +1560,35 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) { return n; } +static int manager_dispatch_cgroups_agent_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + Manager *m = userdata; + char buf[PATH_MAX+1]; + ssize_t n; + + n = recv(fd, buf, sizeof(buf), 0); + if (n < 0) + return log_error_errno(errno, "Failed to read cgroups agent message: %m"); + if (n == 0) { + log_error("Got zero-length cgroups agent message, ignoring."); + return 0; + } + if ((size_t) n >= sizeof(buf)) { + log_error("Got overly long cgroups agent message, ignoring."); + return 0; + } + + if (memchr(buf, 0, n)) { + log_error("Got cgroups agent message with embedded NUL byte, ignoring."); + return 0; + } + buf[n] = 0; + + manager_notify_cgroup_empty(m, buf); + bus_forward_agent_released(m, buf); + + return 0; +} + static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, size_t n, FDSet *fds) { _cleanup_strv_free_ char **tags = NULL; @@ -2135,11 +2245,10 @@ void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) { } void manager_send_unit_plymouth(Manager *m, Unit *u) { - union sockaddr_union sa = PLYMOUTH_SOCKET; - - int n = 0; + static const union sockaddr_union sa = PLYMOUTH_SOCKET; _cleanup_free_ char *message = NULL; _cleanup_close_ int fd = -1; + int n = 0; /* Don't generate plymouth events if the service was already * started and we're just deserializing */ @@ -2165,7 +2274,7 @@ void manager_send_unit_plymouth(Manager *m, Unit *u) { return; } - if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1)) < 0) { + if (connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0) { if (!IN_SET(errno, EPIPE, EAGAIN, ENOENT, ECONNREFUSED, ECONNRESET, ECONNABORTED)) log_error_errno(errno, "connect() failed: %m"); @@ -2265,6 +2374,16 @@ int manager_serialize(Manager *m, FILE *f, FDSet *fds, bool switching_root) { fprintf(f, "notify-socket=%s\n", m->notify_socket); } + if (m->cgroups_agent_fd >= 0) { + int copy; + + copy = fdset_put_dup(fds, m->cgroups_agent_fd); + if (copy < 0) + return copy; + + fprintf(f, "cgroups-agent-fd=%i\n", copy); + } + if (m->kdbus_fd >= 0) { int copy; @@ -2432,6 +2551,17 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { free(m->notify_socket); m->notify_socket = n; + } else if (startswith(l, "cgroups-agent-fd=")) { + int fd; + + if (safe_atoi(l + 17, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) + log_debug("Failed to parse cgroups agent fd: %s", l + 10); + else { + m->cgroups_agent_event_source = sd_event_source_unref(m->cgroups_agent_event_source); + safe_close(m->cgroups_agent_fd); + m->cgroups_agent_fd = fdset_remove(fds, fd); + } + } else if (startswith(l, "kdbus-fd=")) { int fd; @@ -2552,6 +2682,10 @@ int manager_reload(Manager *m) { if (q < 0 && r >= 0) r = q; + q = manager_setup_cgroups_agent(m); + if (q < 0 && r >= 0) + r = q; + /* Third, fire things up! */ manager_coldplug(m); diff --git a/src/core/manager.h b/src/core/manager.h index 17f84e6963..4bccca75cb 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -132,6 +132,9 @@ struct Manager { int notify_fd; sd_event_source *notify_event_source; + int cgroups_agent_fd; + sd_event_source *cgroups_agent_event_source; + int signal_fd; sd_event_source *signal_event_source; |