diff options
-rw-r--r-- | Makefile-man.am | 31 | ||||
-rw-r--r-- | man/sd_listen_fds.xml | 21 | ||||
-rw-r--r-- | man/sd_notify.xml | 122 | ||||
-rw-r--r-- | man/systemd.service.xml | 29 | ||||
-rw-r--r-- | src/core/dbus-service.c | 1 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 1 | ||||
-rw-r--r-- | src/core/manager.c | 78 | ||||
-rw-r--r-- | src/core/service.c | 166 | ||||
-rw-r--r-- | src/core/service.h | 14 | ||||
-rw-r--r-- | src/core/unit.c | 21 | ||||
-rw-r--r-- | src/core/unit.h | 6 | ||||
-rw-r--r-- | src/libsystemd/libsystemd.sym.m4 | 5 | ||||
-rw-r--r-- | src/libsystemd/sd-daemon/sd-daemon.c | 94 | ||||
-rw-r--r-- | src/shared/fdset.c | 54 | ||||
-rw-r--r-- | src/shared/fdset.h | 11 | ||||
-rw-r--r-- | src/shared/util.c | 25 | ||||
-rw-r--r-- | src/shared/util.h | 2 | ||||
-rw-r--r-- | src/systemd/sd-daemon.h | 13 |
18 files changed, 594 insertions, 100 deletions
diff --git a/Makefile-man.am b/Makefile-man.am index 68e7483e70..8dc8febcf6 100644 --- a/Makefile-man.am +++ b/Makefile-man.am @@ -198,6 +198,9 @@ MANPAGES_ALIAS += \ man/sd_journal_wait.3 \ man/sd_machine_get_ifindices.3 \ man/sd_notifyf.3 \ + man/sd_pid_notify.3 \ + man/sd_pid_notify_with_fds.3 \ + man/sd_pid_notifyf.3 \ man/sleep.conf.d.5 \ man/system.conf.d.5 \ man/systemd-ask-password-console.path.8 \ @@ -308,6 +311,9 @@ man/sd_journal_test_cursor.3: man/sd_journal_get_cursor.3 man/sd_journal_wait.3: man/sd_journal_get_fd.3 man/sd_machine_get_ifindices.3: man/sd_machine_get_class.3 man/sd_notifyf.3: man/sd_notify.3 +man/sd_pid_notify.3: man/sd_notify.3 +man/sd_pid_notify_with_fds.3: man/sd_notify.3 +man/sd_pid_notifyf.3: man/sd_notify.3 man/sleep.conf.d.5: man/systemd-sleep.conf.5 man/system.conf.d.5: man/systemd-system.conf.5 man/systemd-ask-password-console.path.8: man/systemd-ask-password-console.service.8 @@ -566,6 +572,15 @@ man/sd_machine_get_ifindices.html: man/sd_machine_get_class.html man/sd_notifyf.html: man/sd_notify.html $(html-alias) +man/sd_pid_notify.html: man/sd_notify.html + $(html-alias) + +man/sd_pid_notify_with_fds.html: man/sd_notify.html + $(html-alias) + +man/sd_pid_notifyf.html: man/sd_notify.html + $(html-alias) + man/sleep.conf.d.html: man/systemd-sleep.conf.html $(html-alias) @@ -674,12 +689,6 @@ man/systemd-user.conf.html: man/systemd-system.conf.html man/user.conf.d.html: man/systemd-system.conf.html $(html-alias) -if ENABLE_HWDB -MANPAGES += \ - man/hwdb.7 \ - man/systemd-hwdb.8 - -endif if ENABLE_BACKLIGHT MANPAGES += \ @@ -750,6 +759,16 @@ man/systemd-hostnamed.html: man/systemd-hostnamed.service.html endif +if ENABLE_HWDB +MANPAGES += \ + man/hwdb.7 \ + man/systemd-hwdb.8 +MANPAGES_ALIAS += \ + # + + +endif + if ENABLE_KDBUS MANPAGES += \ man/sd_bus_creds_get_pid.3 \ diff --git a/man/sd_listen_fds.xml b/man/sd_listen_fds.xml index 6999db9804..4377745634 100644 --- a/man/sd_listen_fds.xml +++ b/man/sd_listen_fds.xml @@ -73,7 
+73,7 @@ <para>If the <parameter>unset_environment</parameter> parameter is non-zero, <function>sd_listen_fds()</function> will unset the - <varname>$LISTEN_FDS</varname>/<varname>$LISTEN_PID</varname> + <varname>$LISTEN_FDS</varname> and <varname>$LISTEN_PID</varname> environment variables before returning (regardless of whether the function call itself succeeded or not). Further calls to @@ -83,10 +83,11 @@ <para>If a daemon receives more than one file descriptor, they will be passed in the same order as - configured in the systemd socket definition - file. Nonetheless, it is recommended to verify the - correct socket types before using them. To simplify - this checking, the functions + configured in the systemd socket unit file (see + <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry> + for details). Nonetheless, it is recommended to verify + the correct socket types before using them. To + simplify this checking, the functions <citerefentry><refentrytitle>sd_is_fifo</refentrytitle><manvolnum>3</manvolnum></citerefentry>, <citerefentry><refentrytitle>sd_is_socket</refentrytitle><manvolnum>3</manvolnum></citerefentry>, <citerefentry><refentrytitle>sd_is_socket_inet</refentrytitle><manvolnum>3</manvolnum></citerefentry>, @@ -103,6 +104,16 @@ <para>This function call will set the FD_CLOEXEC flag for all passed file descriptors to avoid further inheritance to children of the calling process.</para> + + <para>If multiple socket units activate the same + service the order of the file descriptors passed to + its main process is undefined. 
If additional file + descriptors have been passed to the service manager + using + <citerefentry><refentrytitle>sd_pid_notify_with_fds</refentrytitle><manvolnum>3</manvolnum></citerefentry>'s + <literal>FDSTORE=1</literal> messages, these file + descriptors are passed last, in arbitrary order, and + with duplicates removed.</para> </refsect1> <refsect1> diff --git a/man/sd_notify.xml b/man/sd_notify.xml index 35f6f71ab3..2bf3383c0d 100644 --- a/man/sd_notify.xml +++ b/man/sd_notify.xml @@ -46,6 +46,9 @@ <refnamediv> <refname>sd_notify</refname> <refname>sd_notifyf</refname> + <refname>sd_pid_notify</refname> + <refname>sd_pid_notifyf</refname> + <refname>sd_pid_notify_with_fds</refname> <refpurpose>Notify service manager about start-up completion and other service status changes</refpurpose> </refnamediv> @@ -65,6 +68,30 @@ <paramdef>const char *<parameter>format</parameter></paramdef> <paramdef>...</paramdef> </funcprototype> + + <funcprototype> + <funcdef>int <function>sd_pid_notify</function></funcdef> + <paramdef>pid_t <parameter>pid</parameter></paramdef> + <paramdef>int <parameter>unset_environment</parameter></paramdef> + <paramdef>const char *<parameter>state</parameter></paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>sd_pid_notifyf</function></funcdef> + <paramdef>pid_t <parameter>pid</parameter></paramdef> + <paramdef>int <parameter>unset_environment</parameter></paramdef> + <paramdef>const char *<parameter>format</parameter></paramdef> + <paramdef>...</paramdef> + </funcprototype> + + <funcprototype> + <funcdef>int <function>sd_pid_notify_with_fds</function></funcdef> + <paramdef>pid_t <parameter>pid</parameter></paramdef> + <paramdef>int <parameter>unset_environment</parameter></paramdef> + <paramdef>const char *<parameter>state</parameter></paramdef> + <paramdef>const int *<parameter>fds</parameter></paramdef> + <paramdef>unsigned <parameter>n_fds</parameter></paramdef> + </funcprototype> </funcsynopsis> </refsynopsisdiv> @@ 
-175,7 +202,7 @@ <varlistentry> <term>MAINPID=...</term> - <listitem><para>The main pid of the + <listitem><para>The main process ID (PID) of the service, in case the service manager did not fork off the process itself. Example: @@ -185,7 +212,7 @@ <varlistentry> <term>WATCHDOG=1</term> - <listitem><para>Tells systemd to + <listitem><para>Tells the service manager to update the watchdog timestamp. This is the keep-alive ping that services need to issue in regular intervals if @@ -199,12 +226,53 @@ check if the watchdog is enabled. </para></listitem> </varlistentry> + + + <varlistentry> + <term>FDSTORE=1</term> + + <listitem><para>Stores additional file + descriptors in the service + manager. File descriptors sent this + way will be maintained per-service by + the service manager and be passed + again using the usual file descriptor + passing logic on the next invocation + of the service (see + <citerefentry><refentrytitle>sd_listen_fds</refentrytitle><manvolnum>3</manvolnum></citerefentry>). This + is useful for implementing service + restart schemes where services + serialize their state to + <filename>/run</filename>, push their + file descriptors to the service + manager, and are then restarted, + retrieving their state again via + socket passing and + <filename>/run</filename>. Note that + the service manager will accept + messages for a service only if + <varname>FileDescriptorStoreMax=</varname> + is set to non-zero for it (defaults to + zero). See + <citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry> + for details. Multiple arrays of file + descriptors may be sent in separate + messages, in which case the arrays are + combined. Note that the service + manager removes duplicate file + descriptors before passing them to the + service. 
Use + <function>sd_pid_notify_with_fds()</function> + to send messages with + <literal>FDSTORE=1</literal>, see + below.</para></listitem> + </varlistentry> + </variablelist> <para>It is recommended to prefix variable names that - are not shown in the list above with - <varname>X_</varname> to avoid namespace - clashes.</para> + are not listed above with <varname>X_</varname> to + avoid namespace clashes.</para> <para>Note that systemd will accept status data sent from a service only if the @@ -217,6 +285,36 @@ <function>sd_notify()</function> but takes a <function>printf()</function>-like format string plus arguments.</para> + + <para><function>sd_pid_notify()</function> and + <function>sd_pid_notifyf()</function> are similar to + <function>sd_notify()</function> and + <function>sd_notifyf()</function> but take a process + ID (PID) to use as originating PID for the message as + first argument. This is useful to send notification + messages on behalf of other processes, provided the + appropriate privileges are available. If the PID + argument is specified as 0 the process ID of the + calling process is used, in which case the calls are + fully equivalent to <function>sd_notify()</function> + and <function>sd_notifyf()</function>.</para> + + <para><function>sd_pid_notify_with_fds()</function> is + similar to <function>sd_pid_notify()</function> but + takes an additional array of file descriptors. These + file descriptors are sent along the notification + message to the service manager. This is particularly + useful for sending <literal>FDSTORE=1</literal> + messages, as described above. The additional arguments + are a pointer to the file descriptor array plus the + number of file descriptors in the array. If the number + of file descriptors is passed as 0, the call is fully + equivalent to <function>sd_pid_notify()</function>, + i.e. no file descriptors are passed. 
Note that file + descriptors sent to the service manager on messages + that do not expect them (i.e. without + <literal>FDSTORE=1</literal>) are immediately + closed on reception.</para> </refsect1> <refsect1> @@ -295,13 +393,25 @@ <example> <title>Error Cause Notification</title> - <para>A service could send the following shortly before exiting, on failure</para> + <para>A service could send the following shortly before exiting, on failure:</para> <programlisting>sd_notifyf(0, "STATUS=Failed to start up: %s\n" "ERRNO=%i", strerror(errno), errno);</programlisting> </example> + + <example> + <title>Store a File Descriptor in the Service Manager</title> + + <para>To store an open file descriptor in the + service manager, in order to continue + operation after a service restart without + losing state, use + <literal>FDSTORE=1</literal>:</para> + + <programlisting>sd_pid_notify_with_fds(0, 0, "FDSTORE=1", &fd, 1);</programlisting> + </example> </refsect1> <refsect1> diff --git a/man/systemd.service.xml b/man/systemd.service.xml index 0b68aa0890..4c890dfb7b 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -1117,6 +1117,35 @@ command.</para></listitem> </varlistentry> + <varlistentry> + <term><varname>FileDescriptorStoreMax=</varname></term> + <listitem><para>Configure how many + file descriptors may be stored in the + service manager for the service using + <citerefentry><refentrytitle>sd_pid_notify_with_fds</refentrytitle><manvolnum>3</manvolnum></citerefentry>'s + <literal>FDSTORE=1</literal> + messages. This is useful for + implementing service restart schemes + where the state is serialized to + <filename>/run</filename> and the file + descriptors passed to the service + manager, to allow restarts without + losing state. Defaults to 0, i.e. no + file descriptors may be stored in the + service manager by default. 
All file + descriptors passed to the service + manager from a specific service are + passed back to the service's main + process on the next service + restart. Any file descriptors passed + to the service manager are + automatically closed when POLLHUP or + POLLERR is seen on them, or when the + service is fully stopped and no job + queued or being executed for + it.</para></listitem> + </varlistentry> + </variablelist> <para>Check diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 2b50ac93d8..6d4713babc 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -59,6 +59,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("MainPID", "u", bus_property_get_pid, offsetof(Service, main_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Service, control_pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("BusName", "s", NULL, offsetof(Service, bus_name), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("FileDescriptorStoreMax", "u", NULL, offsetof(Service, n_fd_store_max), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("StatusErrno", "i", NULL, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index e0ffaa605a..9e87d91e77 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -226,6 +226,7 @@ Service.SuccessExitStatus, config_parse_set_status, 0, Service.SysVStartPriority, config_parse_warn_compat, DISABLED_LEGACY, 0 Service.NonBlocking, config_parse_bool, 0, offsetof(Service, exec_context.non_blocking) Service.BusName, config_parse_unit_string_printf, 0, offsetof(Service, bus_name) 
+Service.FileDescriptorStoreMax, config_parse_unsigned, 0, offsetof(Service, n_fd_store_max) Service.NotifyAccess, config_parse_notify_access, 0, offsetof(Service, notify_access) Service.Sockets, config_parse_service_sockets, 0, 0 m4_ifdef(`ENABLE_KDBUS', diff --git a/src/core/manager.c b/src/core/manager.c index 519b374382..c18312a369 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -84,6 +84,9 @@ #define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3) #define JOBS_IN_PROGRESS_PERIOD_DIVISOR 3 +#define NOTIFY_FD_MAX 768 +#define NOTIFY_BUFFER_MAX PIPE_BUF + static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); @@ -1449,7 +1452,7 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) { return n; } -static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *buf, size_t n) { +static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *buf, size_t n, FDSet *fds) { _cleanup_strv_free_ char **tags = NULL; assert(m); @@ -1466,12 +1469,13 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char * log_unit_debug(u->id, "Got notification message for unit %s", u->id); if (UNIT_VTABLE(u)->notify_message) - UNIT_VTABLE(u)->notify_message(u, pid, tags); + UNIT_VTABLE(u)->notify_message(u, pid, tags, fds); } static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; ssize_t n; + int r; assert(m); assert(m->notify_fd == fd); @@ -1482,73 +1486,101 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t } for (;;) { - char buf[4096]; + _cleanup_fdset_free_ FDSet *fds = NULL; + char buf[NOTIFY_BUFFER_MAX+1]; struct iovec iovec = { .iov_base = 
buf, .iov_len = sizeof(buf)-1, }; - bool found = false; - union { struct cmsghdr cmsghdr; - uint8_t buf[CMSG_SPACE(sizeof(struct ucred))]; + uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)]; } control = {}; - struct msghdr msghdr = { .msg_iov = &iovec, .msg_iovlen = 1, .msg_control = &control, .msg_controllen = sizeof(control), }; - struct ucred *ucred; + struct cmsghdr *cmsg; + struct ucred *ucred = NULL; + bool found = false; Unit *u1, *u2, *u3; + int *fd_array = NULL; + unsigned n_fds = 0; - n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT); - if (n <= 0) { - if (n == 0) - return -EIO; - + n = recvmsg(m->notify_fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + if (n < 0) { if (errno == EAGAIN || errno == EINTR) break; return -errno; } + if (n == 0) + return -ECONNRESET; + + for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { + + fd_array = (int*) CMSG_DATA(cmsg); + n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + + } else if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS && + cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) { - if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) || - control.cmsghdr.cmsg_level != SOL_SOCKET || - control.cmsghdr.cmsg_type != SCM_CREDENTIALS || - control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) { - log_warning("Received notify message without credentials. Ignoring."); + ucred = (struct ucred*) CMSG_DATA(cmsg); + } + } + + if (n_fds > 0) { + assert(fd_array); + + r = fdset_new_array(&fds, fd_array, n_fds); + if (r < 0) { + close_many(fd_array, n_fds); + return log_oom(); + } + } + + if (!ucred || ucred->pid <= 0) { + log_warning("Received notify message without valid credentials. 
Ignoring."); continue; } - ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr); + if ((size_t) n >= sizeof(buf)) { + log_warning("Received notify message exceeded maximum size. Ignoring."); + continue; + } - assert((size_t) n < sizeof(buf)); buf[n] = 0; /* Notify every unit that might be interested, but try * to avoid notifying the same one multiple times. */ u1 = manager_get_unit_by_pid(m, ucred->pid); if (u1) { - manager_invoke_notify_message(m, u1, ucred->pid, buf, n); + manager_invoke_notify_message(m, u1, ucred->pid, buf, n, fds); found = true; } u2 = hashmap_get(m->watch_pids1, LONG_TO_PTR(ucred->pid)); if (u2 && u2 != u1) { - manager_invoke_notify_message(m, u2, ucred->pid, buf, n); + manager_invoke_notify_message(m, u2, ucred->pid, buf, n, fds); found = true; } u3 = hashmap_get(m->watch_pids2, LONG_TO_PTR(ucred->pid)); if (u3 && u3 != u2 && u3 != u1) { - manager_invoke_notify_message(m, u3, ucred->pid, buf, n); + manager_invoke_notify_message(m, u3, ucred->pid, buf, n, fds); found = true; } if (!found) log_warning("Cannot find unit for notify message of PID "PID_FMT".", ucred->pid); + + if (fdset_size(fds) > 0) + log_warning("Got auxiliary fds with notification message, closing all."); } return 0; diff --git a/src/core/service.c b/src/core/service.c index bfbe959edb..78232ee71c 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -242,6 +242,42 @@ static void service_reset_watchdog(Service *s) { service_start_watchdog(s); } +static void service_fd_store_unlink(ServiceFDStore *fs) { + + if (!fs) + return; + + if (fs->service) { + assert(fs->service->n_fd_store > 0); + LIST_REMOVE(fd_store, fs->service->fd_store, fs); + fs->service->n_fd_store--; + } + + if (fs->event_source) { + sd_event_source_set_enabled(fs->event_source, SD_EVENT_OFF); + sd_event_source_unref(fs->event_source); + } + + safe_close(fs->fd); + free(fs); +} + +static void service_release_resources(Unit *u) { + Service *s = SERVICE(u); + + assert(s); + + if (!s->fd_store) + return; + + 
log_debug("Releasing all resources for %s", u->id); + + while (s->fd_store) + service_fd_store_unlink(s->fd_store); + + assert(s->n_fd_store == 0); +} + static void service_done(Unit *u) { Service *s = SERVICE(u); @@ -286,6 +322,8 @@ static void service_done(Unit *u) { service_stop_watchdog(s); s->timer_event_source = sd_event_source_unref(s->timer_event_source); + + service_release_resources(u); } static int service_arm_timer(Service *s, usec_t usec) { @@ -549,6 +587,14 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { if (s->status_text) fprintf(f, "%sStatus Text: %s\n", prefix, s->status_text); + + if (s->n_fd_store_max > 0) { + fprintf(f, + "%sFile Descriptor Store Max: %u\n" + "%sFile Descriptor Store Current: %u\n", + prefix, s->n_fd_store_max, + prefix, s->n_fd_store); + } } static int service_load_pid_file(Service *s, bool may_warn) { @@ -806,10 +852,10 @@ static int service_coldplug(Unit *u) { } static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) { + _cleanup_free_ int *rfds = NULL; + unsigned rn_fds = 0; Iterator i; int r; - int *rfds = NULL; - unsigned rn_fds = 0; Unit *u; assert(s); @@ -831,10 +877,12 @@ static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) { r = socket_collect_fds(sock, &cfds, &cn_fds); if (r < 0) - goto fail; + return r; - if (!cfds) + if (cn_fds <= 0) { + free(cfds); continue; + } if (!rfds) { rfds = cfds; @@ -842,32 +890,39 @@ static int service_collect_fds(Service *s, int **fds, unsigned *n_fds) { } else { int *t; - t = new(int, rn_fds+cn_fds); + t = realloc(rfds, (rn_fds + cn_fds) * sizeof(int)); if (!t) { free(cfds); - r = -ENOMEM; - goto fail; + return -ENOMEM; } - memcpy(t, rfds, rn_fds * sizeof(int)); - memcpy(t+rn_fds, cfds, cn_fds * sizeof(int)); - free(rfds); + memcpy(t + rn_fds, cfds, cn_fds * sizeof(int)); + rfds = t; + rn_fds += cn_fds; + free(cfds); - rfds = t; - rn_fds = rn_fds+cn_fds; } } + if (s->n_fd_store > 0) { + ServiceFDStore *fs; + int *t; + + t = 
realloc(rfds, (rn_fds + s->n_fd_store) * sizeof(int)); + if (!t) + return -ENOMEM; + + rfds = t; + LIST_FOREACH(fd_store, fs, s->fd_store) + rfds[rn_fds++] = fs->fd; + } + *fds = rfds; *n_fds = rn_fds; + rfds = NULL; return 0; - -fail: - free(rfds); - - return r; } static int service_spawn( @@ -2543,7 +2598,75 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void return 0; } -static void service_notify_message(Unit *u, pid_t pid, char **tags) { +static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) { + ServiceFDStore *fs = userdata; + + assert(e); + assert(fs); + + /* If we get either EPOLLHUP or EPOLLERR, it's time to remove this entry from the fd store */ + service_fd_store_unlink(fs); + return 0; +} + +static int service_add_fd_set(Service *s, FDSet *fds) { + int r; + + assert(s); + + if (fdset_size(fds) <= 0) + return 0; + + while (s->n_fd_store < s->n_fd_store_max) { + _cleanup_close_ int fd = -1; + ServiceFDStore *fs; + bool same = false; + + fd = fdset_steal_first(fds); + if (fd < 0) + break; + + LIST_FOREACH(fd_store, fs, s->fd_store) { + r = same_fd(fs->fd, fd); + if (r < 0) + return log_unit_error_errno(UNIT(s)->id, r, "%s: Couldn't check if same fd: %m", UNIT(s)->id); + if (r > 0) { + same = true; + break; + } + } + + if (same) + continue; + + fs = new0(ServiceFDStore, 1); + if (!fs) + return log_oom(); + + fs->fd = fd; + fs->service = s; + + r = sd_event_add_io(UNIT(s)->manager->event, &fs->event_source, fd, 0, on_fd_store_io, fs); + if (r < 0) { + free(fs); + return log_unit_error_errno(UNIT(s)->id, r, "%s: Failed to add even source: %m", UNIT(s)->id); + } + + LIST_PREPEND(fd_store, s->fd_store, fs); + s->n_fd_store++; + + fd = -1; + + log_unit_debug(UNIT(s)->id, "%s: added fd to fd store.", UNIT(s)->id); + } + + if (fdset_size(fds) > 0) + log_unit_warning(UNIT(s)->id, "%s: tried to store more fds than FDStoreMax=%u allows, closing remaining.", UNIT(s)->id, s->n_fd_store_max); + + return 0; 
+} + +static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds) { Service *s = SERVICE(u); _cleanup_free_ char *cc = NULL; bool notify_dbus = false; @@ -2675,6 +2798,12 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) { service_reset_watchdog(s); } + /* Add the passed fds to the fd store */ + if (strv_find(tags, "FDSTORE=1")) { + log_unit_debug(u->id, "%s: got FDSTORE=1", u->id); + service_add_fd_set(s, fds); + } + /* Notify clients about changed status or main pid */ if (notify_dbus) unit_add_to_dbus_queue(u); @@ -2917,6 +3046,7 @@ const UnitVTable service_vtable = { .init = service_init, .done = service_done, .load = service_load, + .release_resources = service_release_resources, .coldplug = service_coldplug, diff --git a/src/core/service.h b/src/core/service.h index f6a78c403b..dfeee6a68c 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -22,6 +22,7 @@ ***/ typedef struct Service Service; +typedef struct ServiceFDStore ServiceFDStore; #include "unit.h" #include "path.h" @@ -115,6 +116,15 @@ typedef enum ServiceResult { _SERVICE_RESULT_INVALID = -1 } ServiceResult; +struct ServiceFDStore { + Service *service; + + int fd; + sd_event_source *event_source; + + LIST_FIELDS(ServiceFDStore, fd_store); +}; + struct Service { Unit meta; @@ -198,6 +208,10 @@ struct Service { NotifyAccess notify_access; NotifyState notify_state; + + ServiceFDStore *fd_store; + unsigned n_fd_store; + unsigned n_fd_store_max; }; extern const UnitVTable service_vtable; diff --git a/src/core/unit.c b/src/core/unit.c index 229bd0f73a..7311c5804d 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -278,21 +278,32 @@ int unit_set_description(Unit *u, const char *description) { } bool unit_check_gc(Unit *u) { + UnitActiveState state; assert(u); - if (UNIT_VTABLE(u)->no_gc) + if (u->job) return true; - if (u->no_gc) + if (u->nop_job) return true; - if (u->job) + state = unit_active_state(u); + + /* If the unit is inactive or failed and no job 
is queued for + * it, then release its runtime resources */ + if (UNIT_IS_INACTIVE_OR_FAILED(state) && + UNIT_VTABLE(u)->release_resources) + UNIT_VTABLE(u)->release_resources(u); + + /* But we keep the unit object around for longer when it is + * referenced or configured to not be gc'ed */ + if (state != UNIT_INACTIVE) return true; - if (u->nop_job) + if (UNIT_VTABLE(u)->no_gc) return true; - if (unit_active_state(u) != UNIT_INACTIVE) + if (u->no_gc) return true; if (u->refs) diff --git a/src/core/unit.h b/src/core/unit.h index 19fa2f0585..53b8a7f66b 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -345,6 +345,10 @@ struct UnitVTable { * way */ bool (*check_gc)(Unit *u); + /* When the unit is not running and no job for it queued we + * shall release its runtime resources */ + void (*release_resources)(Unit *u); + /* Return true when this unit is suitable for snapshotting */ bool (*check_snapshot)(Unit *u); @@ -359,7 +363,7 @@ struct UnitVTable { void (*notify_cgroup_empty)(Unit *u); /* Called whenever a process of this unit sends us a message */ - void (*notify_message)(Unit *u, pid_t pid, char **tags); + void (*notify_message)(Unit *u, pid_t pid, char **tags, FDSet *fds); /* Called whenever a name this Unit registered for comes or * goes away. 
*/ diff --git a/src/libsystemd/libsystemd.sym.m4 b/src/libsystemd/libsystemd.sym.m4 index 80a61baab8..19a49f45da 100644 --- a/src/libsystemd/libsystemd.sym.m4 +++ b/src/libsystemd/libsystemd.sym.m4 @@ -158,6 +158,11 @@ global: sd_session_get_desktop; } LIBSYSTEMD_216; +LIBSYSTEMD_219 { +global: + sd_pid_notify_with_fds; +} LIBSYSTEMD_217; + m4_ifdef(`ENABLE_KDBUS', LIBSYSTEMD_FUTURE { global: diff --git a/src/libsystemd/sd-daemon/sd-daemon.c b/src/libsystemd/sd-daemon/sd-daemon.c index 1f2a53393f..028c2a7a5b 100644 --- a/src/libsystemd/sd-daemon/sd-daemon.c +++ b/src/libsystemd/sd-daemon/sd-daemon.c @@ -340,16 +340,28 @@ _public_ int sd_is_mq(int fd, const char *path) { return 1; } -_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) { - union sockaddr_union sockaddr = {}; - _cleanup_close_ int fd = -1; - struct msghdr msghdr = {}; - struct iovec iovec = {}; - const char *e; +_public_ int sd_pid_notify_with_fds(pid_t pid, int unset_environment, const char *state, const int *fds, unsigned n_fds) { + union sockaddr_union sockaddr = { + .sa.sa_family = AF_UNIX, + }; + struct iovec iovec = { + .iov_base = (char*) state, + }; + struct msghdr msghdr = { + .msg_iov = &iovec, + .msg_iovlen = 1, + .msg_name = &sockaddr, + }; union { struct cmsghdr cmsghdr; - uint8_t buf[CMSG_SPACE(sizeof(struct ucred))]; - } control = {}; + uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int) * n_fds)]; + } control; + _cleanup_close_ int fd = -1; + struct cmsghdr *cmsg = NULL; + const char *e; + size_t controllen_without_ucred = 0; + bool try_without_ucred = false; int r; if (!state) { @@ -357,6 +369,11 @@ _public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) goto finish; } + if (n_fds > 0 && !fds) { + r = -EINVAL; + goto finish; + } + e = getenv("NOTIFY_SOCKET"); if (!e) return 0; @@ -373,42 +390,50 @@ _public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) goto finish; } - 
sockaddr.sa.sa_family = AF_UNIX; - strncpy(sockaddr.un.sun_path, e, sizeof(sockaddr.un.sun_path)); + iovec.iov_len = strlen(state); + strncpy(sockaddr.un.sun_path, e, sizeof(sockaddr.un.sun_path)); if (sockaddr.un.sun_path[0] == '@') sockaddr.un.sun_path[0] = 0; - iovec.iov_base = (char*) state; - iovec.iov_len = strlen(state); - - msghdr.msg_name = &sockaddr; msghdr.msg_namelen = offsetof(struct sockaddr_un, sun_path) + strlen(e); - if (msghdr.msg_namelen > sizeof(struct sockaddr_un)) msghdr.msg_namelen = sizeof(struct sockaddr_un); - msghdr.msg_iov = &iovec; - msghdr.msg_iovlen = 1; + if (n_fds > 0) { + msghdr.msg_control = &control; + msghdr.msg_controllen = CMSG_LEN(sizeof(int) * n_fds); + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * n_fds); + + memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * n_fds); + } if (pid != 0 && pid != getpid()) { - struct cmsghdr *cmsg; - struct ucred ucred = {}; + struct ucred *ucred; + + try_without_ucred = true; + controllen_without_ucred = msghdr.msg_controllen; msghdr.msg_control = &control; - msghdr.msg_controllen = sizeof(control); + msghdr.msg_controllen += CMSG_LEN(sizeof(struct ucred)); + + if (cmsg) + cmsg = CMSG_NXTHDR(&msghdr, cmsg); + else + cmsg = CMSG_FIRSTHDR(&msghdr); - cmsg = CMSG_FIRSTHDR(&msghdr); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_CREDENTIALS; cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred)); - ucred.pid = pid; - ucred.uid = getuid(); - ucred.gid = getgid(); - - memcpy(CMSG_DATA(cmsg), &ucred, sizeof(struct ucred)); - msghdr.msg_controllen = cmsg->cmsg_len; + ucred = (struct ucred*) CMSG_DATA(cmsg); + ucred->pid = pid; + ucred->uid = getuid(); + ucred->gid = getgid(); } /* First try with fake ucred data, as requested */ @@ -417,10 +442,11 @@ _public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) goto finish; } - /* If that failed, try with our own instead */ - if (msghdr.msg_control) 
{ - msghdr.msg_control = NULL; - msghdr.msg_controllen = 0; + /* If that failed, try with our own ucred instead */ + if (try_without_ucred) { + if (controllen_without_ucred <= 0) + msghdr.msg_control = NULL; + msghdr.msg_controllen = controllen_without_ucred; if (sendmsg(fd, &msghdr, MSG_NOSIGNAL) >= 0) { r = 1; @@ -437,8 +463,12 @@ finish: return r; } +_public_ int sd_pid_notify(pid_t pid, int unset_environment, const char *state) { + return sd_pid_notify_with_fds(pid, unset_environment, state, NULL, 0); +} + _public_ int sd_notify(int unset_environment, const char *state) { - return sd_pid_notify(0, unset_environment, state); + return sd_pid_notify_with_fds(0, unset_environment, state, NULL, 0); } _public_ int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) { diff --git a/src/shared/fdset.c b/src/shared/fdset.c index 46f7773a9e..9e35ce5cec 100644 --- a/src/shared/fdset.c +++ b/src/shared/fdset.c @@ -41,7 +41,31 @@ FDSet *fdset_new(void) { return MAKE_FDSET(set_new(NULL)); } -void fdset_free(FDSet *s) { +int fdset_new_array(FDSet **ret, int *fds, unsigned n_fds) { + unsigned i; + FDSet *s; + int r; + + assert(ret); + + s = fdset_new(); + if (!s) + return -ENOMEM; + + for (i = 0; i < n_fds; i++) { + + r = fdset_put(s, fds[i]); + if (r < 0) { + set_free(MAKE_SET(s)); + return r; + } + } + + *ret = s; + return 0; +} + +FDSet* fdset_free(FDSet *s) { void *p; while ((p = set_steal_first(MAKE_SET(s)))) { @@ -61,6 +85,7 @@ void fdset_free(FDSet *s) { } set_free(MAKE_SET(s)); + return NULL; } int fdset_put(FDSet *s, int fd) { @@ -70,6 +95,19 @@ int fdset_put(FDSet *s, int fd) { return set_put(MAKE_SET(s), FD_TO_PTR(fd)); } +int fdset_consume(FDSet *s, int fd) { + int r; + + assert(s); + assert(fd >= 0); + + r = fdset_put(s, fd); + if (r <= 0) + safe_close(fd); + + return r; +} + int fdset_put_dup(FDSet *s, int fd) { int copy, r; @@ -223,6 +261,10 @@ unsigned fdset_size(FDSet *fds) { return set_size(MAKE_SET(fds)); } +bool fdset_isempty(FDSet 
*fds) { + return set_isempty(MAKE_SET(fds)); +} + int fdset_iterate(FDSet *s, Iterator *i) { void *p; @@ -232,3 +274,13 @@ int fdset_iterate(FDSet *s, Iterator *i) { return PTR_TO_FD(p); } + +int fdset_steal_first(FDSet *fds) { + void *p; + + p = set_steal_first(MAKE_SET(fds)); + if (!p) + return -ENOENT; + + return PTR_TO_FD(p); +} diff --git a/src/shared/fdset.h b/src/shared/fdset.h index 907acd76dd..c3c5e52286 100644 --- a/src/shared/fdset.h +++ b/src/shared/fdset.h @@ -27,25 +27,30 @@ typedef struct FDSet FDSet; FDSet* fdset_new(void); -void fdset_free(FDSet *s); +FDSet* fdset_free(FDSet *s); int fdset_put(FDSet *s, int fd); int fdset_put_dup(FDSet *s, int fd); +int fdset_consume(FDSet *s, int fd); bool fdset_contains(FDSet *s, int fd); int fdset_remove(FDSet *s, int fd); -int fdset_new_fill(FDSet **_s); -int fdset_new_listen_fds(FDSet **_s, bool unset); +int fdset_new_array(FDSet **ret, int *fds, unsigned n_fds); +int fdset_new_fill(FDSet **ret); +int fdset_new_listen_fds(FDSet **ret, bool unset); int fdset_cloexec(FDSet *fds, bool b); int fdset_close_others(FDSet *fds); unsigned fdset_size(FDSet *fds); +bool fdset_isempty(FDSet *fds); int fdset_iterate(FDSet *s, Iterator *i); +int fdset_steal_first(FDSet *fds); + #define FDSET_FOREACH(fd, fds, i) \ for ((i) = ITERATOR_FIRST, (fd) = fdset_iterate((fds), &(i)); (fd) >= 0; (fd) = fdset_iterate((fds), &(i))) diff --git a/src/shared/util.c b/src/shared/util.c index bda3c93ae3..f01022ed0b 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -7672,3 +7672,28 @@ int fd_setcrtime(int fd, usec_t usec) { return 0; } + +int same_fd(int a, int b) { + struct stat sta, stb; + + assert(a >= 0); + assert(b >= 0); + + if (a == b) + return true; + + if (fstat(a, &sta) < 0) + return -errno; + + if (fstat(b, &stb) < 0) + return -errno; + + if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT)) + return false; + + if (S_ISREG(sta.st_mode) || S_ISDIR(sta.st_mode) || S_ISFIFO(sta.st_mode) || S_ISSOCK(sta.st_mode) || 
S_ISLNK(sta.st_mode)) + return (sta.st_dev == stb.st_dev) && (sta.st_ino == stb.st_ino); + + /* We consider all device fds different... */ + return false; +} diff --git a/src/shared/util.h b/src/shared/util.h index a131a3c0f1..4b7e12e628 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -1069,3 +1069,5 @@ int fd_setcrtime(int fd, usec_t usec); int fd_getcrtime(int fd, usec_t *usec); int path_getcrtime(const char *p, usec_t *usec); int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags); + +int same_fd(int a, int b); diff --git a/src/systemd/sd-daemon.h b/src/systemd/sd-daemon.h index 351b4e59c4..b878b4d8a6 100644 --- a/src/systemd/sd-daemon.h +++ b/src/systemd/sd-daemon.h @@ -190,6 +190,12 @@ int sd_is_mq(int fd, const char *path); timestamps to detect failed services. Also see sd_watchdog_enabled() below. + FDSTORE=1 Store the file descriptors passed along with the + message in the per-service file descriptor store, + and pass them to the main process again on next + invocation. This variable is only supported with + sd_pid_notify_with_fds(). + Daemons can choose to send additional variables. However, it is recommended to prefix variable names not listed above with X_. @@ -243,6 +249,13 @@ int sd_pid_notify(pid_t pid, int unset_environment, const char *state); int sd_pid_notifyf(pid_t pid, int unset_environment, const char *format, ...) _sd_printf_(3,4); /* + Similar to sd_pid_notify(), but also passes the specified fd array + to the service manager for storage. This is particularly useful for + FDSTORE=1 messages. +*/ +int sd_pid_notify_with_fds(pid_t pid, int unset_environment, const char *state, const int *fds, unsigned n_fds); + +/* Returns > 0 if the system was booted with systemd. Returns < 0 on error. Returns 0 if the system was not booted with systemd. Note that all of the functions above handle non-systemd boots just |