diff options
34 files changed, 304 insertions, 119 deletions
diff --git a/DISTRO_PORTING b/DISTRO_PORTING index 9f61bd6de6..51a244389d 100644 --- a/DISTRO_PORTING +++ b/DISTRO_PORTING @@ -48,6 +48,9 @@ PAM: /etc/security/limits.conf will not be read unless you load pam_limits. Make sure you add modules your distro expects from user services. + Pass --with-pamconfdir=no to ./configure to avoid installing this file + and instead install your own. + CONTRIBUTING UPSTREAM: We generally do no longer accept distribution-specific diff --git a/Makefile.am b/Makefile.am index 4a69236090..a0c17db179 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5905,8 +5905,10 @@ pam_systemd_la_LIBADD = \ pamlib_LTLIBRARIES = \ pam_systemd.la +if ENABLE_PAM_CONFIG dist_pamconf_DATA = \ src/login/systemd-user +endif EXTRA_DIST += \ src/login/systemd-user.m4 @@ -40,14 +40,6 @@ Features: * switch to ProtectSystem=strict for all our long-running services where that's possible -* introduce an "invocation ID" for units, that is randomly generated, and - identifies each runtime-cycle of a unit. It should be set freshly each time - we traverse inactive → activating/active, and should be the primary key to - map offline data (stored in the journal) with online bus objects. Let's pass - this as $SYSTEMD_INVOCATION_ID to services, as well as set this as xattr on - the cgroup of a services. The former is accessible without privileges, the - latter ensures the ID cannot be faked. - * If RootDirectory= is used, mount /proc, /sys, /dev into it, if not mounted yet * Permit masking specific netlink APIs with RestrictAddressFamily= @@ -498,7 +490,6 @@ Features: message that works, but alraedy after a short tiemout - check if we can make journalctl by default use --follow mode inside of less if called without args? 
- maybe add API to send pairs of iovecs via sd_journal_send - - journal: when writing journal auto-rotate if time jumps backwards - journal: add a setgid "systemd-journal" utility to invoke from libsystemd-journal, which passes fds via STDOUT and does PK access - journactl: support negative filtering, i.e. FOOBAR!="waldo", and !FOOBAR for events without FOOBAR. diff --git a/configure.ac b/configure.ac index ccd212ef13..0e87adc38f 100644 --- a/configure.ac +++ b/configure.ac @@ -1507,9 +1507,10 @@ AC_ARG_WITH([pamlibdir], AX_NORMALIZE_PATH([with_pamlibdir]) AC_ARG_WITH([pamconfdir], - AS_HELP_STRING([--with-pamconfdir=DIR], [Directory for PAM configuration]), + AS_HELP_STRING([--with-pamconfdir=DIR], [Directory for PAM configuration (pass no to disable installing)]), [], [with_pamconfdir=${sysconfdir}/pam.d]) +AM_CONDITIONAL(ENABLE_PAM_CONFIG, [test "$with_pamconfdir" != "no"]) AX_NORMALIZE_PATH([with_pamconfdir]) AC_ARG_ENABLE([split-usr], diff --git a/hwdb/60-keyboard.hwdb b/hwdb/60-keyboard.hwdb index f7d5ac58d4..9c87aecd30 100644 --- a/hwdb/60-keyboard.hwdb +++ b/hwdb/60-keyboard.hwdb @@ -238,8 +238,9 @@ evdev:atkbd:dmi:bvn*:bvr*:bd*:svnDell*:pnInspiron*1110:pvr* evdev:atkbd:dmi:bvn*:bvr*:bd*:svnDell*:pnInspiron*1210:pvr* KEYBOARD_KEY_84=wlan -# Dell Inspiron 1520 +# Dell Inspiron 1520 and Latitude 2110 evdev:atkbd:dmi:bvn*:bvr*:bd*:svnDell*:pnInspiron*1520:pvr* +evdev:atkbd:dmi:bvn*:bvr*:bd*:svnDell*:pnLatitude*2110:pvr* KEYBOARD_KEY_85=unknown # Brightness Down, also emitted by acpi-video, ignore KEYBOARD_KEY_86=unknown # Brightness Up, also emitted by acpi-video, ignore diff --git a/man/logind.conf.xml b/man/logind.conf.xml index 9b0e181849..994e0e1140 100644 --- a/man/logind.conf.xml +++ b/man/logind.conf.xml @@ -261,7 +261,7 @@ <listitem><para>Controls whether actions that <command>systemd-logind</command> takes when the power and sleep keys and the lid switch are triggered are subject to high-level inhibitor locks ("shutdown", "sleep", "idle"). 
Low level inhibitor - locks ("handle-*-key"), are always honoured, irrespective of this setting.</para> + locks ("handle-*-key"), are always honored, irrespective of this setting.</para> <para>These settings take boolean arguments. If <literal>no</literal>, the inhibitor locks taken by applications are respected. If <literal>yes</literal>, diff --git a/man/sd-event.xml b/man/sd-event.xml index fc615f0906..24a69bb645 100644 --- a/man/sd-event.xml +++ b/man/sd-event.xml @@ -97,7 +97,7 @@ iteration a single event source is dispatched. Each time an event source is dispatched the kernel is polled for new events, before the next event source is dispatched. The event loop is designed to - honour priorities and provide fairness within each priority. It is + honor priorities and provide fairness within each priority. It is not designed to provide optimal throughput, as this contradicts these goals due the limitations of the underlying <citerefentry project='man-pages'><refentrytitle>epoll</refentrytitle><manvolnum>7</manvolnum></citerefentry> diff --git a/man/sd_event_source_set_priority.xml b/man/sd_event_source_set_priority.xml index 8c9b39fe5e..6e7032fc80 100644 --- a/man/sd_event_source_set_priority.xml +++ b/man/sd_event_source_set_priority.xml @@ -115,7 +115,7 @@ reliable. However, it is guaranteed that if events are seen on multiple same-priority event sources at the same time, each one is not dispatched again until all others have been dispatched - once. This behaviour guarantees that within each priority + once. 
This behavior guarantees that within each priority particular event sources do not starve or dominate the event loop.</para> diff --git a/man/systemctl.xml b/man/systemctl.xml index 3b883ea754..b51badf7fe 100644 --- a/man/systemctl.xml +++ b/man/systemctl.xml @@ -306,7 +306,7 @@ <para><literal>ignore-requirements</literal> is similar to <literal>ignore-dependencies</literal>, but only causes the requirement dependencies to be ignored, the ordering - dependencies will still be honoured.</para> + dependencies will still be honored.</para> </listitem> </varlistentry> @@ -1006,7 +1006,7 @@ kobject-uevent 1 systemd-udevd-kernel.socket systemd-udevd.service desired, combine this command with the <option>--now</option> switch, or invoke <command>start</command> with appropriate arguments later. Note that in case of unit instance enablement (i.e. enablement of units of the form <filename>foo@bar.service</filename>), symlinks named the same as instances are created in the - unit configuration diectory, however they point to the single template unit file they are instantiated + unit configuration directory, however they point to the single template unit file they are instantiated from.</para> <para>This command expects either valid unit names (in which case various unit file directories are diff --git a/man/systemd-coredump.xml b/man/systemd-coredump.xml index a28dc62e5a..4a1bc8b296 100644 --- a/man/systemd-coredump.xml +++ b/man/systemd-coredump.xml @@ -107,7 +107,7 @@ <citerefentry><refentrytitle>systemd-sysctl</refentrytitle><manvolnum>8</manvolnum></citerefentry>. 
</para> - <para>The behaviour of <command>systemd-coredump</command> itself is configured through the configuration file + <para>The behavior of <command>systemd-coredump</command> itself is configured through the configuration file <filename>/etc/systemd/coredump.conf</filename> and corresponding snippets <filename>/etc/systemd/coredump.conf.d/*.conf</filename>, see <citerefentry><refentrytitle>coredump.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>. A new diff --git a/man/systemd-mount.xml b/man/systemd-mount.xml index e6c07863c0..06b7c85bd8 100644 --- a/man/systemd-mount.xml +++ b/man/systemd-mount.xml @@ -260,7 +260,7 @@ <refsect1> <title>The udev Database</title> - <para>If <option>--discover</option> is used, <command>systemd-mount</command> honours a couple of additional udev + <para>If <option>--discover</option> is used, <command>systemd-mount</command> honors a couple of additional udev properties of block devices:</para> <variablelist class='udev-directives'> diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index 5ac54df81a..f153034296 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -405,7 +405,7 @@ purposes (usually in the range beyond the host's UID/GID 65536). The parameter may be specified as follows:</para> <orderedlist> - <listitem><para>If one or two colon-separated numers are specified, user namespacing is turned on. The first + <listitem><para>If one or two colon-separated numbers are specified, user namespacing is turned on. The first parameter specifies the first host UID/GID to assign to the container, the second parameter specifies the number of host UIDs/GIDs to assign to the container. If the second parameter is omitted, 65536 UIDs/GIDs are assigned.</para></listitem> @@ -425,13 +425,13 @@ range is automatically chosen. 
As first step, the file owner of the root directory of the container's directory tree is read, and it is checked that it is currently not used by the system otherwise (in particular, that no other container is using it). If this check is successful, the UID/GID range determined - this way is used, similar to the behaviour if "yes" is specified. If the check is not successful (and thus + this way is used, similar to the behavior if "yes" is specified. If the check is not successful (and thus the UID/GID range indicated in the root directory's file owner is already used elsewhere) a new – currently unused – UID/GID range of 65536 UIDs/GIDs is randomly chosen between the host UID/GIDs of 524288 and 1878982656, always starting at a multiple of 65536. This setting implies <option>--private-users-chown</option> (see below), which has the effect that the files and directories in the container's directory tree will be owned by the appropriate users of the range picked. Using this option - makes user namespace behaviour fully automatic. Note that the first invocation of a previously unused + makes user namespace behavior fully automatic. Note that the first invocation of a previously unused container image might result in picking a new UID/GID range for it, and thus in the (possibly expensive) file ownership adjustment operation. However, subsequent invocations of the container will be cheap (unless of course the picked UID/GID range is assigned to a different use by then).</para></listitem> @@ -440,7 +440,7 @@ <para>It is recommended to assign at least 65536 UIDs/GIDs to each container, so that the usable UID/GID range in the container covers 16 bit. For best security, do not assign overlapping UID/GID ranges to multiple containers. It is hence a good idea to use the upper 16 bit of the host 32-bit UIDs/GIDs as container identifier, while the lower 16 - bit encode the container UID/GID used. 
This is in fact the behaviour enforced by the + bit encode the container UID/GID used. This is in fact the behavior enforced by the <option>--private-users=pick</option> option.</para> <para>When user namespaces are used, the GID range assigned to each container is always chosen identical to the @@ -722,7 +722,7 @@ and the subdirectory is symlinked into the host at the same location. <literal>try-host</literal> and <literal>try-guest</literal> do the same but do not fail if - the host does not have persistent journalling enabled. If + the host does not have persistent journaling enabled. If <literal>auto</literal> (the default), and the right subdirectory of <filename>/var/log/journal</filename> exists, it will be bind mounted into the container. If the diff --git a/man/systemd-run.xml b/man/systemd-run.xml index 15f9119e54..2ad8cb0835 100644 --- a/man/systemd-run.xml +++ b/man/systemd-run.xml @@ -402,7 +402,7 @@ There is a screen on: when the user first logs in, and stays around as long as at least one login session is open. After the user logs out of the last session, <filename>user@.service</filename> and all services underneath it - are terminated. This behaviour is the default, when "lingering" is + are terminated. This behavior is the default, when "lingering" is not enabled for that user. Enabling lingering means that <filename>user@.service</filename> is started automatically during boot, even if the user is not logged in, and that the service is diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index a268397d09..1d995f143e 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -109,7 +109,7 @@ <term><varname>CtrlAltDelBurstAction=</varname></term> <listitem><para>Defines what action will be performed - if user presses Ctr-Alt-Delete more than 7 times in 2s. + if user presses Ctrl-Alt-Delete more than 7 times in 2s. 
Can be set to <literal>reboot-force</literal>, <literal>poweroff-force</literal> or disabled with <literal>ignore</literal>. Defaults to <literal>reboot-force</literal>. diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 249fcb0363..71dc86ec2f 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -988,7 +988,7 @@ the unit's own user and group to themselves and everything else to the <literal>nobody</literal> user and group. This is useful to securely detach the user and group databases used by the unit from the rest of the system, and thus to create an effective sandbox environment. All files, directories, processes, IPC objects and - other resources owned by users/groups not equalling <literal>root</literal> or the unit's own will stay visible + other resources owned by users/groups not equaling <literal>root</literal> or the unit's own will stay visible from within the unit but appear owned by the <literal>nobody</literal> user and group. If this mode is enabled, all unit processes are run without privileges in the host user namespace (regardless if the unit's own user/group is <literal>root</literal> or not). Specifically this means that the process will have zero process @@ -1583,7 +1583,7 @@ <varlistentry> <term><varname>$MAINPID</varname></term> - <listitem><para>The PID of the units main process if it is + <listitem><para>The PID of the unit's main process if it is known. This is only set for control processes as invoked by <varname>ExecReload=</varname> and similar. </para></listitem> </varlistentry> diff --git a/man/systemd.link.xml b/man/systemd.link.xml index 10fddeced0..8edbe758d9 100644 --- a/man/systemd.link.xml +++ b/man/systemd.link.xml @@ -107,7 +107,7 @@ <listitem> <para>A whitespace-separated list of shell-style globs matching the device name, as exposed by the udev property - "INTERFACE". This can not be used to match on names that have + "INTERFACE". 
This cannot be used to match on names that have already been changed from userspace. Caution is advised when matching on kernel-assigned names, as they are known to be unstable between reboots.</para> diff --git a/man/systemd.socket.xml b/man/systemd.socket.xml index d759e17289..0ce1203cfb 100644 --- a/man/systemd.socket.xml +++ b/man/systemd.socket.xml @@ -535,7 +535,7 @@ and the kernel will ignore initial ACK packets without any data. The argument specifies the approximate amount of time the kernel should wait for incoming data before falling back - to the normal behavior of honouring empty ACK packets. This + to the normal behavior of honoring empty ACK packets. This option is beneficial for protocols where the client sends the data first (e.g. HTTP, in contrast to SMTP), because the server process will not be woken up unnecessarily before it diff --git a/man/systemd.unit.xml b/man/systemd.unit.xml index 9778283fec..79bdb2cd38 100644 --- a/man/systemd.unit.xml +++ b/man/systemd.unit.xml @@ -195,7 +195,7 @@ instantiated units, this logic will first look for the instance <literal>.d/</literal> subdirectory and read its <literal>.conf</literal> files, followed by the template <literal>.d/</literal> subdirectory and the <literal>.conf</literal> files there. 
Also note that - settings from the <literal>[Install]</literal> section are not honoured in drop-in unit files, + settings from the <literal>[Install]</literal> section are not honored in drop-in unit files, and have no effect.</para> <para>In addition to <filename>/etc/systemd/system</filename>, the drop-in <literal>.d</literal> diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 37e6928a46..cede835920 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2514,6 +2514,20 @@ int cg_blkio_weight_parse(const char *s, uint64_t *ret) { return 0; } +bool is_cgroup_fs(const struct statfs *s) { + return is_fs_type(s, CGROUP_SUPER_MAGIC) || + is_fs_type(s, CGROUP2_SUPER_MAGIC); +} + +bool fd_is_cgroup_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return false; /* bool return: a negative errno would convert to true, so report "not cgroupfs" when the probe fails */ + + return is_cgroup_fs(&s); +} + static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { [CGROUP_CONTROLLER_CPU] = "cpu", [CGROUP_CONTROLLER_CPUACCT] = "cpuacct", diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 7529c9719e..0aa27c4cd7 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -23,6 +23,7 @@ #include <stdbool.h> #include <stdint.h> #include <stdio.h> +#include <sys/statfs.h> #include <sys/types.h> #include "def.h" @@ -254,3 +255,6 @@ CGroupController cgroup_controller_from_string(const char *s) _pure_; int cg_weight_parse(const char *s, uint64_t *ret); int cg_cpu_shares_parse(const char *s, uint64_t *ret); int cg_blkio_weight_parse(const char *s, uint64_t *ret); + +bool is_cgroup_fs(const struct statfs *s); +bool fd_is_cgroup_fs(int fd); diff --git a/src/basic/gunicode.c b/src/basic/gunicode.c index 542110503f..e6ac0545a4 100644 --- a/src/basic/gunicode.c +++ b/src/basic/gunicode.c @@ -26,7 +26,7 @@ char * utf8_prev_char (const char *p) { - while (1) + for (;;) { p--; if ((*p & 0xc0) != 0x80) diff --git a/src/basic/rm-rf.c b/src/basic/rm-rf.c index 43816fd1bb..baa70c2c8d 100644 --- a/src/basic/rm-rf.c 
+++ b/src/basic/rm-rf.c @@ -27,6 +27,7 @@ #include <unistd.h> #include "btrfs-util.h" +#include "cgroup-util.h" #include "fd-util.h" #include "log.h" #include "macro.h" @@ -36,9 +37,14 @@ #include "stat-util.h" #include "string-util.h" +static bool is_physical_fs(const struct statfs *sfs) { + return !is_temporary_fs(sfs) && !is_cgroup_fs(sfs); +} + int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) { _cleanup_closedir_ DIR *d = NULL; int ret = 0, r; + struct statfs sfs; assert(fd >= 0); @@ -47,13 +53,13 @@ int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) { if (!(flags & REMOVE_PHYSICAL)) { - r = fd_is_temporary_fs(fd); + r = fstatfs(fd, &sfs); if (r < 0) { safe_close(fd); - return r; + return -errno; } - if (!r) { + if (is_physical_fs(&sfs)) { /* We refuse to clean physical file systems * with this call, unless explicitly * requested. This is extra paranoia just to @@ -210,7 +216,7 @@ int rm_rf(const char *path, RemoveFlags flags) { if (statfs(path, &s) < 0) return -errno; - if (!is_temporary_fs(&s)) { + if (is_physical_fs(&s)) { log_error("Attempted to remove disk file system, and we can't allow that."); return -EPERM; } diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index db60d0af7a..a982c204be 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -589,7 +589,7 @@ static int get_mount_namespace_leader(pid_t pid, pid_t *container_pid) { if (r < 0) return r; - while (1) { + for (;;) { ino_t parent_mntns; r = get_process_ppid(cpid, &ppid); diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 349ef74e81..49199b269f 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -568,8 +568,8 @@ static int journal_file_verify_header(JournalFile *f) { return -ENODATA; if (f->writable) { - uint8_t state; sd_id128_t machine_id; + uint8_t state; int r; r = sd_id128_get_machine(&machine_id); @@ -590,6 +590,14 @@ static int journal_file_verify_header(JournalFile *f) { 
log_debug("Journal file %s has unknown state %i.", f->path, state); return -EBUSY; } + + /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't + * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks + * bisection. */ + if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) { + log_debug("Journal file %s is from the future, refusing to append new data to it that'd be older.", f->path); + return -ETXTBSY; + } } f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header); @@ -747,12 +755,16 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset assert(ret); /* Objects may only be located at multiple of 64 bit */ - if (!VALID64(offset)) + if (!VALID64(offset)) { + log_debug("Attempt to move to object at non-64bit boundary: %" PRIu64, offset); return -EBADMSG; + } /* Object may not be located in the file header */ - if (offset < le64toh(f->header->header_size)) + if (offset < le64toh(f->header->header_size)) { + log_debug("Attempt to move to object located in file header: %" PRIu64, offset); return -EBADMSG; + } r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t); if (r < 0) @@ -761,17 +773,29 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset o = (Object*) t; s = le64toh(o->object.size); - if (s < sizeof(ObjectHeader)) + if (s == 0) { + log_debug("Attempt to move to uninitialized object: %" PRIu64, offset); + return -EBADMSG; + } + if (s < sizeof(ObjectHeader)) { + log_debug("Attempt to move to overly short object: %" PRIu64, offset); return -EBADMSG; + } - if (o->object.type <= OBJECT_UNUSED) + if (o->object.type <= OBJECT_UNUSED) { + log_debug("Attempt to move to object with invalid type: %" PRIu64, offset); return -EBADMSG; + } - if (s < minimum_header_size(o)) + if (s < minimum_header_size(o)) { + log_debug("Attempt to move to truncated object: %" PRIu64, offset); return -EBADMSG; + 
} - if (type > OBJECT_UNUSED && o->object.type != type) + if (type > OBJECT_UNUSED && o->object.type != type) { + log_debug("Attempt to move to object of unexpected type: %" PRIu64, offset); return -EBADMSG; + } if (s > sizeof(ObjectHeader)) { r = journal_file_move_to(f, type, false, offset, s, &t); @@ -2472,6 +2496,37 @@ int journal_file_compare_locations(JournalFile *af, JournalFile *bf) { return 0; } +static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) { + + /* Increase or decrease the specified index, in the right direction. */ + + if (direction == DIRECTION_DOWN) { + if (*i >= n - 1) + return 0; + + (*i) ++; + } else { + if (*i <= 0) + return 0; + + (*i) --; + } + + return 1; +} + +static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) { + + /* Consider it an error if any of the two offsets is uninitialized */ + if (old_offset == 0 || new_offset == 0) + return false; + + /* If we go down, the new offset must be larger than the old one. */ + return direction == DIRECTION_DOWN ? + new_offset > old_offset : + new_offset < old_offset; +} + int journal_file_next_entry( JournalFile *f, uint64_t p, @@ -2502,36 +2557,34 @@ int journal_file_next_entry( if (r <= 0) return r; - if (direction == DIRECTION_DOWN) { - if (i >= n - 1) - return 0; - - i++; - } else { - if (i <= 0) - return 0; - - i--; - } + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; } /* And jump to it */ - r = generic_array_get(f, - le64toh(f->header->entry_array_offset), - i, - ret, &ofs); - if (r == -EBADMSG && direction == DIRECTION_DOWN) { - /* Special case: when we iterate throught the journal file linearly, and hit an entry we can't read, - * consider this the end of the journal file. */ - log_debug_errno(r, "Encountered entry we can't read while iterating through journal file. 
Considering this the end of the file."); - return 0; + for (;;) { + r = generic_array_get(f, + le64toh(f->header->entry_array_offset), + i, + ret, &ofs); + if (r > 0) + break; + if (r != -EBADMSG) + return r; + + /* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if + * the next one might work for us instead. */ + log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i); + + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; } - if (r <= 0) - return r; - if (p > 0 && - (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) { - log_debug("%s: entry array corrupted at entry %" PRIu64, f->path, i); + /* Ensure our array is properly ordered. */ + if (p > 0 && !check_properly_ordered(ofs, p, direction)) { + log_debug("%s: entry array not properly ordered at entry %" PRIu64, f->path, i); return -EBADMSG; } @@ -2548,9 +2601,9 @@ int journal_file_next_entry_for_data( direction_t direction, Object **ret, uint64_t *offset) { - uint64_t n, i; - int r; + uint64_t i, n, ofs; Object *d; + int r; assert(f); assert(p > 0 || !o); @@ -2582,25 +2635,39 @@ int journal_file_next_entry_for_data( if (r <= 0) return r; - if (direction == DIRECTION_DOWN) { - if (i >= n - 1) - return 0; + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; + } - i++; - } else { - if (i <= 0) - return 0; + for (;;) { + r = generic_array_get_plus_one(f, + le64toh(d->data.entry_offset), + le64toh(d->data.entry_array_offset), + i, + ret, &ofs); + if (r > 0) + break; + if (r != -EBADMSG) + return r; - i--; - } + log_debug_errno(r, "Data entry item %" PRIu64 " is bad, skipping over it.", i); + + r = bump_array_index(&i, direction, n); + if (r <= 0) + return r; + } + /* Ensure our array is properly ordered. 
*/ + if (p > 0 && check_properly_ordered(ofs, p, direction)) { + log_debug("%s data entry array not properly ordered at entry %" PRIu64, f->path, i); + return -EBADMSG; } - return generic_array_get_plus_one(f, - le64toh(d->data.entry_offset), - le64toh(d->data.entry_array_offset), - i, - ret, offset); + if (offset) + *offset = ofs; + + return 1; } int journal_file_move_to_entry_by_offset_for_data( @@ -3271,7 +3338,8 @@ int journal_file_open_reliably( -EBUSY, /* unclean shutdown */ -ESHUTDOWN, /* already archived */ -EIO, /* IO error, including SIGBUS on mmap */ - -EIDRM /* File has been deleted */)) + -EIDRM, /* File has been deleted */ + -ETXTBSY)) /* File is from the future */ return r; if ((flags & O_ACCMODE) == O_RDONLY) diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c index f09dc66e03..12ce2fd56c 100644 --- a/src/journal/journal-vacuum.c +++ b/src/journal/journal-vacuum.c @@ -343,7 +343,7 @@ finish: free(list[i].filename); free(list); - log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals on disk.", format_bytes(sbytes, sizeof(sbytes), freed)); + log_full(verbose ? LOG_INFO : LOG_DEBUG, "Vacuuming done, freed %s of archived journals from %s.", format_bytes(sbytes, sizeof(sbytes), freed), directory); return r; } diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 4350925fb0..7f997487b4 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -1091,8 +1091,10 @@ static int discover_next_boot(sd_journal *j, r = sd_journal_previous(j); if (r < 0) return r; - else if (r == 0) + else if (r == 0) { + log_debug("Whoopsie! We found a boot ID but can't read its last entry."); return -ENODATA; /* This shouldn't happen. We just came from this very boot ID. 
*/ + } r = sd_journal_get_realtime_usec(j, &next_boot->last); if (r < 0) @@ -1112,7 +1114,7 @@ static int get_boots( bool skip_once; int r, count = 0; - BootId *head = NULL, *tail = NULL; + BootId *head = NULL, *tail = NULL, *id; const bool advance_older = boot_id && offset <= 0; sd_id128_t previous_boot_id; @@ -1203,6 +1205,13 @@ static int get_boots( break; } } else { + LIST_FOREACH(boot_list, id, head) { + if (sd_id128_equal(id->id, current->id)) { + /* boot id already stored, something wrong with the journal files */ + /* exiting as otherwise this problem would cause forever loop */ + goto finish; + } + } LIST_INSERT_AFTER(boot_list, head, tail, current); tail = current; current = NULL; @@ -2257,7 +2266,7 @@ int main(int argc, char *argv[]) { if (r < 0) goto finish; - printf("Archived and active journals take up %s on disk.\n", + printf("Archived and active journals take up %s in the file system.\n", format_bytes(sbytes, sizeof(sbytes), bytes)); goto finish; } diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index f01cf1d937..381182fa2c 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -595,52 +595,86 @@ static void server_cache_hostname(Server *s) { static bool shall_try_append_again(JournalFile *f, int r) { switch(r) { + case -E2BIG: /* Hit configured limit */ case -EFBIG: /* Hit fs limit */ case -EDQUOT: /* Quota limit hit */ case -ENOSPC: /* Disk full */ log_debug("%s: Allocation limit reached, rotating.", f->path); return true; + case -EIO: /* I/O error of some kind (mmap) */ log_warning("%s: IO error, rotating.", f->path); return true; + case -EHOSTDOWN: /* Other machine */ log_info("%s: Journal file from other machine, rotating.", f->path); return true; + case -EBUSY: /* Unclean shutdown */ log_info("%s: Unclean shutdown, rotating.", f->path); return true; + case -EPROTONOSUPPORT: /* Unsupported feature */ log_info("%s: Unsupported feature, rotating.", f->path); return true; + case -EBADMSG: /* 
Corrupted */ case -ENODATA: /* Truncated */ case -ESHUTDOWN: /* Already archived */ log_warning("%s: Journal file corrupted, rotating.", f->path); return true; + case -EIDRM: /* Journal file has been deleted */ log_warning("%s: Journal file has been deleted, rotating.", f->path); return true; + + case -ETXTBSY: /* Journal file is from the future */ + log_warning("%s: Journal file is from the future, rotating.", f->path); + return true; + default: return false; } } static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) { + bool vacuumed = false, rotate = false; + struct dual_timestamp ts; JournalFile *f; - bool vacuumed = false; int r; assert(s); assert(iovec); assert(n > 0); - f = find_journal(s, uid); - if (!f) - return; + /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use + * the source time, and not even the time the event was originally seen, but instead simply the time we started + * processing it, as we want strictly linear ordering in what we write out.) */ + assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0); + assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0); + + if (ts.realtime < s->last_realtime_clock) { + /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during + * regular operation. However, when it does happen, then we should make sure that we start fresh files + * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure + * bisection works correctly. 
*/ + + log_debug("Time jumped backwards, rotating."); + rotate = true; + } else { + + f = find_journal(s, uid); + if (!f) + return; + + if (journal_file_rotate_suggested(f, s->max_file_usec)) { + log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + rotate = true; + } + } - if (journal_file_rotate_suggested(f, s->max_file_usec)) { - log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path); + if (rotate) { server_rotate(s); server_vacuum(s, false, false); vacuumed = true; @@ -650,7 +684,9 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; } - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + s->last_realtime_clock = ts.realtime; + + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r >= 0) { server_schedule_sync(s, priority); return; @@ -669,7 +705,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned return; log_debug("Retrying write."); - r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL); + r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL); if (r < 0) log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n)); else diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h index dfb5724794..cc68a0a690 100644 --- a/src/journal/journald-server.h +++ b/src/journal/journald-server.h @@ -149,6 +149,8 @@ struct Server { char *cgroup_root; usec_t watchdog_usec; + + usec_t last_realtime_clock; }; #define SERVER_MACHINE_ID(s) ((s)->machine_id_field + strlen("_MACHINE_ID=")) diff --git a/src/libsystemd/sd-bus/bus-error.c b/src/libsystemd/sd-bus/bus-error.c index 26219bdeed..378f7a377a 100644 --- a/src/libsystemd/sd-bus/bus-error.c +++ b/src/libsystemd/sd-bus/bus-error.c @@ -70,11 +70,9 @@ BUS_ERROR_MAP_ELF_REGISTER const sd_bus_error_map 
bus_standard_errors[] = { SD_BUS_ERROR_MAP_END }; -/* GCC maps this magically to the beginning and end of the BUS_ERROR_MAP section. - * Hide them; for currently unknown reasons they get exported to the shared libries - * even without being listed in the sym file. */ -extern const sd_bus_error_map __start_BUS_ERROR_MAP[] _hidden_; -extern const sd_bus_error_map __stop_BUS_ERROR_MAP[] _hidden_; +/* GCC maps this magically to the beginning and end of the BUS_ERROR_MAP section */ +extern const sd_bus_error_map __start_BUS_ERROR_MAP[]; +extern const sd_bus_error_map __stop_BUS_ERROR_MAP[]; /* Additional maps registered with sd_bus_error_add_map() are in this * NULL terminated array */ diff --git a/src/login/systemd-user.m4 b/src/login/systemd-user.m4 index fe38b24fef..e33963b125 100644 --- a/src/login/systemd-user.m4 +++ b/src/login/systemd-user.m4 @@ -2,6 +2,8 @@ # # Used by systemd --user instances. +account required pam_unix.so + m4_ifdef(`HAVE_SELINUX', session required pam_selinux.so close session required pam_selinux.so nottys open diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 6793df1286..fd0578b85c 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -25,27 +25,18 @@ #include "mkdir.h" #include "mount-util.h" #include "nspawn-cgroup.h" +#include "rm-rf.h" #include "string-util.h" #include "strv.h" #include "util.h" -int chown_cgroup(pid_t pid, uid_t uid_shift) { - _cleanup_free_ char *path = NULL, *fs = NULL; +static int chown_cgroup_path(const char *path, uid_t uid_shift) { _cleanup_close_ int fd = -1; const char *fn; - int r; - r = cg_pid_get_path(NULL, pid, &path); - if (r < 0) - return log_error_errno(r, "Failed to get container cgroup path: %m"); - - r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs); - if (r < 0) - return log_error_errno(r, "Failed to get file system path for container cgroup: %m"); - - fd = open(fs, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + fd = open(path, 
O_RDONLY|O_CLOEXEC|O_DIRECTORY); if (fd < 0) - return log_error_errno(errno, "Failed to open %s: %m", fs); + return -errno; FOREACH_STRING(fn, ".", @@ -63,7 +54,27 @@ int chown_cgroup(pid_t pid, uid_t uid_shift) { return 0; } -int sync_cgroup(pid_t pid, CGroupUnified unified_requested) { +int chown_cgroup(pid_t pid, uid_t uid_shift) { + _cleanup_free_ char *path = NULL, *fs = NULL; + _cleanup_close_ int fd = -1; + int r; + + r = cg_pid_get_path(NULL, pid, &path); + if (r < 0) + return log_error_errno(r, "Failed to get container cgroup path: %m"); + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, path, NULL, &fs); + if (r < 0) + return log_error_errno(r, "Failed to get file system path for container cgroup: %m"); + + r = chown_cgroup_path(fs, uid_shift); + if (r < 0) + return log_error_errno(r, "Failed to chown() cgroup %s: %m", fs); + + return 0; +} + +int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t arg_uid_shift) { _cleanup_free_ char *cgroup = NULL; char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1]; bool undo_mount = false; @@ -101,14 +112,26 @@ int sync_cgroup(pid_t pid, CGroupUnified unified_requested) { undo_mount = true; + /* If nspawn dies abruptly the cgroup hierarchy created below + * its unit isn't cleaned up. 
So, let's remove it + * https://github.com/systemd/systemd/pull/4223#issuecomment-252519810 */ + fn = strjoina(tree, cgroup); + (void) rm_rf(fn, REMOVE_ROOT|REMOVE_ONLY_DIRECTORIES); + fn = strjoina(tree, cgroup, "/cgroup.procs"); (void) mkdir_parents(fn, 0755); sprintf(pid_string, PID_FMT, pid); r = write_string_file(fn, pid_string, 0); - if (r < 0) + if (r < 0) { log_error_errno(r, "Failed to move process: %m"); + goto finish; + } + fn = strjoina(tree, cgroup); + r = chown_cgroup_path(fn, arg_uid_shift); + if (r < 0) + log_error_errno(r, "Failed to chown() cgroup %s: %m", fn); finish: if (undo_mount) (void) umount_verbose(tree); diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h index dc33da8abe..fa4321ab43 100644 --- a/src/nspawn/nspawn-cgroup.h +++ b/src/nspawn/nspawn-cgroup.h @@ -25,5 +25,5 @@ #include "cgroup-util.h" int chown_cgroup(pid_t pid, uid_t uid_shift); -int sync_cgroup(pid_t pid, CGroupUnified unified_requested); +int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift); int create_subcgroup(pid_t pid, CGroupUnified unified_requested); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index d95204f71e..14af51fc0e 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3879,7 +3879,7 @@ static int run(int master, return r; } - r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy); + r = sync_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift); if (r < 0) return r; diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index 43f8906172..c24c784e9b 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -24,6 +24,7 @@ #include "formats-util.h" #include "parse-util.h" #include "process-util.h" +#include "stat-util.h" #include "string-util.h" #include "test-helper.h" #include "user-util.h" @@ -309,6 +310,28 @@ static void test_mask_supported(void) { printf("'%s' is supported: %s\n", cgroup_controller_to_string(c), yes_no(m & CGROUP_CONTROLLER_TO_MASK(c))); } 
+static void test_is_cgroup_fs(void) { + struct statfs sfs; + assert_se(statfs("/sys/fs/cgroup", &sfs) == 0); + if (is_temporary_fs(&sfs)) + assert_se(statfs("/sys/fs/cgroup/systemd", &sfs) == 0); + assert_se(is_cgroup_fs(&sfs)); +} + +static void test_fd_is_cgroup_fs(void) { + int fd; + + fd = open("/sys/fs/cgroup", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + assert_se(fd >= 0); + if (fd_is_temporary_fs(fd)) { + fd = safe_close(fd); + fd = open("/sys/fs/cgroup/systemd", O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW); + assert_se(fd >= 0); + } + assert_se(fd_is_cgroup_fs(fd)); + fd = safe_close(fd); +} + int main(void) { test_path_decode_unit(); test_path_get_unit(); @@ -324,6 +347,8 @@ int main(void) { test_slice_to_path(); test_shift_path(); TEST_REQ_RUNNING_SYSTEMD(test_mask_supported()); + TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs()); + TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs()); return 0; } |