diff options
Diffstat (limited to 'src/coredump')
-rw-r--r-- | src/coredump/coredump.c | 177 |
1 files changed, 159 insertions, 18 deletions
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 999de63900..e3d17c864d 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -157,10 +157,8 @@ static int fix_acl(int fd, uid_t uid) { if (acl_create_entry(&acl, &entry) < 0 || acl_set_tag_type(entry, ACL_USER) < 0 || - acl_set_qualifier(entry, &uid) < 0) { - log_error_errno(errno, "Failed to patch ACL: %m"); - return -errno; - } + acl_set_qualifier(entry, &uid) < 0) + return log_error_errno(errno, "Failed to patch ACL: %m"); if (acl_get_permset(entry, &permset) < 0 || acl_add_perm(permset, ACL_READ) < 0) @@ -560,6 +558,89 @@ static int compose_open_fds(pid_t pid, char **open_fds) { return 0; } +static int get_process_ns(pid_t pid, const char *namespace, ino_t *ns) { + const char *p; + struct stat stbuf; + _cleanup_close_ int proc_ns_dir_fd; + + p = procfs_file_alloca(pid, "ns"); + + proc_ns_dir_fd = open(p, O_DIRECTORY | O_CLOEXEC | O_RDONLY); + if (proc_ns_dir_fd < 0) + return -errno; + + if (fstatat(proc_ns_dir_fd, namespace, &stbuf, /* flags */0) < 0) + return -errno; + + *ns = stbuf.st_ino; + return 0; +} + +static int get_mount_namespace_leader(pid_t pid, pid_t *container_pid) { + pid_t cpid = pid, ppid = 0; + ino_t proc_mntns; + int r = 0; + + r = get_process_ns(pid, "mnt", &proc_mntns); + if (r < 0) + return r; + + while (1) { + ino_t parent_mntns; + + r = get_process_ppid(cpid, &ppid); + if (r < 0) + return r; + + r = get_process_ns(ppid, "mnt", &parent_mntns); + if (r < 0) + return r; + + if (proc_mntns != parent_mntns) + break; + + if (ppid == 1) + return -ENOENT; + + cpid = ppid; + } + + *container_pid = ppid; + return 0; +} + +/* Returns 1 if the parent was found. + * Returns 0 if there is not a process we can call the pid's + * container parent (the pid's process isn't 'containerized'). + * Returns a negative number on errors. + */ +static int get_process_container_parent_cmdline(pid_t pid, char** cmdline) { + int r = 0; + pid_t container_pid; + const char *proc_root_path; + struct stat root_stat, proc_root_stat; + + /* To compare inodes of / and /proc/[pid]/root */ + if (stat("/", &root_stat) < 0) + return -errno; + + proc_root_path = procfs_file_alloca(pid, "root"); + if (stat(proc_root_path, &proc_root_stat) < 0) + return -errno; + + /* The process uses system root. */ + if (proc_root_stat.st_ino == root_stat.st_ino) { + *cmdline = NULL; + return 0; + } + + r = get_mount_namespace_leader(pid, &container_pid); + if (r < 0) + return r; + + return get_process_cmdline(container_pid, 0, false, cmdline); +} + static int change_uid_gid(const char *context[]) { uid_t uid; gid_t gid; @@ -756,7 +837,6 @@ static int process_socket(int fd) { iovec[n_iovec].iov_len = l; iovec[n_iovec].iov_base = malloc(l + 1); - if (!iovec[n_iovec].iov_base) { r = log_oom(); goto finish; @@ -811,7 +891,7 @@ static int process_socket(int fd) { goto finish; } - /* Make sure we we got all data we really need */ + /* Make sure we got all data we really need */ assert(context[CONTEXT_PID]); assert(context[CONTEXT_UID]); assert(context[CONTEXT_GID]); @@ -852,12 +932,42 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) return log_error_errno(errno, "Failed to connect to coredump service: %m"); for (i = 0; i < n_iovec; i++) { - ssize_t n; - assert(iovec[i].iov_len > 0); + struct msghdr mh = { + .msg_iov = (struct iovec*) iovec + i, + .msg_iovlen = 1, + }; + struct iovec copy[2]; + + for (;;) { + if (sendmsg(fd, &mh, MSG_NOSIGNAL) >= 0) + break; + + if (errno == EMSGSIZE && mh.msg_iov[0].iov_len > 0) { + /* This field didn't fit? That's a pity. Given that this is just metadata, + * let's truncate the field at half, and try again. We append three dots, in + * order to show that this is truncated. */ + + if (mh.msg_iov != copy) { + /* We don't want to modify the caller's iovec, hence let's create our + * own array, consisting of two new iovecs, where the first is a + * (truncated) copy of what we want to send, and the second one + * contains the trailing dots. */ + copy[0] = iovec[i]; + copy[1] = (struct iovec) { + .iov_base = (char[]) { '.', '.', '.' }, + .iov_len = 3, + }; + + mh.msg_iov = copy; + mh.msg_iovlen = 2; + } + + copy[0].iov_len /= 2; /* halve it, and try again */ + continue; + } - n = send(fd, iovec[i].iov_base, iovec[i].iov_len, MSG_NOSIGNAL); - if (n < 0) return log_error_errno(errno, "Failed to send coredump datagram: %m"); + } } r = send_one_fd(fd, input_fd, 0); @@ -867,7 +977,7 @@ static int send_iovec(const struct iovec iovec[], size_t n_iovec, int input_fd) return 0; } -static int process_journald_crash(const char *context[], int input_fd) { +static int process_special_crash(const char *context[], int input_fd) { _cleanup_close_ int coredump_fd = -1, coredump_node_fd = -1; _cleanup_free_ char *filename = NULL; uint64_t coredump_size; @@ -876,7 +986,7 @@ static int process_journald_crash(const char *context[], int input_fd) { assert(context); assert(input_fd >= 0); - /* If we are journald, we cut things short, don't write to the journal, but still create a coredump. */ + /* If we are pid1 or journald, we cut things short, don't write to the journal, but still create a coredump. */ if (arg_storage != COREDUMP_STORAGE_NONE) arg_storage = COREDUMP_STORAGE_EXTERNAL; @@ -889,7 +999,8 @@ static int process_journald_crash(const char *context[], int input_fd) { if (r < 0) return r; - log_info("Detected coredump of the journal daemon itself, diverted to %s.", filename); + log_notice("Detected coredump of the journal daemon or PID 1, diverted to %s.", filename); + return 0; } @@ -905,11 +1016,13 @@ static int process_kernel(int argc, char* argv[]) { /* The larger ones we allocate on the heap */ _cleanup_free_ char *core_owner_uid = NULL, *core_open_fds = NULL, *core_proc_status = NULL, - *core_proc_maps = NULL, *core_proc_limits = NULL, *core_proc_cgroup = NULL, *core_environ = NULL; + *core_proc_maps = NULL, *core_proc_limits = NULL, *core_proc_cgroup = NULL, *core_environ = NULL, + *core_proc_mountinfo = NULL, *core_container_cmdline = NULL; _cleanup_free_ char *exe = NULL, *comm = NULL; const char *context[_CONTEXT_MAX]; - struct iovec iovec[25]; + bool proc_self_root_is_slash; + struct iovec iovec[27]; size_t n_iovec = 0; uid_t owner_uid; const char *p; @@ -949,9 +1062,17 @@ static int process_kernel(int argc, char* argv[]) { if (cg_pid_get_unit(pid, &t) >= 0) { - if (streq(t, SPECIAL_JOURNALD_SERVICE)) { + /* If this is PID 1 disable coredump collection, we'll unlikely be able to process it later on. */ + if (streq(t, SPECIAL_INIT_SCOPE)) { + log_notice("Due to PID 1 having crashed coredump collection will now be turned off."); + (void) write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0); + } + + /* Let's avoid dead-locks when processing journald and init crashes, as socket activation and logging + * are unlikely to work then. */ + if (STR_IN_SET(t, SPECIAL_JOURNALD_SERVICE, SPECIAL_INIT_SCOPE)) { free(t); - return process_journald_crash(context, STDIN_FILENO); + return process_special_crash(context, STDIN_FILENO); } core_unit = strjoina("COREDUMP_UNIT=", t); @@ -1074,6 +1195,15 @@ static int process_kernel(int argc, char* argv[]) { IOVEC_SET_STRING(iovec[n_iovec++], core_proc_cgroup); } + p = procfs_file_alloca(pid, "mountinfo"); + if (read_full_file(p, &t, NULL) >=0) { + core_proc_mountinfo = strappend("COREDUMP_PROC_MOUNTINFO=", t); + free(t); + + if (core_proc_mountinfo) + IOVEC_SET_STRING(iovec[n_iovec++], core_proc_mountinfo); + } + if (get_process_cwd(pid, &t) >= 0) { core_cwd = strjoina("COREDUMP_CWD=", t); free(t); @@ -1083,9 +1213,20 @@ static int process_kernel(int argc, char* argv[]) { if (get_process_root(pid, &t) >= 0) { core_root = strjoina("COREDUMP_ROOT=", t); - free(t); IOVEC_SET_STRING(iovec[n_iovec++], core_root); + + /* If the process' root is "/", then there is a chance it has + * mounted own root and hence being containerized. */ + proc_self_root_is_slash = strcmp(t, "/") == 0; + free(t); + if (proc_self_root_is_slash && get_process_container_parent_cmdline(pid, &t) > 0) { + core_container_cmdline = strappend("COREDUMP_CONTAINER_CMDLINE=", t); + free(t); + + if (core_container_cmdline) + IOVEC_SET_STRING(iovec[n_iovec++], core_container_cmdline); + } } if (get_process_environ(pid, &t) >= 0) { |