diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/basic/process-util.c | 102 | ||||
-rw-r--r-- | src/basic/process-util.h | 2 | ||||
-rw-r--r-- | src/basic/virt.c | 81 | ||||
-rw-r--r-- | src/journal/journalctl.c | 2 | ||||
-rw-r--r-- | src/machine/machinectl.c | 2 | ||||
-rw-r--r-- | src/nspawn/nspawn-stub-pid1.c | 29 | ||||
-rw-r--r-- | src/nspawn/nspawn-stub-pid1.h | 4 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 2 | ||||
-rw-r--r-- | src/sysv-generator/sysv-generator.c | 10 | ||||
-rw-r--r-- | src/test/test-process-util.c | 61 |
10 files changed, 236 insertions, 59 deletions
diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 1f4c2e4e43..d5e7edb589 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -27,6 +27,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/mman.h> #include <sys/personality.h> #include <sys/prctl.h> #include <sys/types.h> @@ -274,27 +275,100 @@ int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char * return 0; } -void rename_process(const char name[8]) { - assert(name); +int rename_process(const char name[]) { + static size_t mm_size = 0; + static char *mm = NULL; + bool truncated = false; + size_t l; + + /* This is a like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's + * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in + * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCES it is unbounded; + * to the third one 7 (i.e. the length of "systemd". If you pass a longer string it will likely be + * truncated. + * + * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */ + + if (isempty(name)) + return -EINVAL; /* let's not confuse users unnecessarily with an empty name */ - /* This is a like a poor man's setproctitle(). It changes the - * comm field, argv[0], and also the glibc's internally used - * name of the process. For the first one a limit of 16 chars - * applies, to the second one usually one of 10 (i.e. length - * of "/sbin/init"), to the third one one of 7 (i.e. length of - * "systemd"). If you pass a longer string it will be - * truncated */ + l = strlen(name); + /* First step, change the comm field. */ (void) prctl(PR_SET_NAME, name); + if (l > 15) /* Linux process names can be 15 chars at max */ + truncated = true; + + /* Second step, change glibc's ID of the process name. */ + if (program_invocation_name) { + size_t k; + + k = strlen(program_invocation_name); + strncpy(program_invocation_name, name, k); + if (l > k) + truncated = true; + } + + /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but + * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at + * the end. This is the best option for changing /proc/self/cmdline.*/ + if (mm_size < l+1) { + size_t nn_size; + char *nn; + + /* Let's not bother with this if we don't have euid == 0. Strictly speaking if people do weird stuff + * with capabilities this could work even for euid != 0, but our own code generally doesn't do that, + * hence let's use this as quick bypass check, to avoid calling mmap() if PR_SET_MM_ARG_START fails + * with EPERM later on anyway. After all geteuid() is dead cheap to call, but mmap() is not. */ + if (geteuid() != 0) { + log_debug("Skipping PR_SET_MM_ARG_START, as we don't have privileges."); + goto use_saved_argv; + } + + nn_size = PAGE_ALIGN(l+1); + nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (nn == MAP_FAILED) { + log_debug_errno(errno, "mmap() failed: %m"); + goto use_saved_argv; + } + + strncpy(nn, name, nn_size); + + /* Now, let's tell the kernel about this new memory */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) { + log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m"); + (void) munmap(nn, nn_size); + goto use_saved_argv; + } + + /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's + * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */ + if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) + log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); - if (program_invocation_name) - strncpy(program_invocation_name, name, strlen(program_invocation_name)); + if (mm) + (void) munmap(mm, mm_size); + + mm = nn; + mm_size = nn_size; + } else + strncpy(mm, name, mm_size); + +use_saved_argv: + /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if + * it still looks here */ if (saved_argc > 0) { int i; - if (saved_argv[0]) - strncpy(saved_argv[0], name, strlen(saved_argv[0])); + if (saved_argv[0]) { + size_t k; + + k = strlen(saved_argv[0]); + strncpy(saved_argv[0], name, k); + if (l > k) + truncated = true; + } for (i = 1; i < saved_argc; i++) { if (!saved_argv[i]) @@ -303,6 +377,8 @@ void rename_process(const char name[8]) { memzero(saved_argv[i], strlen(saved_argv[i])); } } + + return !truncated; } int is_kernel_thread(pid_t pid) { diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 89dfeb4d6a..d378901399 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -64,7 +64,7 @@ void sigkill_waitp(pid_t *pid); int kill_and_sigcont(pid_t pid, int sig); -void rename_process(const char name[8]); +int rename_process(const char name[]); int is_kernel_thread(pid_t pid); int getenv_for_pid(pid_t pid, const char *field, char **_value); diff --git a/src/basic/virt.c b/src/basic/virt.c index 9b7eb71319..33641e6886 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -409,8 +409,7 @@ int detect_container(void) { if (cached_found >= 0) return cached_found; - /* /proc/vz exists in container and outside of the container, - * /proc/bc only outside of the container. */ + /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */ if (access("/proc/vz", F_OK) >= 0 && access("/proc/bc", F_OK) < 0) { r = VIRTUALIZATION_OPENVZ; @@ -418,50 +417,58 @@ int detect_container(void) { } if (getpid() == 1) { - /* If we are PID 1 we can just check our own - * environment variable */ + /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */ e = getenv("container"); if (isempty(e)) { r = VIRTUALIZATION_NONE; goto finish; } - } else { - - /* Otherwise, PID 1 dropped this information into a - * file in /run. This is better than accessing - * /proc/1/environ, since we don't need CAP_SYS_PTRACE - * for that. */ - - r = read_one_line_file("/run/systemd/container", &m); - if (r == -ENOENT) { - - /* Fallback for cases where PID 1 was not - * systemd (for example, cases where - * init=/bin/sh is used. */ - - r = getenv_for_pid(1, "container", &m); - if (r <= 0) { - - /* If that didn't work, give up, - * assume no container manager. - * - * Note: This means we still cannot - * detect containers if init=/bin/sh - * is passed but privileges dropped, - * as /proc/1/environ is only readable - * with privileges. */ - - r = VIRTUALIZATION_NONE; - goto finish; - } - } - if (r < 0) - return r; + goto translate_name; + } + + /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing + * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */ + r = read_one_line_file("/run/systemd/container", &m); + if (r >= 0) { + e = m; + goto translate_name; + } + if (r != -ENOENT) + return log_debug_errno(r, "Failed to read /run/systemd/container: %m"); + + /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */ + r = getenv_for_pid(1, "container", &m); + if (r > 0) { e = m; + goto translate_name; } + if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */ + log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m"); + + /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown + * there is not 1, we know we are in a PID namespace. and hence a container. */ + r = read_one_line_file("/proc/1/sched", &m); + if (r >= 0) { + const char *t; + + t = strrchr(m, '('); + if (!t) + return -EIO; + + if (!startswith(t, "(1,")) { + r = VIRTUALIZATION_CONTAINER_OTHER; + goto finish; + } + } else if (r != -ENOENT) + return r; + + /* If that didn't work, give up, assume no container manager. */ + r = VIRTUALIZATION_NONE; + goto finish; +translate_name: for (j = 0; j < ELEMENTSOF(value_table); j++) if (streq(e, value_table[j].value)) { r = value_table[j].id; @@ -471,7 +478,7 @@ int detect_container(void) { r = VIRTUALIZATION_CONTAINER_OTHER; finish: - log_debug("Found container virtualization %s", virtualization_to_string(r)); + log_debug("Found container virtualization %s.", virtualization_to_string(r)); cached_found = r; return r; } diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 10d3ff3b45..ecd1e94a33 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -2318,7 +2318,7 @@ int main(int argc, char *argv[]) { if (arg_boot_offset != 0 && sd_journal_has_runtime_files(j) > 0 && sd_journal_has_persistent_files(j) == 0) { - log_info("Specifying boot ID has no effect, no persistent journal was found"); + log_info("Specifying boot ID or boot offset has no effect, no persistent journal was found."); r = 0; goto finish; } diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 3294ea7821..36c2607ba9 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -2769,7 +2769,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argv); for (;;) { - static const char option_string[] = "-hp:als:H:M:qn:o:"; + static const char option_string[] = "-hp:als:H:M:qn:o:E:"; c = getopt_long(argc, argv, option_string + reorder, options, NULL); if (c < 0) diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c index 2de87e3c63..38ab37367e 100644 --- a/src/nspawn/nspawn-stub-pid1.c +++ b/src/nspawn/nspawn-stub-pid1.c @@ -20,6 +20,7 @@ #include <sys/reboot.h> #include <sys/unistd.h> #include <sys/wait.h> +#include <sys/prctl.h> #include "fd-util.h" #include "log.h" @@ -29,7 +30,22 @@ #include "time-util.h" #include "def.h" -int stub_pid1(void) { +static int reset_environ(const char *new_environment, size_t length) { + unsigned long start, end; + + start = (unsigned long) new_environment; + end = start + length; + + if (prctl(PR_SET_MM, PR_SET_MM_ENV_START, start, 0, 0) < 0) + return -errno; + + if (prctl(PR_SET_MM, PR_SET_MM_ENV_END, end, 0, 0) < 0) + return -errno; + + return 0; +} + +int stub_pid1(sd_id128_t uuid) { enum { STATE_RUNNING, STATE_REBOOT, @@ -41,6 +57,11 @@ int stub_pid1(void) { pid_t pid; int r; + /* The new environment we set up, on the stack. */ + char new_environment[] = + "container=systemd-nspawn\0" + "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; + /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful * for allowing arbitrary processes run in a container, and still have all zombies reaped. */ @@ -64,6 +85,12 @@ int stub_pid1(void) { close_all_fds(NULL, 0); log_open(); + /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. Also, + * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ + * find them set set. */ + sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX); + reset_environ(new_environment, sizeof(new_environment)); + rename_process("STUBINIT"); assert_se(sigemptyset(&waitmask) >= 0); diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h index 36c1aaf5dd..7ca83078c0 100644 --- a/src/nspawn/nspawn-stub-pid1.h +++ b/src/nspawn/nspawn-stub-pid1.h @@ -19,4 +19,6 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -int stub_pid1(void); +#include "sd-id128.h" + +int stub_pid1(sd_id128_t uuid); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 080bd7c31e..dcc639f15c 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2278,7 +2278,7 @@ static int inner_child( return log_error_errno(errno, "Failed to change to specified working directory %s: %m", arg_chdir); if (arg_start_mode == START_PID2) { - r = stub_pid1(); + r = stub_pid1(arg_uuid); if (r < 0) return r; } diff --git a/src/sysv-generator/sysv-generator.c b/src/sysv-generator/sysv-generator.c index 921fd478d0..9fde9b1884 100644 --- a/src/sysv-generator/sysv-generator.c +++ b/src/sysv-generator/sysv-generator.c @@ -292,8 +292,10 @@ static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name, if (!streq(table[i], n)) continue; - if (!table[i+1]) + if (!table[i+1]) { + *ret = NULL; return 0; + } m = strdup(table[i+1]); if (!m) @@ -312,7 +314,7 @@ static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name, if (r < 0) return log_error_errno(r, "[%s:%u] Could not build name for facility %s: %m", s->path, line, name); - return r; + return 1; } /* Strip ".sh" suffix from file name for comparison */ @@ -324,8 +326,10 @@ static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name, } /* Names equaling the file name of the services are redundant */ - if (streq_ptr(n, filename)) + if (streq_ptr(n, filename)) { + *ret = NULL; return 0; + } /* Everything else we assume to be normal service names */ m = sysv_translate_name(n); diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c index 7242b2c8b5..c5edbcc5d2 100644 --- a/src/test/test-process-util.c +++ b/src/test/test-process-util.c @@ -355,10 +355,70 @@ static void test_get_process_cmdline_harder(void) { _exit(0); } +static void test_rename_process_one(const char *p, int ret) { + _cleanup_free_ char *comm = NULL, *cmdline = NULL; + pid_t pid; + int r; + + pid = fork(); + assert_se(pid >= 0); + + if (pid > 0) { + siginfo_t si; + + assert_se(wait_for_terminate(pid, &si) >= 0); + assert_se(si.si_code == CLD_EXITED); + assert_se(si.si_status == EXIT_SUCCESS); + + return; + } + + /* child */ + r = rename_process(p); + + assert_se(r == ret || + (ret == 0 && r >= 0) || + (ret > 0 && r > 0)); + + if (r < 0) + goto finish; + +#ifdef HAVE_VALGRIND_VALGRIND_H + /* see above, valgrind is weird, we can't verify what we are doing here */ + if (RUNNING_ON_VALGRIND) + goto finish; +#endif + + assert_se(get_process_comm(0, &comm) >= 0); + log_info("comm = <%s>", comm); + assert_se(strneq(comm, p, 15)); + + assert_se(get_process_cmdline(0, 0, false, &cmdline) >= 0); + log_info("cmdline = <%s>", cmdline); + assert_se(strneq(p, cmdline, strlen("test-process-util"))); + assert_se(startswith(p, cmdline)); + +finish: + _exit(EXIT_SUCCESS); +} + +static void test_rename_process(void) { + test_rename_process_one(NULL, -EINVAL); + test_rename_process_one("", -EINVAL); + test_rename_process_one("foo", 1); /* should always fit */ + test_rename_process_one("this is a really really long process name, followed by some more words", 0); /* unlikely to fit */ + test_rename_process_one("1234567", 1); /* should always fit */ +} + int main(int argc, char *argv[]) { + + log_set_max_level(LOG_DEBUG); log_parse_environment(); log_open(); + saved_argc = argc; + saved_argv = argv; + if (argc > 1) { pid_t pid = 0; @@ -373,6 +433,7 @@ int main(int argc, char *argv[]) { test_pid_is_alive(); test_personality(); test_get_process_cmdline_harder(); + test_rename_process(); return 0; } |