diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/basic/process-util.c | 102 | ||||
| -rw-r--r-- | src/basic/process-util.h | 2 | ||||
| -rw-r--r-- | src/basic/virt.c | 81 | ||||
| -rw-r--r-- | src/journal/journalctl.c | 2 | ||||
| -rw-r--r-- | src/machine/machinectl.c | 2 | ||||
| -rw-r--r-- | src/nspawn/nspawn-stub-pid1.c | 29 | ||||
| -rw-r--r-- | src/nspawn/nspawn-stub-pid1.h | 4 | ||||
| -rw-r--r-- | src/nspawn/nspawn.c | 2 | ||||
| -rw-r--r-- | src/sysv-generator/sysv-generator.c | 10 | ||||
| -rw-r--r-- | src/test/test-process-util.c | 61 | 
10 files changed, 236 insertions, 59 deletions
| diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 1f4c2e4e43..d5e7edb589 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -27,6 +27,7 @@  #include <stdio.h>  #include <stdlib.h>  #include <string.h> +#include <sys/mman.h>  #include <sys/personality.h>  #include <sys/prctl.h>  #include <sys/types.h> @@ -274,27 +275,100 @@ int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char *          return 0;  } -void rename_process(const char name[8]) { -        assert(name); +int rename_process(const char name[]) { +        static size_t mm_size = 0; +        static char *mm = NULL; +        bool truncated = false; +        size_t l; + +        /* This is like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's +         * internally used name of the process. For the first one a limit of 16 chars applies; to the second one in +         * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCE it is unbounded; +         * to the third one 7 (i.e. the length of "systemd"). If you pass a longer string it will likely be +         * truncated. +         * +         * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */ + +        if (isempty(name)) +                return -EINVAL; /* let's not confuse users unnecessarily with an empty name */ -        /* This is a like a poor man's setproctitle(). It changes the -         * comm field, argv[0], and also the glibc's internally used -         * name of the process. For the first one a limit of 16 chars -         * applies, to the second one usually one of 10 (i.e. length -         * of "/sbin/init"), to the third one one of 7 (i.e. length of -         * "systemd"). If you pass a longer string it will be -         * truncated */ +        l = strlen(name); +        /* First step, change the comm field. 
*/          (void) prctl(PR_SET_NAME, name); +        if (l > 15) /* Linux process names can be 15 chars at max */ +                truncated = true; + +        /* Second step, change glibc's ID of the process name. */ +        if (program_invocation_name) { +                size_t k; + +                k = strlen(program_invocation_name); +                strncpy(program_invocation_name, name, k); +                if (l > k) +                        truncated = true; +        } + +        /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but +         * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at +         * the end. This is the best option for changing /proc/self/cmdline.*/ +        if (mm_size < l+1) { +                size_t nn_size; +                char *nn; + +                /* Let's not bother with this if we don't have euid == 0. Strictly speaking if people do weird stuff +                 * with capabilities this could work even for euid != 0, but our own code generally doesn't do that, +                 * hence let's use this as quick bypass check, to avoid calling mmap() if PR_SET_MM_ARG_START fails +                 * with EPERM later on anyway. After all geteuid() is dead cheap to call, but mmap() is not. 
*/ +                if (geteuid() != 0) { +                        log_debug("Skipping PR_SET_MM_ARG_START, as we don't have privileges."); +                        goto use_saved_argv; +                } + +                nn_size = PAGE_ALIGN(l+1); +                nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); +                if (nn == MAP_FAILED) { +                        log_debug_errno(errno, "mmap() failed: %m"); +                        goto use_saved_argv; +                } + +                strncpy(nn, name, nn_size); + +                /* Now, let's tell the kernel about this new memory */ +                if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) { +                        log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m"); +                        (void) munmap(nn, nn_size); +                        goto use_saved_argv; +                } + +                /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's +                 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. 
*/ +                if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0) +                        log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m"); -        if (program_invocation_name) -                strncpy(program_invocation_name, name, strlen(program_invocation_name)); +                if (mm) +                        (void) munmap(mm, mm_size); + +                mm = nn; +                mm_size = nn_size; +        } else +                strncpy(mm, name, mm_size); + +use_saved_argv: +        /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if +         * it still looks here */          if (saved_argc > 0) {                  int i; -                if (saved_argv[0]) -                        strncpy(saved_argv[0], name, strlen(saved_argv[0])); +                if (saved_argv[0]) { +                        size_t k; + +                        k = strlen(saved_argv[0]); +                        strncpy(saved_argv[0], name, k); +                        if (l > k) +                                truncated = true; +                }                  for (i = 1; i < saved_argc; i++) {                          if (!saved_argv[i]) @@ -303,6 +377,8 @@ void rename_process(const char name[8]) {                          memzero(saved_argv[i], strlen(saved_argv[i]));                  }          } + +        return !truncated;  }  int is_kernel_thread(pid_t pid) { diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 89dfeb4d6a..d378901399 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -64,7 +64,7 @@ void sigkill_waitp(pid_t *pid);  int kill_and_sigcont(pid_t pid, int sig); -void rename_process(const char name[8]); +int rename_process(const char name[]);  int is_kernel_thread(pid_t pid);  int getenv_for_pid(pid_t pid, const char *field, char **_value); diff --git a/src/basic/virt.c b/src/basic/virt.c index 
9b7eb71319..33641e6886 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -409,8 +409,7 @@ int detect_container(void) {          if (cached_found >= 0)                  return cached_found; -        /* /proc/vz exists in container and outside of the container, -         * /proc/bc only outside of the container. */ +        /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */          if (access("/proc/vz", F_OK) >= 0 &&              access("/proc/bc", F_OK) < 0) {                  r = VIRTUALIZATION_OPENVZ; @@ -418,50 +417,58 @@ int detect_container(void) {          }          if (getpid() == 1) { -                /* If we are PID 1 we can just check our own -                 * environment variable */ +                /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */                  e = getenv("container");                  if (isempty(e)) {                          r = VIRTUALIZATION_NONE;                          goto finish;                  } -        } else { - -                /* Otherwise, PID 1 dropped this information into a -                 * file in /run. This is better than accessing -                 * /proc/1/environ, since we don't need CAP_SYS_PTRACE -                 * for that. */ - -                r = read_one_line_file("/run/systemd/container", &m); -                if (r == -ENOENT) { - -                        /* Fallback for cases where PID 1 was not -                         * systemd (for example, cases where -                         * init=/bin/sh is used. */ - -                        r = getenv_for_pid(1, "container", &m); -                        if (r <= 0) { - -                                /* If that didn't work, give up, -                                 * assume no container manager. 
-                                 * -                                 * Note: This means we still cannot -                                 * detect containers if init=/bin/sh -                                 * is passed but privileges dropped, -                                 * as /proc/1/environ is only readable -                                 * with privileges. */ - -                                r = VIRTUALIZATION_NONE; -                                goto finish; -                        } -                } -                if (r < 0) -                        return r; +                goto translate_name; +        } + +        /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing +         * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */ +        r = read_one_line_file("/run/systemd/container", &m); +        if (r >= 0) { +                e = m; +                goto translate_name; +        } +        if (r != -ENOENT) +                return log_debug_errno(r, "Failed to read /run/systemd/container: %m"); + +        /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used). */ +        r = getenv_for_pid(1, "container", &m); +        if (r > 0) {                  e = m; +                goto translate_name;          } +        if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */ +                log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m"); + +        /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown +         * there is not 1, we know we are in a PID namespace, and hence a container. 
*/ +        r = read_one_line_file("/proc/1/sched", &m); +        if (r >= 0) { +                const char *t; + +                t = strrchr(m, '('); +                if (!t) +                        return -EIO; + +                if (!startswith(t, "(1,")) { +                        r = VIRTUALIZATION_CONTAINER_OTHER; +                        goto finish; +                } +        } else if (r != -ENOENT) +                return r; + +        /* If that didn't work, give up, assume no container manager. */ +        r = VIRTUALIZATION_NONE; +        goto finish; +translate_name:          for (j = 0; j < ELEMENTSOF(value_table); j++)                  if (streq(e, value_table[j].value)) {                          r = value_table[j].id; @@ -471,7 +478,7 @@ int detect_container(void) {          r = VIRTUALIZATION_CONTAINER_OTHER;  finish: -        log_debug("Found container virtualization %s", virtualization_to_string(r)); +        log_debug("Found container virtualization %s.", virtualization_to_string(r));          cached_found = r;          return r;  } diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index 10d3ff3b45..ecd1e94a33 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -2318,7 +2318,7 @@ int main(int argc, char *argv[]) {          if (arg_boot_offset != 0 &&              sd_journal_has_runtime_files(j) > 0 &&              sd_journal_has_persistent_files(j) == 0) { -                log_info("Specifying boot ID has no effect, no persistent journal was found"); +                log_info("Specifying boot ID or boot offset has no effect, no persistent journal was found.");                  r = 0;                  goto finish;          } diff --git a/src/machine/machinectl.c b/src/machine/machinectl.c index 3294ea7821..36c2607ba9 100644 --- a/src/machine/machinectl.c +++ b/src/machine/machinectl.c @@ -2769,7 +2769,7 @@ static int parse_argv(int argc, char *argv[]) {          assert(argv);          for (;;) { -           
     static const char option_string[] = "-hp:als:H:M:qn:o:"; +                static const char option_string[] = "-hp:als:H:M:qn:o:E:";                  c = getopt_long(argc, argv, option_string + reorder, options, NULL);                  if (c < 0) diff --git a/src/nspawn/nspawn-stub-pid1.c b/src/nspawn/nspawn-stub-pid1.c index 2de87e3c63..38ab37367e 100644 --- a/src/nspawn/nspawn-stub-pid1.c +++ b/src/nspawn/nspawn-stub-pid1.c @@ -20,6 +20,7 @@  #include <sys/reboot.h>  #include <sys/unistd.h>  #include <sys/wait.h> +#include <sys/prctl.h>  #include "fd-util.h"  #include "log.h" @@ -29,7 +30,22 @@  #include "time-util.h"  #include "def.h" -int stub_pid1(void) { +static int reset_environ(const char *new_environment, size_t length) { +        unsigned long start, end; + +        start = (unsigned long) new_environment; +        end = start + length; + +        if (prctl(PR_SET_MM, PR_SET_MM_ENV_START, start, 0, 0) < 0) +                return -errno; + +        if (prctl(PR_SET_MM, PR_SET_MM_ENV_END, end, 0, 0) < 0) +                return -errno; + +        return 0; +} + +int stub_pid1(sd_id128_t uuid) {          enum {                  STATE_RUNNING,                  STATE_REBOOT, @@ -41,6 +57,11 @@ int stub_pid1(void) {          pid_t pid;          int r; +        /* The new environment we set up, on the stack. */ +        char new_environment[] = +                "container=systemd-nspawn\0" +                "container_uuid=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"; +          /* Implements a stub PID 1, that reaps all processes and processes a couple of standard signals. This is useful           * for allowing arbitrary processes run in a container, and still have all zombies reaped. */ @@ -64,6 +85,12 @@ int stub_pid1(void) {          close_all_fds(NULL, 0);          log_open(); +        /* Flush out /proc/self/environ, so that we don't leak the environment from the host into the container. 
Also, +         * set $container= and $container_uuid= so that clients in the container that query it from /proc/1/environ +         * find them set. */ +        sd_id128_to_string(uuid, new_environment + sizeof(new_environment) - SD_ID128_STRING_MAX); +        reset_environ(new_environment, sizeof(new_environment)); +          rename_process("STUBINIT");          assert_se(sigemptyset(&waitmask) >= 0); diff --git a/src/nspawn/nspawn-stub-pid1.h b/src/nspawn/nspawn-stub-pid1.h index 36c1aaf5dd..7ca83078c0 100644 --- a/src/nspawn/nspawn-stub-pid1.h +++ b/src/nspawn/nspawn-stub-pid1.h @@ -19,4 +19,6 @@    along with systemd; If not, see <http://www.gnu.org/licenses/>.  ***/ -int stub_pid1(void); +#include "sd-id128.h" + +int stub_pid1(sd_id128_t uuid); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 080bd7c31e..dcc639f15c 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2278,7 +2278,7 @@ static int inner_child(                          return log_error_errno(errno, "Failed to change to specified working directory %s: %m", arg_chdir);          if (arg_start_mode == START_PID2) { -                r = stub_pid1(); +                r = stub_pid1(arg_uuid);                  if (r < 0)                          return r;          } diff --git a/src/sysv-generator/sysv-generator.c b/src/sysv-generator/sysv-generator.c index 921fd478d0..9fde9b1884 100644 --- a/src/sysv-generator/sysv-generator.c +++ b/src/sysv-generator/sysv-generator.c @@ -292,8 +292,10 @@ static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name,                  if (!streq(table[i], n))                          continue; -                if (!table[i+1]) +                if (!table[i+1]) { +                        *ret = NULL;                          return 0; +                }                  m = strdup(table[i+1]);                  if (!m) @@ -312,7 +314,7 @@ static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name,            
      if (r < 0)                          return log_error_errno(r, "[%s:%u] Could not build name for facility %s: %m", s->path, line, name); -                return r; +                return 1;          }          /* Strip ".sh" suffix from file name for comparison */ @@ -324,8 +326,10 @@ static int sysv_translate_facility(SysvStub *s, unsigned line, const char *name,          }          /* Names equaling the file name of the services are redundant */ -        if (streq_ptr(n, filename)) +        if (streq_ptr(n, filename)) { +                *ret = NULL;                  return 0; +        }          /* Everything else we assume to be normal service names */          m = sysv_translate_name(n); diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c index 7242b2c8b5..c5edbcc5d2 100644 --- a/src/test/test-process-util.c +++ b/src/test/test-process-util.c @@ -355,10 +355,70 @@ static void test_get_process_cmdline_harder(void) {          _exit(0);  } +static void test_rename_process_one(const char *p, int ret) { +        _cleanup_free_ char *comm = NULL, *cmdline = NULL; +        pid_t pid; +        int r; + +        pid = fork(); +        assert_se(pid >= 0); + +        if (pid > 0) { +                siginfo_t si; + +                assert_se(wait_for_terminate(pid, &si) >= 0); +                assert_se(si.si_code == CLD_EXITED); +                assert_se(si.si_status == EXIT_SUCCESS); + +                return; +        } + +        /* child */ +        r = rename_process(p); + +        assert_se(r == ret || +                  (ret == 0 && r >= 0) || +                  (ret > 0 && r > 0)); + +        if (r < 0) +                goto finish; + +#ifdef HAVE_VALGRIND_VALGRIND_H +        /* see above, valgrind is weird, we can't verify what we are doing here */ +        if (RUNNING_ON_VALGRIND) +                goto finish; +#endif + +        assert_se(get_process_comm(0, &comm) >= 0); +        log_info("comm = <%s>", comm); +        
assert_se(strneq(comm, p, 15)); + +        assert_se(get_process_cmdline(0, 0, false, &cmdline) >= 0); +        log_info("cmdline = <%s>", cmdline); +        assert_se(strneq(p, cmdline, strlen("test-process-util"))); +        assert_se(startswith(p, cmdline)); + +finish: +        _exit(EXIT_SUCCESS); +} + +static void test_rename_process(void) { +        test_rename_process_one(NULL, -EINVAL); +        test_rename_process_one("", -EINVAL); +        test_rename_process_one("foo", 1); /* should always fit */ +        test_rename_process_one("this is a really really long process name, followed by some more words", 0); /* unlikely to fit */ +        test_rename_process_one("1234567", 1); /* should always fit */ +} +  int main(int argc, char *argv[]) { + +        log_set_max_level(LOG_DEBUG);          log_parse_environment();          log_open(); +        saved_argc = argc; +        saved_argv = argv; +          if (argc > 1) {                  pid_t pid = 0; @@ -373,6 +433,7 @@ int main(int argc, char *argv[]) {          test_pid_is_alive();          test_personality();          test_get_process_cmdline_harder(); +        test_rename_process();          return 0;  } | 
