/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ /*** This file is part of systemd. Copyright 2010 Lennart Poettering systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. systemd is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with systemd; If not, see . ***/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* When we include libgen.h because we need dirname() we immediately * undefine basename() since libgen.h defines it as a macro to the * POSIX version which is really broken. We prefer GNU basename(). */ #include #undef basename #ifdef HAVE_SYS_AUXV_H #include #endif /* We include linux/fs.h as last of the system headers, as it * otherwise conflicts with sys/mount.h. Yay, Linux is great! */ #include #include "alloc-util.h" #include "build.h" #include "def.h" #include "device-nodes.h" #include "env-util.h" #include "escape.h" #include "exit-status.h" #include "fd-util.h" #include "fileio.h" #include "formats-util.h" #include "gunicode.h" #include "hashmap.h" #include "hostname-util.h" #include "ioprio.h" #include "log.h" #include "macro.h" #include "missing.h" #include "mkdir.h" #include "hexdecoct.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" #include "random-util.h" #include "signal-util.h" #include "sparse-endian.h" #include "string-table.h" #include "string-util.h" #include "strv.h" #include "terminal-util.h" #include "user-util.h" #include "utf8.h" #include "util.h" #include "virt.h" #include "dirent-util.h" #include "stat-util.h" /* Put this test here for a lack of better place */ assert_cc(EAGAIN == EWOULDBLOCK); int saved_argc = 0; char **saved_argv = NULL; size_t page_size(void) { static thread_local size_t pgsz = 0; long r; if (_likely_(pgsz > 0)) return pgsz; r = sysconf(_SC_PAGESIZE); assert(r > 0); pgsz = (size_t) r; return pgsz; } void rename_process(const char name[8]) { assert(name); /* This is a like a poor man's setproctitle(). It changes the * comm field, argv[0], and also the glibc's internally used * name of the process. For the first one a limit of 16 chars * applies, to the second one usually one of 10 (i.e. length * of "/sbin/init"), to the third one one of 7 (i.e. length of * "systemd"). If you pass a longer string it will be * truncated */ prctl(PR_SET_NAME, name); if (program_invocation_name) strncpy(program_invocation_name, name, strlen(program_invocation_name)); if (saved_argc > 0) { int i; if (saved_argv[0]) strncpy(saved_argv[0], name, strlen(saved_argv[0])); for (i = 1; i < saved_argc; i++) { if (!saved_argv[i]) break; memzero(saved_argv[i], strlen(saved_argv[i])); } } } noreturn void freeze(void) { /* Make sure nobody waits for us on a socket anymore */ close_all_fds(NULL, 0); sync(); for (;;) pause(); } static int do_execute(char **directories, usec_t timeout, char *argv[]) { _cleanup_hashmap_free_free_ Hashmap *pids = NULL; _cleanup_set_free_free_ Set *seen = NULL; char **directory; /* We fork this all off from a child process so that we can * somewhat cleanly make use of SIGALRM to set a time limit */ (void) reset_all_signal_handlers(); (void) reset_signal_mask(); assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0); pids = hashmap_new(NULL); if (!pids) return log_oom(); seen = set_new(&string_hash_ops); if (!seen) return log_oom(); STRV_FOREACH(directory, directories) { _cleanup_closedir_ DIR *d; struct dirent *de; d = opendir(*directory); if (!d) { if (errno == ENOENT) continue; return log_error_errno(errno, "Failed to open directory %s: %m", *directory); } FOREACH_DIRENT(de, d, break) { _cleanup_free_ char *path = NULL; pid_t pid; int r; if (!dirent_is_file(de)) continue; if (set_contains(seen, de->d_name)) { log_debug("%1$s/%2$s skipped (%2$s was already seen).", *directory, de->d_name); continue; } r = set_put_strdup(seen, de->d_name); if (r < 0) return log_oom(); path = strjoin(*directory, "/", de->d_name, NULL); if (!path) return log_oom(); if (null_or_empty_path(path)) { log_debug("%s is empty (a mask).", path); continue; } pid = fork(); if (pid < 0) { log_error_errno(errno, "Failed to fork: %m"); continue; } else if (pid == 0) { char *_argv[2]; assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0); if (!argv) { _argv[0] = path; _argv[1] = NULL; argv = _argv; } else argv[0] = path; execv(path, argv); return log_error_errno(errno, "Failed to execute %s: %m", path); } log_debug("Spawned %s as " PID_FMT ".", path, pid); r = hashmap_put(pids, UINT_TO_PTR(pid), path); if (r < 0) return log_oom(); path = NULL; } } /* Abort execution of this process after the timout. We simply * rely on SIGALRM as default action terminating the process, * and turn on alarm(). */ if (timeout != USEC_INFINITY) alarm((timeout + USEC_PER_SEC - 1) / USEC_PER_SEC); while (!hashmap_isempty(pids)) { _cleanup_free_ char *path = NULL; pid_t pid; pid = PTR_TO_UINT(hashmap_first_key(pids)); assert(pid > 0); path = hashmap_remove(pids, UINT_TO_PTR(pid)); assert(path); wait_for_terminate_and_warn(path, pid, true); } return 0; } void execute_directories(const char* const* directories, usec_t timeout, char *argv[]) { pid_t executor_pid; int r; char *name; char **dirs = (char**) directories; assert(!strv_isempty(dirs)); name = basename(dirs[0]); assert(!isempty(name)); /* Executes all binaries in the directories in parallel and waits * for them to finish. Optionally a timeout is applied. If a file * with the same name exists in more than one directory, the * earliest one wins. */ executor_pid = fork(); if (executor_pid < 0) { log_error_errno(errno, "Failed to fork: %m"); return; } else if (executor_pid == 0) { r = do_execute(dirs, timeout, argv); _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS); } wait_for_terminate_and_warn(name, executor_pid, true); } bool plymouth_running(void) { return access("/run/plymouth/pid", F_OK) >= 0; } bool display_is_local(const char *display) { assert(display); return display[0] == ':' && display[1] >= '0' && display[1] <= '9'; } int socket_from_display(const char *display, char **path) { size_t k; char *f, *c; assert(display); assert(path); if (!display_is_local(display)) return -EINVAL; k = strspn(display+1, "0123456789"); f = new(char, strlen("/tmp/.X11-unix/X") + k + 1); if (!f) return -ENOMEM; c = stpcpy(f, "/tmp/.X11-unix/X"); memcpy(c, display+1, k); c[k] = 0; *path = f; return 0; } int block_get_whole_disk(dev_t d, dev_t *ret) { char *p, *s; int r; unsigned n, m; assert(ret); /* If it has a queue this is good enough for us */ if (asprintf(&p, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0) return -ENOMEM; r = access(p, F_OK); free(p); if (r >= 0) { *ret = d; return 0; } /* If it is a partition find the originating device */ if (asprintf(&p, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0) return -ENOMEM; r = access(p, F_OK); free(p); if (r < 0) return -ENOENT; /* Get parent dev_t */ if (asprintf(&p, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0) return -ENOMEM; r = read_one_line_file(p, &s); free(p); if (r < 0) return r; r = sscanf(s, "%u:%u", &m, &n); free(s); if (r != 2) return -EINVAL; /* Only return this if it is really good enough for us. */ if (asprintf(&p, "/sys/dev/block/%u:%u/queue", m, n) < 0) return -ENOMEM; r = access(p, F_OK); free(p); if (r >= 0) { *ret = makedev(m, n); return 0; } return -ENOENT; } static const char *const ioprio_class_table[] = { [IOPRIO_CLASS_NONE] = "none", [IOPRIO_CLASS_RT] = "realtime", [IOPRIO_CLASS_BE] = "best-effort", [IOPRIO_CLASS_IDLE] = "idle" }; DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX); static const char *const sigchld_code_table[] = { [CLD_EXITED] = "exited", [CLD_KILLED] = "killed", [CLD_DUMPED] = "dumped", [CLD_TRAPPED] = "trapped", [CLD_STOPPED] = "stopped", [CLD_CONTINUED] = "continued", }; DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int); static const char* const sched_policy_table[] = { [SCHED_OTHER] = "other", [SCHED_BATCH] = "batch", [SCHED_IDLE] = "idle", [SCHED_FIFO] = "fifo", [SCHED_RR] = "rr" }; DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX); bool kexec_loaded(void) { bool loaded = false; char *s; if (read_one_line_file("/sys/kernel/kexec_loaded", &s) >= 0) { if (s[0] == '1') loaded = true; free(s); } return loaded; } int prot_from_flags(int flags) { switch (flags & O_ACCMODE) { case O_RDONLY: return PROT_READ; case O_WRONLY: return PROT_WRITE; case O_RDWR: return PROT_READ|PROT_WRITE; default: return -EINVAL; } } int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) { bool stdout_is_tty, stderr_is_tty; pid_t parent_pid, agent_pid; sigset_t ss, saved_ss; unsigned n, i; va_list ap; char **l; assert(pid); assert(path); /* Spawns a temporary TTY agent, making sure it goes away when * we go away */ parent_pid = getpid(); /* First we temporarily block all signals, so that the new * child has them blocked initially. This way, we can be sure * that SIGTERMs are not lost we might send to the agent. */ assert_se(sigfillset(&ss) >= 0); assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0); agent_pid = fork(); if (agent_pid < 0) { assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0); return -errno; } if (agent_pid != 0) { assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0); *pid = agent_pid; return 0; } /* In the child: * * Make sure the agent goes away when the parent dies */ if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) _exit(EXIT_FAILURE); /* Make sure we actually can kill the agent, if we need to, in * case somebody invoked us from a shell script that trapped * SIGTERM or so... */ (void) reset_all_signal_handlers(); (void) reset_signal_mask(); /* Check whether our parent died before we were able * to set the death signal and unblock the signals */ if (getppid() != parent_pid) _exit(EXIT_SUCCESS); /* Don't leak fds to the agent */ close_all_fds(except, n_except); stdout_is_tty = isatty(STDOUT_FILENO); stderr_is_tty = isatty(STDERR_FILENO); if (!stdout_is_tty || !stderr_is_tty) { int fd; /* Detach from stdout/stderr. and reopen * /dev/tty for them. This is important to * ensure that when systemctl is started via * popen() or a similar call that expects to * read EOF we actually do generate EOF and * not delay this indefinitely by because we * keep an unused copy of stdin around. */ fd = open("/dev/tty", O_WRONLY); if (fd < 0) { log_error_errno(errno, "Failed to open /dev/tty: %m"); _exit(EXIT_FAILURE); } if (!stdout_is_tty) dup2(fd, STDOUT_FILENO); if (!stderr_is_tty) dup2(fd, STDERR_FILENO); if (fd > 2) close(fd); } /* Count arguments */ va_start(ap, path); for (n = 0; va_arg(ap, char*); n++) ; va_end(ap); /* Allocate strv */ l = alloca(sizeof(char *) * (n + 1)); /* Fill in arguments */ va_start(ap, path); for (i = 0; i <= n; i++) l[i] = va_arg(ap, char*); va_end(ap); execv(path, l); _exit(EXIT_FAILURE); } bool in_initrd(void) { static int saved = -1; struct statfs s; if (saved >= 0) return saved; /* We make two checks here: * * 1. the flag file /etc/initrd-release must exist * 2. the root file system must be a memory file system * * The second check is extra paranoia, since misdetecting an * initrd can have bad bad consequences due the initrd * emptying when transititioning to the main systemd. */ saved = access("/etc/initrd-release", F_OK) >= 0 && statfs("/", &s) >= 0 && is_temporary_fs(&s); return saved; } /* hey glibc, APIs with callbacks without a user pointer are so useless */ void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *, void *), void *arg) { size_t l, u, idx; const void *p; int comparison; l = 0; u = nmemb; while (l < u) { idx = (l + u) / 2; p = (void *)(((const char *) base) + (idx * size)); comparison = compar(key, p, arg); if (comparison < 0) u = idx; else if (comparison > 0) l = idx + 1; else return (void *)p; } return NULL; } int on_ac_power(void) { bool found_offline = false, found_online = false; _cleanup_closedir_ DIR *d = NULL; d = opendir("/sys/class/power_supply"); if (!d) return errno == ENOENT ? true : -errno; for (;;) { struct dirent *de; _cleanup_close_ int fd = -1, device = -1; char contents[6]; ssize_t n; errno = 0; de = readdir(d); if (!de && errno != 0) return -errno; if (!de) break; if (hidden_file(de->d_name)) continue; device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY); if (device < 0) { if (errno == ENOENT || errno == ENOTDIR) continue; return -errno; } fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY); if (fd < 0) { if (errno == ENOENT) continue; return -errno; } n = read(fd, contents, sizeof(contents)); if (n < 0) return -errno; if (n != 6 || memcmp(contents, "Mains\n", 6)) continue; safe_close(fd); fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY); if (fd < 0) { if (errno == ENOENT) continue; return -errno; } n = read(fd, contents, sizeof(contents)); if (n < 0) return -errno; if (n != 2 || contents[1] != '\n') return -EIO; if (contents[0] == '1') { found_online = true; break; } else if (contents[0] == '0') found_offline = true; else return -EIO; } return found_online || !found_offline; } bool id128_is_valid(const char *s) { size_t i, l; l = strlen(s); if (l == 32) { /* Simple formatted 128bit hex string */ for (i = 0; i < l; i++) { char c = s[i]; if (!(c >= '0' && c <= '9') && !(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z')) return false; } } else if (l == 36) { /* Formatted UUID */ for (i = 0; i < l; i++) { char c = s[i]; if ((i == 8 || i == 13 || i == 18 || i == 23)) { if (c != '-') return false; } else { if (!(c >= '0' && c <= '9') && !(c >= 'a' && c <= 'z') && !(c >= 'A' && c <= 'Z')) return false; } } } else return false; return true; } int container_get_leader(const char *machine, pid_t *pid) { _cleanup_free_ char *s = NULL, *class = NULL; const char *p; pid_t leader; int r; assert(machine); assert(pid); if (!machine_name_is_valid(machine)) return -EINVAL; p = strjoina("/run/systemd/machines/", machine); r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL); if (r == -ENOENT) return -EHOSTDOWN; if (r < 0) return r; if (!s) return -EIO; if (!streq_ptr(class, "container")) return -EIO; r = parse_pid(s, &leader); if (r < 0) return r; if (leader <= 1) return -EIO; *pid = leader; return 0; } int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1; int rfd = -1; assert(pid >= 0); if (mntns_fd) { const char *mntns; mntns = procfs_file_alloca(pid, "ns/mnt"); mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (mntnsfd < 0) return -errno; } if (pidns_fd) { const char *pidns; pidns = procfs_file_alloca(pid, "ns/pid"); pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (pidnsfd < 0) return -errno; } if (netns_fd) { const char *netns; netns = procfs_file_alloca(pid, "ns/net"); netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (netnsfd < 0) return -errno; } if (userns_fd) { const char *userns; userns = procfs_file_alloca(pid, "ns/user"); usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (usernsfd < 0 && errno != ENOENT) return -errno; } if (root_fd) { const char *root; root = procfs_file_alloca(pid, "root"); rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); if (rfd < 0) return -errno; } if (pidns_fd) *pidns_fd = pidnsfd; if (mntns_fd) *mntns_fd = mntnsfd; if (netns_fd) *netns_fd = netnsfd; if (userns_fd) *userns_fd = usernsfd; if (root_fd) *root_fd = rfd; pidnsfd = mntnsfd = netnsfd = usernsfd = -1; return 0; } int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { if (userns_fd >= 0) { /* Can't setns to your own userns, since then you could * escalate from non-root to root in your own namespace, so * check if namespaces equal before attempting to enter. */ _cleanup_free_ char *userns_fd_path = NULL; int r; if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0) return -ENOMEM; r = files_same(userns_fd_path, "/proc/self/ns/user"); if (r < 0) return r; if (r) userns_fd = -1; } if (pidns_fd >= 0) if (setns(pidns_fd, CLONE_NEWPID) < 0) return -errno; if (mntns_fd >= 0) if (setns(mntns_fd, CLONE_NEWNS) < 0) return -errno; if (netns_fd >= 0) if (setns(netns_fd, CLONE_NEWNET) < 0) return -errno; if (userns_fd >= 0) if (setns(userns_fd, CLONE_NEWUSER) < 0) return -errno; if (root_fd >= 0) { if (fchdir(root_fd) < 0) return -errno; if (chroot(".") < 0) return -errno; } return reset_uid_gid(); } unsigned long personality_from_string(const char *p) { /* Parse a personality specifier. We introduce our own * identifiers that indicate specific ABIs, rather than just * hints regarding the register size, since we want to keep * things open for multiple locally supported ABIs for the * same register size. We try to reuse the ABI identifiers * used by libseccomp. */ #if defined(__x86_64__) if (streq(p, "x86")) return PER_LINUX32; if (streq(p, "x86-64")) return PER_LINUX; #elif defined(__i386__) if (streq(p, "x86")) return PER_LINUX; #elif defined(__s390x__) if (streq(p, "s390")) return PER_LINUX32; if (streq(p, "s390x")) return PER_LINUX; #elif defined(__s390__) if (streq(p, "s390")) return PER_LINUX; #endif return PERSONALITY_INVALID; } const char* personality_to_string(unsigned long p) { #if defined(__x86_64__) if (p == PER_LINUX32) return "x86"; if (p == PER_LINUX) return "x86-64"; #elif defined(__i386__) if (p == PER_LINUX) return "x86"; #elif defined(__s390x__) if (p == PER_LINUX) return "s390x"; if (p == PER_LINUX32) return "s390"; #elif defined(__s390__) if (p == PER_LINUX) return "s390"; #endif return NULL; } uint64_t physical_memory(void) { long mem; /* We return this as uint64_t in case we are running as 32bit * process on a 64bit kernel with huge amounts of memory */ mem = sysconf(_SC_PHYS_PAGES); assert(mem > 0); return (uint64_t) mem * (uint64_t) page_size(); } int update_reboot_param_file(const char *param) { int r = 0; if (param) { r = write_string_file(REBOOT_PARAM_FILE, param, WRITE_STRING_FILE_CREATE); if (r < 0) return log_error_errno(r, "Failed to write reboot param to "REBOOT_PARAM_FILE": %m"); } else (void) unlink(REBOOT_PARAM_FILE); return 0; } int version(void) { puts(PACKAGE_STRING "\n" SYSTEMD_FEATURES); return 0; } bool fdname_is_valid(const char *s) { const char *p; /* Validates a name for $LISTEN_FDNAMES. We basically allow * everything ASCII that's not a control character. Also, as * special exception the ":" character is not allowed, as we * use that as field separator in $LISTEN_FDNAMES. * * Note that the empty string is explicitly allowed * here. However, we limit the length of the names to 255 * characters. */ if (!s) return false; for (p = s; *p; p++) { if (*p < ' ') return false; if (*p >= 127) return false; if (*p == ':') return false; } return p - s < 256; } bool oom_score_adjust_is_valid(int oa) { return oa >= OOM_SCORE_ADJ_MIN && oa <= OOM_SCORE_ADJ_MAX; }