diff options
Diffstat (limited to 'src/cgroup-util.c')
-rw-r--r-- | src/cgroup-util.c | 1111 |
1 files changed, 1111 insertions, 0 deletions
diff --git a/src/cgroup-util.c b/src/cgroup-util.c new file mode 100644 index 0000000000..904d300952 --- /dev/null +++ b/src/cgroup-util.c @@ -0,0 +1,1111 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <errno.h> +#include <unistd.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> +#include <dirent.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <ftw.h> + +#include "cgroup-util.h" +#include "log.h" +#include "set.h" +#include "macro.h" +#include "util.h" + +int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) { + char *fs; + int r; + FILE *f; + + assert(controller); + assert(path); + assert(_f); + + if ((r = cg_get_path(controller, path, "cgroup.procs", &fs)) < 0) + return r; + + f = fopen(fs, "re"); + free(fs); + + if (!f) + return -errno; + + *_f = f; + return 0; +} + +int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) { + char *fs; + int r; + FILE *f; + + assert(controller); + assert(path); + assert(_f); + + if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0) + return r; + + f = fopen(fs, "re"); + free(fs); + + if (!f) + return -errno; + + *_f = f; + return 0; +} + +int cg_read_pid(FILE *f, pid_t *_pid) { + unsigned long ul; + + /* Note that the cgroup.procs might contain duplicates! See + * cgroups.txt for details. */ + + errno = 0; + if (fscanf(f, "%lu", &ul) != 1) { + + if (feof(f)) + return 0; + + return errno ? -errno : -EIO; + } + + if (ul <= 0) + return -EIO; + + *_pid = (pid_t) ul; + return 1; +} + +int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) { + char *fs; + int r; + DIR *d; + + assert(controller); + assert(path); + assert(_d); + + /* This is not recursive! */ + + if ((r = cg_get_path(controller, path, NULL, &fs)) < 0) + return r; + + d = opendir(fs); + free(fs); + + if (!d) + return -errno; + + *_d = d; + return 0; +} + +int cg_read_subgroup(DIR *d, char **fn) { + struct dirent *de; + + assert(d); + + errno = 0; + while ((de = readdir(d))) { + char *b; + + if (de->d_type != DT_DIR) + continue; + + if (streq(de->d_name, ".") || + streq(de->d_name, "..")) + continue; + + if (!(b = strdup(de->d_name))) + return -ENOMEM; + + *fn = b; + return 1; + } + + if (errno) + return -errno; + + return 0; +} + +int cg_rmdir(const char *controller, const char *path, bool honour_sticky) { + char *p; + int r; + + r = cg_get_path(controller, path, NULL, &p); + if (r < 0) + return r; + + if (honour_sticky) { + char *tasks; + + /* If the sticky bit is set don't remove the directory */ + + tasks = strappend(p, "/tasks"); + if (!tasks) { + free(p); + return -ENOMEM; + } + + r = file_is_priv_sticky(tasks); + free(tasks); + + if (r > 0) { + free(p); + return 0; + } + } + + r = rmdir(p); + free(p); + + return (r < 0 && errno != ENOENT) ? -errno : 0; +} + +int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) { + bool done = false; + int r, ret = 0; + pid_t my_pid; + FILE *f = NULL; + Set *allocated_set = NULL; + + assert(controller); + assert(path); + assert(sig >= 0); + + /* This goes through the tasks list and kills them all. This + * is repeated until no further processes are added to the + * tasks list, to properly handle forking processes */ + + if (!s) + if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func))) + return -ENOMEM; + + my_pid = getpid(); + + do { + pid_t pid = 0; + done = true; + + if ((r = cg_enumerate_processes(controller, path, &f)) < 0) { + if (ret >= 0 && r != -ENOENT) + ret = r; + + goto finish; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + if (pid == my_pid && ignore_self) + continue; + + if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid)) + continue; + + /* If we haven't killed this process yet, kill + * it */ + if (kill(pid, sig) < 0) { + if (ret >= 0 && errno != ESRCH) + ret = -errno; + } else if (ret == 0) { + + if (sigcont) + kill(pid, SIGCONT); + + ret = 1; + } + + done = false; + + if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) { + if (ret >= 0) + ret = r; + + goto finish; + } + } + + if (r < 0) { + if (ret >= 0) + ret = r; + + goto finish; + } + + fclose(f); + f = NULL; + + /* To avoid racing against processes which fork + * quicker than we can kill them we repeat this until + * no new pids need to be killed. */ + + } while (!done); + +finish: + if (allocated_set) + set_free(allocated_set); + + if (f) + fclose(f); + + return ret; +} + +int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) { + int r, ret = 0; + DIR *d = NULL; + char *fn; + Set *allocated_set = NULL; + + assert(path); + assert(controller); + assert(sig >= 0); + + if (!s) + if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func))) + return -ENOMEM; + + ret = cg_kill(controller, path, sig, sigcont, ignore_self, s); + + if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) { + if (ret >= 0 && r != -ENOENT) + ret = r; + + goto finish; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + char *p = NULL; + + r = asprintf(&p, "%s/%s", path, fn); + free(fn); + + if (r < 0) { + if (ret >= 0) + ret = -ENOMEM; + + goto finish; + } + + r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s); + free(p); + + if (r != 0 && ret >= 0) + ret = r; + } + + if (r < 0 && ret >= 0) + ret = r; + + if (rem) + if ((r = cg_rmdir(controller, path, true)) < 0) { + if (ret >= 0 && + r != -ENOENT && + r != -EBUSY) + ret = r; + } + +finish: + if (d) + closedir(d); + + if (allocated_set) + set_free(allocated_set); + + return ret; +} + +int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) { + unsigned i; + + assert(path); + assert(controller); + + /* This safely kills all processes; first it sends a SIGTERM, + * then checks 8 times after 200ms whether the group is now + * empty, then kills everything that is left with SIGKILL and + * finally checks 5 times after 200ms each whether the group + * is finally empty. */ + + for (i = 0; i < 15; i++) { + int sig, r; + + if (i <= 0) + sig = SIGTERM; + else if (i == 9) + sig = SIGKILL; + else + sig = 0; + + if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0) + return r; + + usleep(200 * USEC_PER_MSEC); + } + + return 0; +} + +int cg_migrate(const char *controller, const char *from, const char *to, bool ignore_self) { + bool done = false; + Set *s; + int r, ret = 0; + pid_t my_pid; + FILE *f = NULL; + + assert(controller); + assert(from); + assert(to); + + if (!(s = set_new(trivial_hash_func, trivial_compare_func))) + return -ENOMEM; + + my_pid = getpid(); + + do { + pid_t pid = 0; + done = true; + + if ((r = cg_enumerate_tasks(controller, from, &f)) < 0) { + if (ret >= 0 && r != -ENOENT) + ret = r; + + goto finish; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + /* This might do weird stuff if we aren't a + * single-threaded program. However, we + * luckily know we are not */ + if (pid == my_pid && ignore_self) + continue; + + if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid)) + continue; + + if ((r = cg_attach(controller, to, pid)) < 0) { + if (ret >= 0 && r != -ESRCH) + ret = r; + } else if (ret == 0) + ret = 1; + + done = false; + + if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) { + if (ret >= 0) + ret = r; + + goto finish; + } + } + + if (r < 0) { + if (ret >= 0) + ret = r; + + goto finish; + } + + fclose(f); + f = NULL; + + } while (!done); + +finish: + set_free(s); + + if (f) + fclose(f); + + return ret; +} + +int cg_migrate_recursive(const char *controller, const char *from, const char *to, bool ignore_self, bool rem) { + int r, ret = 0; + DIR *d = NULL; + char *fn; + + assert(controller); + assert(from); + assert(to); + + ret = cg_migrate(controller, from, to, ignore_self); + + if ((r = cg_enumerate_subgroups(controller, from, &d)) < 0) { + if (ret >= 0 && r != -ENOENT) + ret = r; + goto finish; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + char *p = NULL; + + r = asprintf(&p, "%s/%s", from, fn); + free(fn); + + if (r < 0) { + if (ret >= 0) + ret = -ENOMEM; + + goto finish; + } + + r = cg_migrate_recursive(controller, p, to, ignore_self, rem); + free(p); + + if (r != 0 && ret >= 0) + ret = r; + } + + if (r < 0 && ret >= 0) + ret = r; + + if (rem) + if ((r = cg_rmdir(controller, from, true)) < 0) { + if (ret >= 0 && + r != -ENOENT && + r != -EBUSY) + ret = r; + } + +finish: + if (d) + closedir(d); + + return ret; +} + +int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) { + const char *p; + char *t; + static __thread bool good = false; + + assert(controller); + assert(fs); + + if (_unlikely_(!good)) { + int r; + + r = path_is_mount_point("/sys/fs/cgroup", false); + if (r <= 0) + return r < 0 ? r : -ENOENT; + + /* Cache this to save a few stat()s */ + good = true; + } + + if (isempty(controller)) + return -EINVAL; + + /* This is a very minimal lookup from controller names to + * paths. Since we have mounted most hierarchies ourselves + * should be kinda safe, but eventually we might want to + * extend this to have a fallback to actually check + * /proc/mounts. Might need caching then. */ + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) + p = "systemd"; + else if (startswith(controller, "name=")) + p = controller + 5; + else + p = controller; + + if (path && suffix) + t = join("/sys/fs/cgroup/", p, "/", path, "/", suffix, NULL); + else if (path) + t = join("/sys/fs/cgroup/", p, "/", path, NULL); + else if (suffix) + t = join("/sys/fs/cgroup/", p, "/", suffix, NULL); + else + t = join("/sys/fs/cgroup/", p, NULL); + + if (!t) + return -ENOMEM; + + path_kill_slashes(t); + + *fs = t; + return 0; +} + +static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { + char *p; + bool is_sticky; + + if (typeflag != FTW_DP) + return 0; + + if (ftwbuf->level < 1) + return 0; + + p = strappend(path, "/tasks"); + if (!p) { + errno = ENOMEM; + return 1; + } + + is_sticky = file_is_priv_sticky(p) > 0; + free(p); + + if (is_sticky) + return 0; + + rmdir(path); + return 0; +} + +int cg_trim(const char *controller, const char *path, bool delete_root) { + char *fs; + int r = 0; + + assert(controller); + assert(path); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + errno = 0; + if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0) + r = errno ? -errno : -EIO; + + if (delete_root) { + bool is_sticky; + char *p; + + p = strappend(fs, "/tasks"); + if (!p) { + free(fs); + return -ENOMEM; + } + + is_sticky = file_is_priv_sticky(p) > 0; + free(p); + + if (!is_sticky) + if (rmdir(fs) < 0 && errno != ENOENT) { + if (r == 0) + r = -errno; + } + } + + free(fs); + + return r; +} + +int cg_delete(const char *controller, const char *path) { + char *parent; + int r; + + assert(controller); + assert(path); + + if ((r = parent_of_path(path, &parent)) < 0) + return r; + + r = cg_migrate_recursive(controller, path, parent, false, true); + free(parent); + + return r == -ENOENT ? 0 : r; +} + +int cg_create(const char *controller, const char *path) { + char *fs; + int r; + + assert(controller); + assert(path); + + if ((r = cg_get_path(controller, path, NULL, &fs)) < 0) + return r; + + r = mkdir_parents(fs, 0755); + + if (r >= 0) { + if (mkdir(fs, 0755) >= 0) + r = 1; + else if (errno == EEXIST) + r = 0; + else + r = -errno; + } + + free(fs); + + return r; +} + +int cg_attach(const char *controller, const char *path, pid_t pid) { + char *fs; + int r; + char c[32]; + + assert(controller); + assert(path); + assert(pid >= 0); + + if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0) + return r; + + if (pid == 0) + pid = getpid(); + + snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid); + char_array_0(c); + + r = write_one_line_file(fs, c); + free(fs); + + return r; +} + +int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { + int r, q; + + assert(controller); + assert(path); + assert(pid >= 0); + + if ((r = cg_create(controller, path)) < 0) + return r; + + if ((q = cg_attach(controller, path, pid)) < 0) + return q; + + /* This does not remove the cgroup on failure */ + + return r; +} + +int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) { + char *fs; + int r; + + assert(controller); + assert(path); + + if (mode != (mode_t) -1) + mode &= 0777; + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, mode, uid, gid); + free(fs); + + return r; +} + +int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) { + char *fs; + int r; + + assert(controller); + assert(path); + + if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0) + return 0; + + if (mode != (mode_t) -1) + mode &= 0666; + + r = cg_get_path(controller, path, "tasks", &fs); + if (r < 0) + return r; + + if (sticky >= 0 && mode != (mode_t) -1) + /* Both mode and sticky param are passed */ + mode |= (sticky ? S_ISVTX : 0); + else if ((sticky >= 0 && mode == (mode_t) -1) || + (mode != (mode_t) -1 && sticky < 0)) { + struct stat st; + + /* Only one param is passed, hence read the current + * mode from the file itself */ + + r = lstat(fs, &st); + if (r < 0) { + free(fs); + return -errno; + } + + if (mode == (mode_t) -1) + /* No mode set, we just shall set the sticky bit */ + mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0); + else + /* Only mode set, leave sticky bit untouched */ + mode = (st.st_mode & ~0777) | mode; + } + + r = chmod_and_chown(fs, mode, uid, gid); + free(fs); + + return r; +} + +int cg_get_by_pid(const char *controller, pid_t pid, char **path) { + int r; + char *p = NULL; + FILE *f; + char *fs; + size_t cs; + + assert(controller); + assert(path); + assert(pid >= 0); + + if (pid == 0) + pid = getpid(); + + if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0) + return -ENOMEM; + + f = fopen(fs, "re"); + free(fs); + + if (!f) + return errno == ENOENT ? -ESRCH : -errno; + + cs = strlen(controller); + + while (!feof(f)) { + char line[LINE_MAX]; + char *l; + + errno = 0; + if (!(fgets(line, sizeof(line), f))) { + if (feof(f)) + break; + + r = errno ? -errno : -EIO; + goto finish; + } + + truncate_nl(line); + + if (!(l = strchr(line, ':'))) + continue; + + l++; + if (strncmp(l, controller, cs) != 0) + continue; + + if (l[cs] != ':') + continue; + + if (!(p = strdup(l + cs + 1))) { + r = -ENOMEM; + goto finish; + } + + *path = p; + r = 0; + goto finish; + } + + r = -ENOENT; + +finish: + fclose(f); + + return r; +} + +int cg_install_release_agent(const char *controller, const char *agent) { + char *fs = NULL, *contents = NULL, *line = NULL, *sc; + int r; + + assert(controller); + assert(agent); + + if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0) + return r; + + if ((r = read_one_line_file(fs, &contents)) < 0) + goto finish; + + sc = strstrip(contents); + if (sc[0] == 0) { + + if (asprintf(&line, "%s\n", agent) < 0) { + r = -ENOMEM; + goto finish; + } + + if ((r = write_one_line_file(fs, line)) < 0) + goto finish; + + } else if (!streq(sc, agent)) { + r = -EEXIST; + goto finish; + } + + free(fs); + fs = NULL; + if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0) + goto finish; + + free(contents); + contents = NULL; + if ((r = read_one_line_file(fs, &contents)) < 0) + goto finish; + + sc = strstrip(contents); + + if (streq(sc, "0")) { + if ((r = write_one_line_file(fs, "1\n")) < 0) + goto finish; + + r = 1; + } else if (!streq(sc, "1")) { + r = -EIO; + goto finish; + } else + r = 0; + +finish: + free(fs); + free(contents); + free(line); + + return r; +} + +int cg_is_empty(const char *controller, const char *path, bool ignore_self) { + pid_t pid = 0; + int r; + FILE *f = NULL; + bool found = false; + + assert(controller); + assert(path); + + if ((r = cg_enumerate_tasks(controller, path, &f)) < 0) + return r == -ENOENT ? 1 : r; + + while ((r = cg_read_pid(f, &pid)) > 0) { + + if (ignore_self && pid == getpid()) + continue; + + found = true; + break; + } + + fclose(f); + + if (r < 0) + return r; + + return !found; +} + +int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) { + int r; + DIR *d = NULL; + char *fn; + + assert(controller); + assert(path); + + if ((r = cg_is_empty(controller, path, ignore_self)) <= 0) + return r; + + if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) + return r == -ENOENT ? 1 : r; + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + char *p = NULL; + + r = asprintf(&p, "%s/%s", path, fn); + free(fn); + + if (r < 0) { + r = -ENOMEM; + goto finish; + } + + r = cg_is_empty_recursive(controller, p, ignore_self); + free(p); + + if (r <= 0) + goto finish; + } + + if (r >= 0) + r = 1; + +finish: + + if (d) + closedir(d); + + return r; +} + +int cg_split_spec(const char *spec, char **controller, char **path) { + const char *e; + char *t = NULL, *u = NULL; + + assert(spec); + assert(controller || path); + + if (*spec == '/') { + + if (path) { + if (!(t = strdup(spec))) + return -ENOMEM; + + *path = t; + } + + if (controller) + *controller = NULL; + + return 0; + } + + if (!(e = strchr(spec, ':'))) { + + if (strchr(spec, '/') || spec[0] == 0) + return -EINVAL; + + if (controller) { + if (!(t = strdup(spec))) + return -ENOMEM; + + *controller = t; + } + + if (path) + *path = NULL; + + return 0; + } + + if (e[1] != '/' || + e == spec || + memchr(spec, '/', e-spec)) + return -EINVAL; + + if (controller) + if (!(t = strndup(spec, e-spec))) + return -ENOMEM; + + if (path) + if (!(u = strdup(e+1))) { + free(t); + return -ENOMEM; + } + + if (controller) + *controller = t; + + if (path) + *path = u; + + return 0; +} + +int cg_join_spec(const char *controller, const char *path, char **spec) { + assert(controller); + assert(path); + + if (!path_is_absolute(path) || + controller[0] == 0 || + strchr(controller, ':') || + strchr(controller, '/')) + return -EINVAL; + + if (asprintf(spec, "%s:%s", controller, path) < 0) + return -ENOMEM; + + return 0; +} + +int cg_fix_path(const char *path, char **result) { + char *t, *c, *p; + int r; + + assert(path); + assert(result); + + /* First check if it already is a filesystem path */ + if (path_is_absolute(path) && + path_startswith(path, "/sys/fs/cgroup") && + access(path, F_OK) >= 0) { + + if (!(t = strdup(path))) + return -ENOMEM; + + *result = t; + return 0; + } + + /* Otherwise treat it as cg spec */ + if ((r = cg_split_spec(path, &c, &p)) < 0) + return r; + + r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result); + free(c); + free(p); + + return r; +} + +int cg_get_user_path(char **path) { + char *root, *p; + + assert(path); + + /* Figure out the place to put user cgroups below. We use the + * same as PID 1 has but with the "/system" suffix replaced by + * "/user" */ + + if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0) + p = strdup("/user"); + else { + if (endswith(root, "/system")) + root[strlen(root) - 7] = 0; + else if (streq(root, "/")) + root[0] = 0; + + p = strappend(root, "/user"); + free(root); + } + + if (!p) + return -ENOMEM; + + *path = p; + return 0; +} |