core: general cgroup rework

Replace the very generic cgroup hookup with a much simpler one. With this change only the high-level cgroup settings remain, the ability to set arbitrary cgroup attributes is removed, so is support for adding units to arbitrary cgroup controllers or setting arbitrary paths for them (especially paths that are different for the various controllers). This also introduces a new -.slice root slice, that is the parent of system.slice and friends. This enables easy admin configuration of root-level cgrouo properties. This replaces DeviceDeny= by DevicePolicy=, and implicitly adds in /dev/null, /dev/zero and friends if DeviceAllow= is used (unless this is turned off by DevicePolicy=).
author: Lennart Poettering <lennart@poettering.net> 2013-06-27 04:14:27 +0200
committer: Lennart Poettering <lennart@poettering.net> 2013-06-27 04:17:34 +0200
commit: 4ad490007b70e6ac18d3cb04fa2ed92eba1451fa (patch)
tree: 20c7aab57b1f2722be1a057a28a6e7c16788c976 /src/core/cgroup.c
parent: abb26902e424c4142b68ead35676028b12249b77 (diff)
1 files changed, 475 insertions, 356 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 5065329739..79467a82ce 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -3,7 +3,7 @@
 /***
   This file is part of systemd.
 
-  Copyright 2010 Lennart Poettering
+  Copyright 2013 Lennart Poettering
 
   systemd is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published by
@@ -19,305 +19,554 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
-#include <errno.h>
-#include <assert.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <signal.h>
-#include <sys/mount.h>
 #include <fcntl.h>
 
-#include "cgroup.h"
-#include "cgroup-util.h"
-#include "log.h"
-#include "strv.h"
 #include "path-util.h"
 #include "special.h"
+#include "cgroup-util.h"
+#include "cgroup.h"
 
-int cgroup_bonding_realize(CGroupBonding *b) {
-        int r;
+void cgroup_context_init(CGroupContext *c) {
+        assert(c);
+
+        /* Initialize everything to the kernel defaults, assuming the
+         * structure is preinitialized to 0 */
+
+        c->cpu_shares = 1024;
+        c->memory_limit = c->memory_soft_limit = (uint64_t) -1;
+        c->blockio_weight = 1000;
+}
 
+void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
+        assert(c);
+        assert(a);
+
+        LIST_REMOVE(CGroupDeviceAllow, device_allow, c->device_allow, a);
+        free(a->path);
+        free(a);
+}
+
+void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
+        assert(c);
+        assert(w);
+
+        LIST_REMOVE(CGroupBlockIODeviceWeight, device_weights, c->blockio_device_weights, w);
+        free(w->path);
+        free(w);
+}
+
+void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
+        assert(c);
         assert(b);
-        assert(b->path);
-        assert(b->controller);
 
-        r = cg_create(b->controller, b->path, NULL);
+        LIST_REMOVE(CGroupBlockIODeviceBandwidth, device_bandwidths, c->blockio_device_bandwidths, b);
+        free(b->path);
+        free(b);
+}
+
+void cgroup_context_done(CGroupContext *c) {
+        assert(c);
+
+        while (c->blockio_device_weights)
+                cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
+
+        while (c->blockio_device_bandwidths)
+                cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
+
+        while (c->device_allow)
+                cgroup_context_free_device_allow(c, c->device_allow);
+}
+
+void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
+        CGroupBlockIODeviceBandwidth *b;
+        CGroupBlockIODeviceWeight *w;
+        CGroupDeviceAllow *a;
+
+        assert(c);
+        assert(f);
+
+        prefix = strempty(prefix);
+
+        fprintf(f,
+                "%sCPUAccounting=%s\n"
+                "%sBlockIOAccounting=%s\n"
+                "%sMemoryAccounting=%s\n"
+                "%sCPUShares=%lu\n"
+                "%sBlockIOWeight%lu\n"
+                "%sMemoryLimit=%" PRIu64 "\n"
+                "%sMemorySoftLimit=%" PRIu64 "\n"
+                "%sDevicePolicy=%s\n",
+                prefix, yes_no(c->cpu_accounting),
+                prefix, yes_no(c->blockio_accounting),
+                prefix, yes_no(c->memory_accounting),
+                prefix, c->cpu_shares,
+                prefix, c->blockio_weight,
+                prefix, c->memory_limit,
+                prefix, c->memory_soft_limit,
+                prefix, cgroup_device_policy_to_string(c->device_policy));
+
+        LIST_FOREACH(device_allow, a, c->device_allow)
+                fprintf(f,
+                        "%sDeviceAllow=%s %s%s%s\n",
+                        prefix,
+                        a->path,
+                        a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
+
+        LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+                fprintf(f,
+                        "%sBlockIOWeight=%s %lu",
+                        prefix,
+                        w->path,
+                        w->weight);
+
+        LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+                char buf[FORMAT_BYTES_MAX];
+
+                fprintf(f,
+                        "%s%s=%s %s\n",
+                        prefix,
+                        b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
+                        b->path,
+                        format_bytes(buf, sizeof(buf), b->bandwidth));
+        }
+}
+
+static int lookup_blkio_device(const char *p, dev_t *dev) {
+        struct stat st;
+        int r;
+
+        assert(p);
+        assert(dev);
+
+        r = stat(p, &st);
         if (r < 0) {
-                log_warning("Failed to create cgroup %s:%s: %s", b->controller, b->path, strerror(-r));
-                return r;
+                log_warning("Couldn't stat device %s: %m", p);
+                return -errno;
         }
 
-        b->realized = true;
+        if (S_ISBLK(st.st_mode))
+                *dev = st.st_rdev;
+        else if (major(st.st_dev) != 0) {
+                /* If this is not a device node then find the block
+                 * device this file is stored on */
+                *dev = st.st_dev;
+
+                /* If this is a partition, try to get the originating
+                 * block device */
+                block_get_whole_disk(*dev, dev);
+        } else {
+                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
+                return -ENODEV;
+        }
 
         return 0;
 }
 
-int cgroup_bonding_realize_list(CGroupBonding *first) {
-        CGroupBonding *b;
+static int whitelist_device(const char *path, const char *node, const char *acc) {
+        char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
+        struct stat st;
         int r;
 
-        LIST_FOREACH(by_unit, b, first)
-                if ((r = cgroup_bonding_realize(b)) < 0 && b->essential)
-                        return r;
+        assert(path);
+        assert(acc);
 
-        return 0;
+        if (stat(node, &st) < 0) {
+                log_warning("Couldn't stat device %s", node);
+                return -errno;
+        }
+
+        if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
+                log_warning("%s is not a device.", node);
+                return -ENODEV;
+        }
+
+        sprintf(buf,
+                "%c %u:%u %s",
+                S_ISCHR(st.st_mode) ? 'c' : 'b',
+                major(st.st_rdev), minor(st.st_rdev),
+                acc);
+
+        r = cg_set_attribute("devices", path, "devices.allow", buf);
+        if (r < 0)
+                log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r));
+
+        return r;
 }
 
-void cgroup_bonding_free(CGroupBonding *b, bool trim) {
-        assert(b);
+void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) {
+        int r;
+
+        assert(c);
+        assert(path);
 
-        if (b->unit) {
-                CGroupBonding *f;
+        if (mask == 0)
+                return;
 
-                LIST_REMOVE(CGroupBonding, by_unit, b->unit->cgroup_bondings, b);
+        if (mask & CGROUP_CPU) {
+                char buf[DECIMAL_STR_MAX(unsigned long) + 1];
 
-                if (streq(b->controller, SYSTEMD_CGROUP_CONTROLLER)) {
-                        assert_se(f = hashmap_get(b->unit->manager->cgroup_bondings, b->path));
-                        LIST_REMOVE(CGroupBonding, by_path, f, b);
+                sprintf(buf, "%lu\n", c->cpu_shares);
+                r = cg_set_attribute("cpu", path, "cpu.shares", buf);
+                if (r < 0)
+                        log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r));
+        }
+
+        if (mask & CGROUP_BLKIO) {
+                char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
+                              DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
+                              DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
+                CGroupBlockIODeviceWeight *w;
+                CGroupBlockIODeviceBandwidth *b;
+
+                sprintf(buf, "%lu\n", c->blockio_weight);
+                r = cg_set_attribute("blkio", path, "blkio.weight", buf);
+                if (r < 0)
+                        log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r));
+
+                /* FIXME: no way to reset this list */
+                LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+                        dev_t dev;
+
+                        r = lookup_blkio_device(w->path, &dev);
+                        if (r < 0)
+                                continue;
 
-                        if (f)
-                                hashmap_replace(b->unit->manager->cgroup_bondings, b->path, f);
-                        else
-                                hashmap_remove(b->unit->manager->cgroup_bondings, b->path);
+                        sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
+                        r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
+                        if (r < 0)
+                                log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r));
+                }
+
+                /* FIXME: no way to reset this list */
+                LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+                        const char *a;
+                        dev_t dev;
+
+                        r = lookup_blkio_device(b->path, &dev);
+                        if (r < 0)
+                                continue;
+
+                        a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
+
+                        sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
+                        r = cg_set_attribute("blkio", path, a, buf);
+                        if (r < 0)
+                                log_error("Failed to set %s on %s: %s", a, path, strerror(-r));
                 }
         }
 
-        if (b->realized && b->ours && trim)
-                cg_trim(b->controller, b->path, false);
+        if (mask & CGROUP_MEMORY) {
+                char buf[DECIMAL_STR_MAX(uint64_t) + 1];
 
-        free(b->controller);
-        free(b->path);
-        free(b);
-}
+                sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
+                r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
+                if (r < 0)
+                        log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
 
-void cgroup_bonding_free_list(CGroupBonding *first, bool remove_or_trim) {
-        CGroupBonding *b, *n;
+                sprintf(buf, "%" PRIu64 "\n", c->memory_soft_limit);
+                cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", buf);
+                if (r < 0)
+                        log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r));
+        }
 
-        LIST_FOREACH_SAFE(by_unit, b, n, first)
-                cgroup_bonding_free(b, remove_or_trim);
-}
+        if (mask & CGROUP_DEVICE) {
+                CGroupDeviceAllow *a;
 
-void cgroup_bonding_trim(CGroupBonding *b, bool delete_root) {
-        assert(b);
+                if (c->device_allow || c->device_policy != CGROUP_AUTO)
+                        r = cg_set_attribute("devices", path, "devices.deny", "a");
+                else
+                        r = cg_set_attribute("devices", path, "devices.allow", "a");
+                if (r < 0)
+                        log_error("Failed to reset devices.list on %s: %s", path, strerror(-r));
 
-        if (b->realized && b->ours)
-                cg_trim(b->controller, b->path, delete_root);
-}
+                if (c->device_policy == CGROUP_CLOSED ||
+                    (c->device_policy == CGROUP_AUTO && c->device_allow)) {
+                        static const char auto_devices[] =
+                                "/dev/null\0" "rw\0"
+                                "/dev/zero\0" "rw\0"
+                                "/dev/full\0" "rw\0"
+                                "/dev/random\0" "rw\0"
+                                "/dev/urandom\0" "rw\0";
+
+                        const char *x, *y;
+
+                        NULSTR_FOREACH_PAIR(x, y, auto_devices)
+                                whitelist_device(path, x, y);
+                }
+
+                LIST_FOREACH(device_allow, a, c->device_allow) {
+                        char acc[4];
+                        unsigned k = 0;
+
+                        if (a->r)
+                                acc[k++] = 'r';
+                        if (a->w)
+                                acc[k++] = 'w';
+                        if (a->m)
+                                acc[k++] = 'm';
 
-void cgroup_bonding_trim_list(CGroupBonding *first, bool delete_root) {
-        CGroupBonding *b;
+                        if (k == 0)
+                                continue;
 
-        LIST_FOREACH(by_unit, b, first)
-                cgroup_bonding_trim(b, delete_root);
+                        acc[k++] = 0;
+                        whitelist_device(path, a->path, acc);
+                }
+        }
 }
 
-int cgroup_bonding_install(CGroupBonding *b, pid_t pid, const char *cgroup_suffix) {
-        _cleanup_free_ char *p = NULL;
-        const char *path;
-        int r;
+CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
+        CGroupControllerMask mask = 0;
 
-        assert(b);
-        assert(pid >= 0);
+        /* Figure out which controllers we need */
 
-        if (cgroup_suffix) {
-                p = strjoin(b->path, "/", cgroup_suffix, NULL);
-                if (!p)
-                        return -ENOMEM;
+        if (c->cpu_accounting || c->cpu_shares != 1024)
+                mask |= CGROUP_CPUACCT | CGROUP_CPU;
 
-                path = p;
-        } else
-                path = b->path;
+        if (c->blockio_accounting ||
+            c->blockio_weight != 1000 ||
+            c->blockio_device_weights ||
+            c->blockio_device_bandwidths)
+                mask |= CGROUP_BLKIO;
 
-        r = cg_create_and_attach(b->controller, path, pid);
-        if (r < 0)
-                return r;
+        if (c->memory_accounting ||
+            c->memory_limit != (uint64_t) -1 ||
+            c->memory_soft_limit != (uint64_t) -1)
+                mask |= CGROUP_MEMORY;
 
-        b->realized = true;
-        return 0;
+        if (c->device_allow || c->device_policy != CGROUP_AUTO)
+                mask |= CGROUP_DEVICE;
+
+        return mask;
 }
 
-int cgroup_bonding_install_list(CGroupBonding *first, pid_t pid, const char *cgroup_suffix) {
-        CGroupBonding *b;
-        int r;
+static CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
+        CGroupContext *c;
 
-        LIST_FOREACH(by_unit, b, first) {
-                r = cgroup_bonding_install(b, pid, cgroup_suffix);
-                if (r < 0 && b->essential)
-                        return r;
-        }
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return 0;
 
-        return 0;
+        return cgroup_context_get_mask(c);
 }
 
-int cgroup_bonding_migrate(CGroupBonding *b, CGroupBonding *list) {
-        CGroupBonding *q;
-        int ret = 0;
+static CGroupControllerMask unit_get_members_mask(Unit *u) {
+        CGroupControllerMask mask = 0;
+        Unit *m;
+        Iterator i;
 
-        LIST_FOREACH(by_unit, q, list) {
-                int r;
+        assert(u);
 
-                if (q == b)
-                        continue;
+        SET_FOREACH(m, u->dependencies[UNIT_BEFORE], i) {
 
-                if (!q->ours)
+                if (UNIT_DEREF(m->slice) != u)
                         continue;
 
-                r = cg_migrate_recursive(q->controller, q->path, b->controller, b->path, true, false);
-                if (r < 0 && ret == 0)
-                        ret = r;
+                mask |= unit_get_cgroup_mask(m) | unit_get_members_mask(m);
         }
 
-        return ret;
+        return mask;
 }
 
-int cgroup_bonding_migrate_to(CGroupBonding *b, const char *target, bool rem) {
-        assert(b);
-        assert(target);
+static CGroupControllerMask unit_get_siblings_mask(Unit *u) {
+        assert(u);
 
-        return cg_migrate_recursive(b->controller, b->path, b->controller, target, true, rem);
+        if (!UNIT_ISSET(u->slice))
+                return 0;
+
+        /* Sibling propagation is only relevant for weight-based
+         * controllers, so let's mask out everything else */
+        return unit_get_members_mask(UNIT_DEREF(u->slice)) &
+                (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT);
 }
 
-int cgroup_bonding_set_group_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid) {
-        assert(b);
+static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
+        char *path = NULL;
+        int r;
 
-        if (!b->realized)
-                return -EINVAL;
+        assert(u);
 
-        return cg_set_group_access(b->controller, b->path, mode, uid, gid);
-}
+        path = unit_default_cgroup_path(u);
+        if (!path)
+                return -ENOMEM;
 
-int cgroup_bonding_set_group_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid) {
-        CGroupBonding *b;
-        int r;
+        /* First, create our own group */
+        r = cg_create_with_mask(mask, path);
+        if (r < 0)
+                log_error("Failed to create cgroup %s: %s", path, strerror(-r));
 
-        LIST_FOREACH(by_unit, b, first) {
-                r = cgroup_bonding_set_group_access(b, mode, uid, gid);
+        /* Then, possibly move things over */
+        if (u->cgroup_path && !streq(path, u->cgroup_path)) {
+                r = cg_migrate_with_mask(mask, u->cgroup_path, path);
                 if (r < 0)
-                        return r;
+                        log_error("Failed to migrate cgroup %s: %s", path, strerror(-r));
         }
 
+        /* And remember the new data */
+        free(u->cgroup_path);
+        u->cgroup_path = path;
+        u->cgroup_realized = true;
+        u->cgroup_mask = mask;
+
         return 0;
 }
 
-int cgroup_bonding_set_task_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid, int sticky) {
-        assert(b);
+static void unit_realize_cgroup_now(Unit *u) {
+        CGroupControllerMask mask;
 
-        if (!b->realized)
-                return -EINVAL;
+        assert(u);
 
-        return cg_set_task_access(b->controller, b->path, mode, uid, gid, sticky);
-}
+        if (u->in_cgroup_queue) {
+                LIST_REMOVE(Unit, cgroup_queue, u->manager->cgroup_queue, u);
+                u->in_cgroup_queue = false;
+        }
 
-int cgroup_bonding_set_task_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid, int sticky) {
-        CGroupBonding *b;
-        int r;
+        mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
+        mask &= u->manager->cgroup_supported;
 
-        LIST_FOREACH(by_unit, b, first) {
-                r = cgroup_bonding_set_task_access(b, mode, uid, gid, sticky);
-                if (r < 0)
-                        return r;
-        }
+        if (u->cgroup_realized &&
+            u->cgroup_mask == mask)
+                return;
 
-        return 0;
+        /* First, realize parents */
+        if (UNIT_ISSET(u->slice))
+                unit_realize_cgroup_now(UNIT_DEREF(u->slice));
+
+        /* And then do the real work */
+        unit_create_cgroups(u, mask);
 }
 
-int cgroup_bonding_kill(CGroupBonding *b, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) {
-        char *p = NULL;
-        const char *path;
-        int r;
+static void unit_add_to_cgroup_queue(Unit *u) {
 
-        assert(b);
-        assert(sig >= 0);
+        if (u->in_cgroup_queue)
+                return;
 
-        /* Don't kill cgroups that aren't ours */
-        if (!b->ours)
-                return 0;
+        LIST_PREPEND(Unit, cgroup_queue, u->manager->cgroup_queue, u);
+        u->in_cgroup_queue = true;
+}
 
-        if (cgroup_suffix) {
-                p = strjoin(b->path, "/", cgroup_suffix, NULL);
-                if (!p)
-                        return -ENOMEM;
+unsigned manager_dispatch_cgroup_queue(Manager *m) {
+        Unit *i;
+        unsigned n = 0;
 
-                path = p;
-        } else
-                path = b->path;
+        while ((i = m->cgroup_queue)) {
+                assert(i->in_cgroup_queue);
 
-        r = cg_kill_recursive(b->controller, path, sig, sigcont, true, rem, s);
-        free(p);
+                unit_realize_cgroup_now(i);
+                cgroup_context_apply(unit_get_cgroup_context(i), i->cgroup_mask, i->cgroup_path);
+                n++;
+        }
 
-        return r;
+        return n;
 }
 
-int cgroup_bonding_kill_list(CGroupBonding *first, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) {
-        CGroupBonding *b;
-        Set *allocated_set = NULL;
-        int ret = -EAGAIN, r;
+static void unit_queue_siblings(Unit *u) {
+        Unit *slice;
 
-        if (!first)
-                return 0;
+        /* This adds the siblings of the specified unit and the
+         * siblings of all parent units to the cgroup queue. (But
+         * neither the specified unit itself nor the parents.) */
+
+        while ((slice = UNIT_DEREF(u->slice))) {
+                Iterator i;
+                Unit *m;
 
-        if (!s)
-                if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
-                        return -ENOMEM;
+                SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
+                        if (m == u)
+                                continue;
 
-        LIST_FOREACH(by_unit, b, first) {
-                r = cgroup_bonding_kill(b, sig, sigcont, rem, s, cgroup_suffix);
-                if (r < 0) {
-                        if (r == -EAGAIN || r == -ESRCH)
+                        if (UNIT_DEREF(m->slice) != slice)
                                 continue;
 
-                        ret = r;
-                        goto finish;
+                        unit_add_to_cgroup_queue(m);
                 }
 
-                if (ret < 0 || r > 0)
-                        ret = r;
+                u = slice;
         }
+}
+
+void unit_realize_cgroup(Unit *u) {
+        CGroupContext *c;
+
+        assert(u);
+
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return;
 
-finish:
-        if (allocated_set)
-                set_free(allocated_set);
+        /* So, here's the deal: when realizing the cgroups for this
+         * unit, we need to first create all parents, but there's more
+         * actually: for the weight-based controllers we also need to
+         * make sure that all our siblings (i.e. units that are in the
+         * same slice as we are) have cgroup too. Otherwise things
+         * would become very uneven as each of their processes would
+         * get as much resources as all our group together. This call
+         * will synchronously create the parent cgroups, but will
+         * defer work on the siblings to the next event loop
+         * iteration. */
 
-        return ret;
+        /* Add all sibling slices to the cgroup queue. */
+        unit_queue_siblings(u);
+
+        /* And realize this one now */
+        unit_realize_cgroup_now(u);
+
+        /* And apply the values */
+        cgroup_context_apply(c, u->cgroup_mask, u->cgroup_path);
 }
 
-/* Returns 1 if the group is empty, 0 if it is not, -EAGAIN if we
- * cannot know */
-int cgroup_bonding_is_empty(CGroupBonding *b) {
+void unit_destroy_cgroup(Unit *u) {
         int r;
 
-        assert(b);
+        assert(u);
 
-        if ((r = cg_is_empty_recursive(b->controller, b->path, true)) < 0)
-                return r;
+        if (!u->cgroup_path)
+                return;
 
-        /* If it is empty it is empty */
-        if (r > 0)
-                return 1;
+        r = cg_trim_with_mask(u->cgroup_mask, u->cgroup_path, true);
+        if (r < 0)
+                log_error("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r));
 
-        /* It's not only us using this cgroup, so we just don't know */
-        return b->ours ? 0 : -EAGAIN;
+        free(u->cgroup_path);
+        u->cgroup_path = NULL;
+        u->cgroup_realized = false;
+        u->cgroup_mask = 0;
 }
 
-int cgroup_bonding_is_empty_list(CGroupBonding *first) {
-        CGroupBonding *b;
+pid_t unit_search_main_pid(Unit *u) {
+        _cleanup_fclose_ FILE *f = NULL;
+        pid_t pid = 0, npid, mypid;
+
+        assert(u);
+
+        if (!u->cgroup_path)
+                return 0;
+
+        if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
+                return 0;
+
+        mypid = getpid();
+        while (cg_read_pid(f, &npid) > 0)  {
+                pid_t ppid;
+
+                if (npid == pid)
+                        continue;
 
-        LIST_FOREACH(by_unit, b, first) {
-                int r;
+                /* Ignore processes that aren't our kids */
+                if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
+                        continue;
 
-                r = cgroup_bonding_is_empty(b);
-                if (r < 0) {
-                        /* If this returned -EAGAIN, then we don't know if the
-                         * group is empty, so let's see if another group can
-                         * tell us */
+                if (pid != 0) {
+                        /* Dang, there's more than one daemonized PID
+                        in this group, so we don't know what process
+                        is the main process. */
+                        pid = 0;
+                        break;
+                }
 
-                        if (r != -EAGAIN)
-                                return r;
-                } else
-                        return r;
+                pid = npid;
         }
 
-        return -EAGAIN;
+        return pid;
 }
 
 int manager_setup_cgroup(Manager *m) {
@@ -394,8 +643,8 @@ int manager_setup_cgroup(Manager *m) {
                 return -errno;
         }
 
-        /* 6. Remove non-existing controllers from the default controllers list */
-        cg_shorten_controllers(m->default_controllers);
+        /* 6. Figure out which controllers are supported */
+        m->cgroup_supported = cg_mask_supported();
 
         return 0;
 }
@@ -417,201 +666,71 @@ void manager_shutdown_cgroup(Manager *m, bool delete) {
         m->cgroup_root = NULL;
 }
 
-int cgroup_bonding_get(Manager *m, const char *cgroup, CGroupBonding **bonding) {
-        CGroupBonding *b;
+Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
         char *p;
+        Unit *u;
 
         assert(m);
         assert(cgroup);
-        assert(bonding);
 
-        b = hashmap_get(m->cgroup_bondings, cgroup);
-        if (b) {
-                *bonding = b;
-                return 1;
-        }
+        u = hashmap_get(m->cgroup_unit, cgroup);
+        if (u)
+                return u;
 
         p = strdupa(cgroup);
-        if (!p)
-                return -ENOMEM;
-
         for (;;) {
                 char *e;
 
                 e = strrchr(p, '/');
-                if (e == p || !e) {
-                        *bonding = NULL;
-                        return 0;
-                }
+                if (e == p || !e)
+                        return NULL;
 
                 *e = 0;
 
-                b = hashmap_get(m->cgroup_bondings, p);
-                if (b) {
-                        *bonding = b;
-                        return 1;
-                }
+                u = hashmap_get(m->cgroup_unit, p);
+                if (u)
+                        return u;
         }
 }
 
-int cgroup_notify_empty(Manager *m, const char *group) {
-        CGroupBonding *l, *b;
+Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
+        _cleanup_free_ char *cgroup = NULL;
         int r;
 
         assert(m);
-        assert(group);
-
-        r = cgroup_bonding_get(m, group, &l);
-        if (r <= 0)
-                return r;
-
-        LIST_FOREACH(by_path, b, l) {
-                int t;
-
-                if (!b->unit)
-                        continue;
-
-                t = cgroup_bonding_is_empty_list(b);
-                if (t < 0) {
-
-                        /* If we don't know, we don't know */
-                        if (t != -EAGAIN)
-                                log_warning("Failed to check whether cgroup is empty: %s", strerror(errno));
-
-                        continue;
-                }
-
-                if (t > 0) {
-                        /* If it is empty, let's delete it */
-                        cgroup_bonding_trim_list(b->unit->cgroup_bondings, true);
-
-                        if (UNIT_VTABLE(b->unit)->cgroup_notify_empty)
-                                UNIT_VTABLE(b->unit)->cgroup_notify_empty(b->unit);
-                }
-        }
-
-        return 0;
-}
-
-Unit* cgroup_unit_by_pid(Manager *m, pid_t pid) {
-        CGroupBonding *l, *b;
-        char *group = NULL;
-
-        assert(m);
 
         if (pid <= 1)
                 return NULL;
 
-        if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &group) < 0)
-                return NULL;
-
-        l = hashmap_get(m->cgroup_bondings, group);
-
-        if (!l) {
-                char *slash;
-
-                while ((slash = strrchr(group, '/'))) {
-                        if (slash == group)
-                                break;
-
-                        *slash = 0;
-
-                        if ((l = hashmap_get(m->cgroup_bondings, group)))
-                                break;
-                }
-        }
-
-        free(group);
-
-        LIST_FOREACH(by_path, b, l) {
-
-                if (!b->unit)
-                        continue;
-
-                if (b->ours)
-                        return b->unit;
-        }
-
-        return NULL;
-}
-
-CGroupBonding *cgroup_bonding_find_list(CGroupBonding *first, const char *controller) {
-        CGroupBonding *b;
-
-        if (!controller)
-                controller = SYSTEMD_CGROUP_CONTROLLER;
-
-        LIST_FOREACH(by_unit, b, first)
-                if (streq(b->controller, controller))
-                        return b;
-
-        return NULL;
-}
-
-char *cgroup_bonding_to_string(CGroupBonding *b) {
-        char *r;
-
-        assert(b);
-
-        if (asprintf(&r, "%s:%s", b->controller, b->path) < 0)
+        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+        if (r < 0)
                 return NULL;
 
-        return r;
+        return manager_get_unit_by_cgroup(m, cgroup);
 }
 
-pid_t cgroup_bonding_search_main_pid(CGroupBonding *b) {
-        FILE *f;
-        pid_t pid = 0, npid, mypid;
-
-        assert(b);
+int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
+        Unit *u;
+        int r;
 
-        if (!b->ours)
-                return 0;
+        assert(m);
+        assert(cgroup);
 
-        if (cg_enumerate_processes(b->controller, b->path, &f) < 0)
+        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup, true);
+        if (r == 0)
                 return 0;
 
-        mypid = getpid();
+        u = manager_get_unit_by_cgroup(m, cgroup);
+        if (u && UNIT_VTABLE(u)->notify_cgroup_empty)
+                UNIT_VTABLE(u)->notify_cgroup_empty(u);
 
-        while (cg_read_pid(f, &npid) > 0)  {
-                pid_t ppid;
-
-                if (npid == pid)
-                        continue;
-
-                /* Ignore processes that aren't our kids */
-                if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
-                        continue;
-
-                if (pid != 0) {
-                        /* Dang, there's more than one daemonized PID
-                        in this group, so we don't know what process
-                        is the main process. */
-                        pid = 0;
-                        break;
-                }
-
-                pid = npid;
-        }
-
-        fclose(f);
-
-        return pid;
+        return 0;
 }
 
-pid_t cgroup_bonding_search_main_pid_list(CGroupBonding *first) {
-        CGroupBonding *b;
-        pid_t pid;
-
-        /* Try to find a main pid from this cgroup, but checking if
-         * there's only one PID in the cgroup and returning it. Later
-         * on we might want to add additional, smarter heuristics
-         * here. */
-
-        LIST_FOREACH(by_unit, b, first)
-                if ((pid = cgroup_bonding_search_main_pid(b)) != 0)
-                        return pid;
+static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
+        [CGROUP_AUTO] = "auto",
+        [CGROUP_CLOSED] = "closed",
+        [CGROUP_STRICT] = "strict",
+};
 
-        return 0;
-
-}
+DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);
author	Lennart Poettering <lennart@poettering.net>	2013-06-27 04:14:27 +0200
committer	Lennart Poettering <lennart@poettering.net>	2013-06-27 04:17:34 +0200
commit	4ad490007b70e6ac18d3cb04fa2ed92eba1451fa (patch)
tree	20c7aab57b1f2722be1a057a28a6e7c16788c976 /src/core/cgroup.c
parent	abb26902e424c4142b68ead35676028b12249b77 (diff)