diff options
author | Lennart Poettering <lennart@poettering.net> | 2013-06-27 04:14:27 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2013-06-27 04:17:34 +0200 |
commit | 4ad490007b70e6ac18d3cb04fa2ed92eba1451fa (patch) | |
tree | 20c7aab57b1f2722be1a057a28a6e7c16788c976 | |
parent | abb26902e424c4142b68ead35676028b12249b77 (diff) |
core: general cgroup rework
Replace the very generic cgroup hookup with a much simpler one. With
this change only the high-level cgroup settings remain, the ability to
set arbitrary cgroup attributes is removed, so is support for adding
units to arbitrary cgroup controllers or setting arbitrary paths for
them (especially paths that are different for the various controllers).
This also introduces a new -.slice root slice, that is the parent of
system.slice and friends. This enables easy admin configuration of
root-level cgrouo properties.
This replaces DeviceDeny= by DevicePolicy=, and implicitly adds in
/dev/null, /dev/zero and friends if DeviceAllow= is used (unless this is
turned off by DevicePolicy=).
57 files changed, 1644 insertions, 2804 deletions
diff --git a/Makefile.am b/Makefile.am index 016d7dad32..12254e39a8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -379,6 +379,7 @@ dist_systemunit_DATA = \ units/swap.target \ units/slices.target \ units/system.slice \ + units/-.slice \ units/systemd-initctl.socket \ units/systemd-shutdownd.socket \ units/syslog.socket \ @@ -880,14 +881,16 @@ libsystemd_core_la_SOURCES = \ src/core/dbus-snapshot.h \ src/core/dbus-device.c \ src/core/dbus-device.h \ - src/core/dbus-execute.c \ - src/core/dbus-execute.h \ - src/core/dbus-kill.c \ - src/core/dbus-kill.h \ src/core/dbus-path.c \ src/core/dbus-path.h \ src/core/dbus-slice.c \ src/core/dbus-slice.h \ + src/core/dbus-execute.c \ + src/core/dbus-execute.h \ + src/core/dbus-kill.c \ + src/core/dbus-kill.h \ + src/core/dbus-cgroup.c \ + src/core/dbus-cgroup.h \ src/core/cgroup.c \ src/core/cgroup.h \ src/core/selinux-access.c \ @@ -914,10 +917,6 @@ libsystemd_core_la_SOURCES = \ src/core/namespace.h \ src/core/tcpwrap.c \ src/core/tcpwrap.h \ - src/core/cgroup-attr.c \ - src/core/cgroup-attr.h \ - src/core/cgroup-semantics.c \ - src/core/cgroup-semantics.h \ src/core/securebits.h \ src/core/initreq.h \ src/core/special.h \ @@ -1137,6 +1136,10 @@ test_ns_SOURCES = \ test_ns_LDADD = \ libsystemd-core.la +test_ns_CFLAGS = \ + $(AM_CFLAGS) \ + $(DBUS_CFLAGS) + test_loopback_SOURCES = \ src/test/test-loopback.c @@ -28,11 +28,17 @@ Fedora 19: Features: -* journald: make sure ratelimit is actually really per-service with the new cgroup changes +* split out CreateMachine into systemd-machined + +* "transient" units, i.e units that are not sourced from disk but + created only transiently via bus calls -* when creating a session or machine, automatically move the process into the root cgroup for all other hierarchies +* introduce new Scope unit type then make logind's session and machine + registration use this to set up cgroups -* maybe reintroduce nspawn -C? +* should Slice= be part of [Unit] or of [Service]? + +* journald: make sure ratelimit is actually really per-service with the new cgroup changes * move systemctl dump to systemd-analyze @@ -49,12 +55,6 @@ Features: * when a service changes state make reflect that in the RUNNING/LISTENING states of its socket -* slices: - - add option to pam_systemd to move login session into a slice (?) - - remove ControlGroup= setting - - in sd_pid_get_owner_uid() fallback to query session file - - add api to determine slice of unit - * when recursively showing the cgroup hierarchy, optionally also show the hierarchies of child processes diff --git a/src/core/cgroup-attr.c b/src/core/cgroup-attr.c deleted file mode 100644 index 7e3e08eabb..0000000000 --- a/src/core/cgroup-attr.c +++ /dev/null @@ -1,132 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - -/*** - This file is part of systemd. - - Copyright 2011 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with systemd; If not, see <http://www.gnu.org/licenses/>. -***/ - -#include "cgroup-attr.h" -#include "cgroup-util.h" -#include "list.h" -#include "fileio.h" - -int cgroup_attribute_apply(CGroupAttribute *a, CGroupBonding *b) { - _cleanup_free_ char *path = NULL, *v = NULL; - int r; - - assert(a); - - b = cgroup_bonding_find_list(b, a->controller); - if (!b) - return 0; - - if (a->semantics && a->semantics->map_write) { - r = a->semantics->map_write(a->semantics, a->value, &v); - if (r < 0) - return r; - } - - r = cg_get_path(a->controller, b->path, a->name, &path); - if (r < 0) - return r; - - r = write_string_file(path, v ? v : a->value); - if (r < 0) - log_warning("Failed to write '%s' to %s: %s", v ? v : a->value, path, strerror(-r)); - - return r; -} - -int cgroup_attribute_apply_list(CGroupAttribute *first, CGroupBonding *b) { - CGroupAttribute *a; - int r = 0; - - LIST_FOREACH(by_unit, a, first) { - int k; - - k = cgroup_attribute_apply(a, b); - if (r == 0) - r = k; - } - - return r; -} - -bool cgroup_attribute_matches(CGroupAttribute *a, const char *controller, const char *name) { - assert(a); - - if (controller) { - if (streq(a->controller, controller) && (!name || streq(a->name, name))) - return true; - - } else if (!name) - return true; - else if (streq(a->name, name)) { - size_t x, y; - x = strlen(a->controller); - y = strlen(name); - - if (y > x && - memcmp(a->controller, name, x) == 0 && - name[x] == '.') - return true; - } - - return false; -} - -CGroupAttribute *cgroup_attribute_find_list( - CGroupAttribute *first, - const char *controller, - const char *name) { - CGroupAttribute *a; - - assert(name); - - LIST_FOREACH(by_unit, a, first) - if (cgroup_attribute_matches(a, controller, name)) - return a; - - return NULL; -} - -void cgroup_attribute_free(CGroupAttribute *a) { - assert(a); - - if (a->unit) - LIST_REMOVE(CGroupAttribute, by_unit, a->unit->cgroup_attributes, a); - - free(a->controller); - free(a->name); - free(a->value); - free(a); -} - -void cgroup_attribute_free_list(CGroupAttribute *first) { - CGroupAttribute *a, *n; - - LIST_FOREACH_SAFE(by_unit, a, n, first) - cgroup_attribute_free(a); -} - -void cgroup_attribute_free_some(CGroupAttribute *first, const char *controller, const char *name) { - CGroupAttribute *a, *n; - - LIST_FOREACH_SAFE(by_unit, a, n, first) - if (cgroup_attribute_matches(a, controller, name)) - cgroup_attribute_free(a); -} diff --git a/src/core/cgroup-attr.h b/src/core/cgroup-attr.h deleted file mode 100644 index 3a13b7c92d..0000000000 --- a/src/core/cgroup-attr.h +++ /dev/null @@ -1,50 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - -#pragma once - -/*** - This file is part of systemd. - - Copyright 2011 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with systemd; If not, see <http://www.gnu.org/licenses/>. -***/ - -typedef struct CGroupAttribute CGroupAttribute; - -#include "unit.h" -#include "cgroup.h" -#include "cgroup-semantics.h" - -struct CGroupAttribute { - char *controller; - char *name; - char *value; - - Unit *unit; - - const CGroupSemantics *semantics; - - LIST_FIELDS(CGroupAttribute, by_unit); -}; - -int cgroup_attribute_apply(CGroupAttribute *a, CGroupBonding *b); -int cgroup_attribute_apply_list(CGroupAttribute *first, CGroupBonding *b); - -bool cgroup_attribute_matches(CGroupAttribute *a, const char *controller, const char *name) _pure_; -CGroupAttribute *cgroup_attribute_find_list(CGroupAttribute *first, const char *controller, const char *name) _pure_; - -void cgroup_attribute_free(CGroupAttribute *a); -void cgroup_attribute_free_list(CGroupAttribute *first); -void cgroup_attribute_free_some(CGroupAttribute *first, const char *controller, const char *name); diff --git a/src/core/cgroup-semantics.c b/src/core/cgroup-semantics.c deleted file mode 100644 index 7df9d014e9..0000000000 --- a/src/core/cgroup-semantics.c +++ /dev/null @@ -1,333 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - -/*** - This file is part of systemd. - - Copyright 2013 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with systemd; If not, see <http://www.gnu.org/licenses/>. -***/ - -#include "util.h" -#include "strv.h" -#include "path-util.h" -#include "cgroup-util.h" - -#include "cgroup-semantics.h" - -static int parse_cpu_shares(const CGroupSemantics *s, const char *value, char **ret) { - unsigned long ul; - - assert(s); - assert(value); - assert(ret); - - if (safe_atolu(value, &ul) < 0 || ul < 1) - return -EINVAL; - - if (asprintf(ret, "%lu", ul) < 0) - return -ENOMEM; - - return 1; -} - -static int parse_memory_limit(const CGroupSemantics *s, const char *value, char **ret) { - off_t sz; - - assert(s); - assert(value); - assert(ret); - - if (parse_bytes(value, &sz) < 0 || sz <= 0) - return -EINVAL; - - if (asprintf(ret, "%llu", (unsigned long long) sz) < 0) - return -ENOMEM; - - return 1; -} - -static int parse_device(const CGroupSemantics *s, const char *value, char **ret) { - _cleanup_strv_free_ char **l = NULL; - char *x; - unsigned k; - - assert(s); - assert(value); - assert(ret); - - l = strv_split_quoted(value); - if (!l) - return -ENOMEM; - - k = strv_length(l); - if (k < 1 || k > 2) - return -EINVAL; - - if (!streq(l[0], "*") && !path_startswith(l[0], "/dev")) - return -EINVAL; - - if (!isempty(l[1]) && !in_charset(l[1], "rwm")) - return -EINVAL; - - x = strdup(value); - if (!x) - return -ENOMEM; - - *ret = x; - return 1; -} - -static int parse_blkio_weight(const CGroupSemantics *s, const char *value, char **ret) { - _cleanup_strv_free_ char **l = NULL; - unsigned long ul; - - assert(s); - assert(value); - assert(ret); - - l = strv_split_quoted(value); - if (!l) - return -ENOMEM; - - if (strv_length(l) != 1) - return 0; /* Returning 0 will cause parse_blkio_weight_device() be tried instead */ - - if (safe_atolu(l[0], &ul) < 0 || ul < 10 || ul > 1000) - return -EINVAL; - - if (asprintf(ret, "%lu", ul) < 0) - return -ENOMEM; - - return 1; -} - -static int parse_blkio_weight_device(const CGroupSemantics *s, const char *value, char **ret) { - _cleanup_strv_free_ char **l = NULL; - unsigned long ul; - - assert(s); - assert(value); - assert(ret); - - l = strv_split_quoted(value); - if (!l) - return -ENOMEM; - - if (strv_length(l) != 2) - return -EINVAL; - - if (!path_startswith(l[0], "/dev")) - return -EINVAL; - - if (safe_atolu(l[1], &ul) < 0 || ul < 10 || ul > 1000) - return -EINVAL; - - if (asprintf(ret, "%s %lu", l[0], ul) < 0) - return -ENOMEM; - - return 1; -} - -static int parse_blkio_bandwidth(const CGroupSemantics *s, const char *value, char **ret) { - _cleanup_strv_free_ char **l = NULL; - off_t bytes; - - assert(s); - assert(value); - assert(ret); - - l = strv_split_quoted(value); - if (!l) - return -ENOMEM; - - if (strv_length(l) != 2) - return -EINVAL; - - if (!path_startswith(l[0], "/dev")) { - return -EINVAL; - } - - if (parse_bytes(l[1], &bytes) < 0 || bytes <= 0) - return -EINVAL; - - if (asprintf(ret, "%s %llu", l[0], (unsigned long long) bytes) < 0) - return -ENOMEM; - - return 0; -} - -static int map_device(const CGroupSemantics *s, const char *value, char **ret) { - _cleanup_strv_free_ char **l = NULL; - unsigned k; - - assert(s); - assert(value); - assert(ret); - - l = strv_split_quoted(value); - if (!l) - return -ENOMEM; - - k = strv_length(l); - if (k < 1 || k > 2) - return -EINVAL; - - if (streq(l[0], "*")) { - - if (asprintf(ret, "a *:*%s%s", - isempty(l[1]) ? "" : " ", strempty(l[1])) < 0) - return -ENOMEM; - } else { - struct stat st; - - if (stat(l[0], &st) < 0) { - log_warning("Couldn't stat device %s", l[0]); - return -errno; - } - - if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { - log_warning("%s is not a device.", l[0]); - return -ENODEV; - } - - if (asprintf(ret, "%c %u:%u%s%s", - S_ISCHR(st.st_mode) ? 'c' : 'b', - major(st.st_rdev), minor(st.st_rdev), - isempty(l[1]) ? "" : " ", strempty(l[1])) < 0) - return -ENOMEM; - } - - return 0; -} - -static int map_blkio(const CGroupSemantics *s, const char *value, char **ret) { - _cleanup_strv_free_ char **l = NULL; - struct stat st; - dev_t d; - - assert(s); - assert(value); - assert(ret); - - l = strv_split_quoted(value); - if (!l) - return log_oom(); - - if (strv_length(l) != 2) - return -EINVAL; - - if (stat(l[0], &st) < 0) { - log_warning("Couldn't stat device %s", l[0]); - return -errno; - } - - if (S_ISBLK(st.st_mode)) - d = st.st_rdev; - else if (major(st.st_dev) != 0) { - /* If this is not a device node then find the block - * device this file is stored on */ - d = st.st_dev; - - /* If this is a partition, try to get the originating - * block device */ - block_get_whole_disk(d, &d); - } else { - log_warning("%s is not a block device and file system block device cannot be determined or is not local.", l[0]); - return -ENODEV; - } - - if (asprintf(ret, "%u:%u %s", major(d), minor(d), l[1]) < 0) - return -ENOMEM; - - return 0; -} - -static const CGroupSemantics semantics[] = { - { "cpu", "cpu.shares", "CPUShares", false, parse_cpu_shares, NULL, NULL }, - { "memory", "memory.soft_limit_in_bytes", "MemorySoftLimit", false, parse_memory_limit, NULL, NULL }, - { "memory", "memory.limit_in_bytes", "MemoryLimit", false, parse_memory_limit, NULL, NULL }, - { "devices", "devices.allow", "DeviceAllow", true, parse_device, map_device, NULL }, - { "devices", "devices.deny", "DeviceDeny", true, parse_device, map_device, NULL }, - { "blkio", "blkio.weight", "BlockIOWeight", false, parse_blkio_weight, NULL, NULL }, - { "blkio", "blkio.weight_device", "BlockIOWeight", true, parse_blkio_weight_device, map_blkio, NULL }, - { "blkio", "blkio.read_bps_device", "BlockIOReadBandwidth", true, parse_blkio_bandwidth, map_blkio, NULL }, - { "blkio", "blkio.write_bps_device", "BlockIOWriteBandwidth", true, parse_blkio_bandwidth, map_blkio, NULL } -}; - -int cgroup_semantics_find( - const char *controller, - const char *name, - const char *value, - char **ret, - const CGroupSemantics **_s) { - - _cleanup_free_ char *c = NULL; - unsigned i; - int r; - - assert(name); - assert(_s); - assert(!value == !ret); - - if (!controller) { - r = cg_controller_from_attr(name, &c); - if (r < 0) - return r; - - controller = c; - } - - for (i = 0; i < ELEMENTSOF(semantics); i++) { - const CGroupSemantics *s = semantics + i; - bool matches_name, matches_pretty; - - if (controller && s->controller && !streq(s->controller, controller)) - continue; - - matches_name = s->name && streq(s->name, name); - matches_pretty = s->pretty && streq(s->pretty, name); - - if (!matches_name && !matches_pretty) - continue; - - if (value) { - if (matches_pretty && s->map_pretty) { - - r = s->map_pretty(s, value, ret); - if (r < 0) - return r; - - if (r == 0) - continue; - - } else { - char *x; - - x = strdup(value); - if (!x) - return -ENOMEM; - - *ret = x; - } - } - - *_s = s; - return 1; - } - - *ret = NULL; - *_s = NULL; - return 0; -} diff --git a/src/core/cgroup-semantics.h b/src/core/cgroup-semantics.h deleted file mode 100644 index 4f848f4bb7..0000000000 --- a/src/core/cgroup-semantics.h +++ /dev/null @@ -1,43 +0,0 @@ -/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ - -#pragma once - -/*** - This file is part of systemd. - - Copyright 2011 Lennart Poettering - - systemd is free software; you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or - (at your option) any later version. - - systemd is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with systemd; If not, see <http://www.gnu.org/licenses/>. -***/ - -typedef struct CGroupSemantics CGroupSemantics; - -struct CGroupSemantics { - const char *controller; - const char *name; - const char *pretty; - - bool multiple; - - /* This call is used for parsing the pretty value to the actual attribute value */ - int (*map_pretty)(const CGroupSemantics *semantics, const char *value, char **ret); - - /* Right before writing this attribute the attribute value is converted to a low-level value */ - int (*map_write)(const CGroupSemantics *semantics, const char *value, char **ret); - - /* If this attribute takes a list, this call can be used to reset the list to empty */ - int (*reset)(const CGroupSemantics *semantics, const char *group); -}; - -int cgroup_semantics_find(const char *controller, const char *name, const char *value, char **ret, const CGroupSemantics **semantics); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 5065329739..79467a82ce 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3,7 +3,7 @@ /*** This file is part of systemd. - Copyright 2010 Lennart Poettering + Copyright 2013 Lennart Poettering systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -19,305 +19,554 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -#include <errno.h> -#include <assert.h> -#include <unistd.h> -#include <sys/types.h> -#include <signal.h> -#include <sys/mount.h> #include <fcntl.h> -#include "cgroup.h" -#include "cgroup-util.h" -#include "log.h" -#include "strv.h" #include "path-util.h" #include "special.h" +#include "cgroup-util.h" +#include "cgroup.h" -int cgroup_bonding_realize(CGroupBonding *b) { - int r; +void cgroup_context_init(CGroupContext *c) { + assert(c); + + /* Initialize everything to the kernel defaults, assuming the + * structure is preinitialized to 0 */ + + c->cpu_shares = 1024; + c->memory_limit = c->memory_soft_limit = (uint64_t) -1; + c->blockio_weight = 1000; +} +void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { + assert(c); + assert(a); + + LIST_REMOVE(CGroupDeviceAllow, device_allow, c->device_allow, a); + free(a->path); + free(a); +} + +void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) { + assert(c); + assert(w); + + LIST_REMOVE(CGroupBlockIODeviceWeight, device_weights, c->blockio_device_weights, w); + free(w->path); + free(w); +} + +void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) { + assert(c); assert(b); - assert(b->path); - assert(b->controller); - r = cg_create(b->controller, b->path, NULL); + LIST_REMOVE(CGroupBlockIODeviceBandwidth, device_bandwidths, c->blockio_device_bandwidths, b); + free(b->path); + free(b); +} + +void cgroup_context_done(CGroupContext *c) { + assert(c); + + while (c->blockio_device_weights) + cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights); + + while (c->blockio_device_bandwidths) + cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths); + + while (c->device_allow) + cgroup_context_free_device_allow(c, c->device_allow); +} + +void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { + CGroupBlockIODeviceBandwidth *b; + CGroupBlockIODeviceWeight *w; + CGroupDeviceAllow *a; + + assert(c); + assert(f); + + prefix = strempty(prefix); + + fprintf(f, + "%sCPUAccounting=%s\n" + "%sBlockIOAccounting=%s\n" + "%sMemoryAccounting=%s\n" + "%sCPUShares=%lu\n" + "%sBlockIOWeight%lu\n" + "%sMemoryLimit=%" PRIu64 "\n" + "%sMemorySoftLimit=%" PRIu64 "\n" + "%sDevicePolicy=%s\n", + prefix, yes_no(c->cpu_accounting), + prefix, yes_no(c->blockio_accounting), + prefix, yes_no(c->memory_accounting), + prefix, c->cpu_shares, + prefix, c->blockio_weight, + prefix, c->memory_limit, + prefix, c->memory_soft_limit, + prefix, cgroup_device_policy_to_string(c->device_policy)); + + LIST_FOREACH(device_allow, a, c->device_allow) + fprintf(f, + "%sDeviceAllow=%s %s%s%s\n", + prefix, + a->path, + a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : ""); + + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + fprintf(f, + "%sBlockIOWeight=%s %lu", + prefix, + w->path, + w->weight); + + LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { + char buf[FORMAT_BYTES_MAX]; + + fprintf(f, + "%s%s=%s %s\n", + prefix, + b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth", + b->path, + format_bytes(buf, sizeof(buf), b->bandwidth)); + } +} + +static int lookup_blkio_device(const char *p, dev_t *dev) { + struct stat st; + int r; + + assert(p); + assert(dev); + + r = stat(p, &st); if (r < 0) { - log_warning("Failed to create cgroup %s:%s: %s", b->controller, b->path, strerror(-r)); - return r; + log_warning("Couldn't stat device %s: %m", p); + return -errno; } - b->realized = true; + if (S_ISBLK(st.st_mode)) + *dev = st.st_rdev; + else if (major(st.st_dev) != 0) { + /* If this is not a device node then find the block + * device this file is stored on */ + *dev = st.st_dev; + + /* If this is a partition, try to get the originating + * block device */ + block_get_whole_disk(*dev, dev); + } else { + log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p); + return -ENODEV; + } return 0; } -int cgroup_bonding_realize_list(CGroupBonding *first) { - CGroupBonding *b; +static int whitelist_device(const char *path, const char *node, const char *acc) { + char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4]; + struct stat st; int r; - LIST_FOREACH(by_unit, b, first) - if ((r = cgroup_bonding_realize(b)) < 0 && b->essential) - return r; + assert(path); + assert(acc); - return 0; + if (stat(node, &st) < 0) { + log_warning("Couldn't stat device %s", node); + return -errno; + } + + if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { + log_warning("%s is not a device.", node); + return -ENODEV; + } + + sprintf(buf, + "%c %u:%u %s", + S_ISCHR(st.st_mode) ? 'c' : 'b', + major(st.st_rdev), minor(st.st_rdev), + acc); + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + log_warning("Failed to set devices.allow on %s: %s", path, strerror(-r)); + + return r; } -void cgroup_bonding_free(CGroupBonding *b, bool trim) { - assert(b); +void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path) { + int r; + + assert(c); + assert(path); - if (b->unit) { - CGroupBonding *f; + if (mask == 0) + return; - LIST_REMOVE(CGroupBonding, by_unit, b->unit->cgroup_bondings, b); + if (mask & CGROUP_CPU) { + char buf[DECIMAL_STR_MAX(unsigned long) + 1]; - if (streq(b->controller, SYSTEMD_CGROUP_CONTROLLER)) { - assert_se(f = hashmap_get(b->unit->manager->cgroup_bondings, b->path)); - LIST_REMOVE(CGroupBonding, by_path, f, b); + sprintf(buf, "%lu\n", c->cpu_shares); + r = cg_set_attribute("cpu", path, "cpu.shares", buf); + if (r < 0) + log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r)); + } + + if (mask & CGROUP_BLKIO) { + char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1, + DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1, + DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; + CGroupBlockIODeviceWeight *w; + CGroupBlockIODeviceBandwidth *b; + + sprintf(buf, "%lu\n", c->blockio_weight); + r = cg_set_attribute("blkio", path, "blkio.weight", buf); + if (r < 0) + log_warning("Failed to set blkio.weight on %s: %s", path, strerror(-r)); + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) { + dev_t dev; + + r = lookup_blkio_device(w->path, &dev); + if (r < 0) + continue; - if (f) - hashmap_replace(b->unit->manager->cgroup_bondings, b->path, f); - else - hashmap_remove(b->unit->manager->cgroup_bondings, b->path); + sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight); + r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); + if (r < 0) + log_error("Failed to set blkio.weight_device on %s: %s", path, strerror(-r)); + } + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { + const char *a; + dev_t dev; + + r = lookup_blkio_device(b->path, &dev); + if (r < 0) + continue; + + a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device"; + + sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth); + r = cg_set_attribute("blkio", path, a, buf); + if (r < 0) + log_error("Failed to set %s on %s: %s", a, path, strerror(-r)); } } - if (b->realized && b->ours && trim) - cg_trim(b->controller, b->path, false); + if (mask & CGROUP_MEMORY) { + char buf[DECIMAL_STR_MAX(uint64_t) + 1]; - free(b->controller); - free(b->path); - free(b); -} + sprintf(buf, "%" PRIu64 "\n", c->memory_limit); + r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf); + if (r < 0) + log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r)); -void cgroup_bonding_free_list(CGroupBonding *first, bool remove_or_trim) { - CGroupBonding *b, *n; + sprintf(buf, "%" PRIu64 "\n", c->memory_soft_limit); + cg_set_attribute("memory", path, "memory.soft_limit_in_bytes", buf); + if (r < 0) + log_error("Failed to set memory.limit_in_bytes on %s: %s", path, strerror(-r)); + } - LIST_FOREACH_SAFE(by_unit, b, n, first) - cgroup_bonding_free(b, remove_or_trim); -} + if (mask & CGROUP_DEVICE) { + CGroupDeviceAllow *a; -void cgroup_bonding_trim(CGroupBonding *b, bool delete_root) { - assert(b); + if (c->device_allow || c->device_policy != CGROUP_AUTO) + r = cg_set_attribute("devices", path, "devices.deny", "a"); + else + r = cg_set_attribute("devices", path, "devices.allow", "a"); + if (r < 0) + log_error("Failed to reset devices.list on %s: %s", path, strerror(-r)); - if (b->realized && b->ours) - cg_trim(b->controller, b->path, delete_root); -} + if (c->device_policy == CGROUP_CLOSED || + (c->device_policy == CGROUP_AUTO && c->device_allow)) { + static const char auto_devices[] = + "/dev/null\0" "rw\0" + "/dev/zero\0" "rw\0" + "/dev/full\0" "rw\0" + "/dev/random\0" "rw\0" + "/dev/urandom\0" "rw\0"; + + const char *x, *y; + + NULSTR_FOREACH_PAIR(x, y, auto_devices) + whitelist_device(path, x, y); + } + + LIST_FOREACH(device_allow, a, c->device_allow) { + char acc[4]; + unsigned k = 0; + + if (a->r) + acc[k++] = 'r'; + if (a->w) + acc[k++] = 'w'; + if (a->m) + acc[k++] = 'm'; -void cgroup_bonding_trim_list(CGroupBonding *first, bool delete_root) { - CGroupBonding *b; + if (k == 0) + continue; - LIST_FOREACH(by_unit, b, first) - cgroup_bonding_trim(b, delete_root); + acc[k++] = 0; + whitelist_device(path, a->path, acc); + } + } } -int cgroup_bonding_install(CGroupBonding *b, pid_t pid, const char *cgroup_suffix) { - _cleanup_free_ char *p = NULL; - const char *path; - int r; +CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) { + CGroupControllerMask mask = 0; - assert(b); - assert(pid >= 0); + /* Figure out which controllers we need */ - if (cgroup_suffix) { - p = strjoin(b->path, "/", cgroup_suffix, NULL); - if (!p) - return -ENOMEM; + if (c->cpu_accounting || c->cpu_shares != 1024) + mask |= CGROUP_CPUACCT | CGROUP_CPU; - path = p; - } else - path = b->path; + if (c->blockio_accounting || + c->blockio_weight != 1000 || + c->blockio_device_weights || + c->blockio_device_bandwidths) + mask |= CGROUP_BLKIO; - r = cg_create_and_attach(b->controller, path, pid); - if (r < 0) - return r; + if (c->memory_accounting || + c->memory_limit != (uint64_t) -1 || + c->memory_soft_limit != (uint64_t) -1) + mask |= CGROUP_MEMORY; - b->realized = true; - return 0; + if (c->device_allow || c->device_policy != CGROUP_AUTO) + mask |= CGROUP_DEVICE; + + return mask; } -int cgroup_bonding_install_list(CGroupBonding *first, pid_t pid, const char *cgroup_suffix) { - CGroupBonding *b; - int r; +static CGroupControllerMask unit_get_cgroup_mask(Unit *u) { + CGroupContext *c; - LIST_FOREACH(by_unit, b, first) { - r = cgroup_bonding_install(b, pid, cgroup_suffix); - if (r < 0 && b->essential) - return r; - } + c = unit_get_cgroup_context(u); + if (!c) + return 0; - return 0; + return cgroup_context_get_mask(c); } -int cgroup_bonding_migrate(CGroupBonding *b, CGroupBonding *list) { - CGroupBonding *q; - int ret = 0; +static CGroupControllerMask unit_get_members_mask(Unit *u) { + CGroupControllerMask mask = 0; + Unit *m; + Iterator i; - LIST_FOREACH(by_unit, q, list) { - int r; + assert(u); - if (q == b) - continue; + SET_FOREACH(m, u->dependencies[UNIT_BEFORE], i) { - if (!q->ours) + if (UNIT_DEREF(m->slice) != u) continue; - r = cg_migrate_recursive(q->controller, q->path, b->controller, b->path, true, false); - if (r < 0 && ret == 0) - ret = r; + mask |= unit_get_cgroup_mask(m) | unit_get_members_mask(m); } - return ret; + return mask; } -int cgroup_bonding_migrate_to(CGroupBonding *b, const char *target, bool rem) { - assert(b); - assert(target); +static CGroupControllerMask unit_get_siblings_mask(Unit *u) { + assert(u); - return cg_migrate_recursive(b->controller, b->path, b->controller, target, true, rem); + if (!UNIT_ISSET(u->slice)) + return 0; + + /* Sibling propagation is only relevant for weight-based + * controllers, so let's mask out everything else */ + return unit_get_members_mask(UNIT_DEREF(u->slice)) & + (CGROUP_CPU|CGROUP_BLKIO|CGROUP_CPUACCT); } -int cgroup_bonding_set_group_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid) { - assert(b); +static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) { + char *path = NULL; + int r; - if (!b->realized) - return -EINVAL; + assert(u); - return cg_set_group_access(b->controller, b->path, mode, uid, gid); -} + path = unit_default_cgroup_path(u); + if (!path) + return -ENOMEM; -int cgroup_bonding_set_group_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid) { - CGroupBonding *b; - int r; + /* First, create our own group */ + r = cg_create_with_mask(mask, path); + if (r < 0) + log_error("Failed to create cgroup %s: %s", path, strerror(-r)); - LIST_FOREACH(by_unit, b, first) { - r = cgroup_bonding_set_group_access(b, mode, uid, gid); + /* Then, possibly move things over */ + if (u->cgroup_path && !streq(path, u->cgroup_path)) { + r = cg_migrate_with_mask(mask, u->cgroup_path, path); if (r < 0) - return r; + log_error("Failed to migrate cgroup %s: %s", path, strerror(-r)); } + /* And remember the new data */ + free(u->cgroup_path); + u->cgroup_path = path; + u->cgroup_realized = true; + u->cgroup_mask = mask; + return 0; } -int cgroup_bonding_set_task_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid, int sticky) { - assert(b); +static void unit_realize_cgroup_now(Unit *u) { + CGroupControllerMask mask; - if (!b->realized) - return -EINVAL; + assert(u); - return cg_set_task_access(b->controller, b->path, mode, uid, gid, sticky); -} + if (u->in_cgroup_queue) { + LIST_REMOVE(Unit, cgroup_queue, u->manager->cgroup_queue, u); + u->in_cgroup_queue = false; + } -int cgroup_bonding_set_task_access_list(CGroupBonding *first, mode_t mode, uid_t uid, gid_t gid, int sticky) { - CGroupBonding *b; - int r; + mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u); + mask &= u->manager->cgroup_supported; - LIST_FOREACH(by_unit, b, first) { - r = cgroup_bonding_set_task_access(b, mode, uid, gid, sticky); - if (r < 0) - return r; - } + if (u->cgroup_realized && + u->cgroup_mask == mask) + return; - return 0; + /* First, realize parents */ + if (UNIT_ISSET(u->slice)) + unit_realize_cgroup_now(UNIT_DEREF(u->slice)); + + /* And then do the real work */ + unit_create_cgroups(u, mask); } -int cgroup_bonding_kill(CGroupBonding *b, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) { - char *p = NULL; - const char *path; - int r; +static void unit_add_to_cgroup_queue(Unit *u) { - assert(b); - assert(sig >= 0); + if (u->in_cgroup_queue) + return; - /* Don't kill cgroups that aren't ours */ - if (!b->ours) - return 0; + LIST_PREPEND(Unit, cgroup_queue, u->manager->cgroup_queue, u); + u->in_cgroup_queue = true; +} - if (cgroup_suffix) { - p = strjoin(b->path, "/", cgroup_suffix, NULL); - if (!p) - return -ENOMEM; +unsigned manager_dispatch_cgroup_queue(Manager *m) { + Unit *i; + unsigned n = 0; - path = p; - } else - path = b->path; + while ((i = m->cgroup_queue)) { + assert(i->in_cgroup_queue); - r = cg_kill_recursive(b->controller, path, sig, sigcont, true, rem, s); - free(p); + unit_realize_cgroup_now(i); + cgroup_context_apply(unit_get_cgroup_context(i), i->cgroup_mask, i->cgroup_path); + n++; + } - return r; + return n; } -int cgroup_bonding_kill_list(CGroupBonding *first, int sig, bool sigcont, bool rem, Set *s, const char *cgroup_suffix) { - CGroupBonding *b; - Set *allocated_set = NULL; - int ret = -EAGAIN, r; +static void unit_queue_siblings(Unit *u) { + Unit *slice; - if (!first) - return 0; + /* This adds the siblings of the specified unit and the + * siblings of all parent units to the cgroup queue. (But + * neither the specified unit itself nor the parents.) */ + + while ((slice = UNIT_DEREF(u->slice))) { + Iterator i; + Unit *m; - if (!s) - if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func))) - return -ENOMEM; + SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) { + if (m == u) + continue; - LIST_FOREACH(by_unit, b, first) { - r = cgroup_bonding_kill(b, sig, sigcont, rem, s, cgroup_suffix); - if (r < 0) { - if (r == -EAGAIN || r == -ESRCH) + if (UNIT_DEREF(m->slice) != slice) continue; - ret = r; - goto finish; + unit_add_to_cgroup_queue(m); } - if (ret < 0 || r > 0) - ret = r; + u = slice; } +} + +void unit_realize_cgroup(Unit *u) { + CGroupContext *c; + + assert(u); + + c = unit_get_cgroup_context(u); + if (!c) + return; -finish: - if (allocated_set) - set_free(allocated_set); + /* So, here's the deal: when realizing the cgroups for this + * unit, we need to first create all parents, but there's more + * actually: for the weight-based controllers we also need to + * make sure that all our siblings (i.e. units that are in the + * same slice as we are) have cgroup too. Otherwise things + * would become very uneven as each of their processes would + * get as much resources as all our group together. This call + * will synchronously create the parent cgroups, but will + * defer work on the siblings to the next event loop + * iteration. */ - return ret; + /* Add all sibling slices to the cgroup queue. */ + unit_queue_siblings(u); + + /* And realize this one now */ + unit_realize_cgroup_now(u); + + /* And apply the values */ + cgroup_context_apply(c, u->cgroup_mask, u->cgroup_path); } -/* Returns 1 if the group is empty, 0 if it is not, -EAGAIN if we - * cannot know */ -int cgroup_bonding_is_empty(CGroupBonding *b) { +void unit_destroy_cgroup(Unit *u) { int r; - assert(b); + assert(u); - if ((r = cg_is_empty_recursive(b->controller, b->path, true)) < 0) - return r; + if (!u->cgroup_path) + return; - /* If it is empty it is empty */ - if (r > 0) - return 1; + r = cg_trim_with_mask(u->cgroup_mask, u->cgroup_path, true); + if (r < 0) + log_error("Failed to destroy cgroup %s: %s", u->cgroup_path, strerror(-r)); - /* It's not only us using this cgroup, so we just don't know */ - return b->ours ? 0 : -EAGAIN; + free(u->cgroup_path); + u->cgroup_path = NULL; + u->cgroup_realized = false; + u->cgroup_mask = 0; } -int cgroup_bonding_is_empty_list(CGroupBonding *first) { - CGroupBonding *b; +pid_t unit_search_main_pid(Unit *u) { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid = 0, npid, mypid; + + assert(u); + + if (!u->cgroup_path) + return 0; + + if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0) + return 0; + + mypid = getpid(); + while (cg_read_pid(f, &npid) > 0) { + pid_t ppid; + + if (npid == pid) + continue; - LIST_FOREACH(by_unit, b, first) { - int r; + /* Ignore processes that aren't our kids */ + if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid) + continue; - r = cgroup_bonding_is_empty(b); - if (r < 0) { - /* If this returned -EAGAIN, then we don't know if the - * group is empty, so let's see if another group can - * tell us */ + if (pid != 0) { + /* Dang, there's more than one daemonized PID + in this group, so we don't know what process + is the main process. */ + pid = 0; + break; + } - if (r != -EAGAIN) - return r; - } else - return r; + pid = npid; } - return -EAGAIN; + return pid; } int manager_setup_cgroup(Manager *m) { @@ -394,8 +643,8 @@ int manager_setup_cgroup(Manager *m) { return -errno; } - /* 6. Remove non-existing controllers from the default controllers list */ - cg_shorten_controllers(m->default_controllers); + /* 6. Figure out which controllers are supported */ + m->cgroup_supported = cg_mask_supported(); return 0; } @@ -417,201 +666,71 @@ void manager_shutdown_cgroup(Manager *m, bool delete) { m->cgroup_root = NULL; } -int cgroup_bonding_get(Manager *m, const char *cgroup, CGroupBonding **bonding) { - CGroupBonding *b; +Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) { char *p; + Unit *u; assert(m); assert(cgroup); - assert(bonding); - b = hashmap_get(m->cgroup_bondings, cgroup); - if (b) { - *bonding = b; - return 1; - } + u = hashmap_get(m->cgroup_unit, cgroup); + if (u) + return u; p = strdupa(cgroup); - if (!p) - return -ENOMEM; - for (;;) { char *e; e = strrchr(p, '/'); - if (e == p || !e) { - *bonding = NULL; - return 0; - } + if (e == p || !e) + return NULL; *e = 0; - b = hashmap_get(m->cgroup_bondings, p); - if (b) { - *bonding = b; - return 1; - } + u = hashmap_get(m->cgroup_unit, p); + if (u) + return u; } } -int cgroup_notify_empty(Manager *m, const char *group) { - CGroupBonding *l, *b; +Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) { + _cleanup_free_ char *cgroup = NULL; int r; assert(m); - assert(group); - - r = cgroup_bonding_get(m, group, &l); - if (r <= 0) - return r; - - LIST_FOREACH(by_path, b, l) { - int t; - - if (!b->unit) - continue; - - t = cgroup_bonding_is_empty_list(b); - if (t < 0) { - - /* If we don't know, we don't know */ - if (t != -EAGAIN) - log_warning("Failed to check whether cgroup is empty: %s", strerror(errno)); - - continue; - } - - if (t > 0) { - /* If it is empty, let's delete it */ - cgroup_bonding_trim_list(b->unit->cgroup_bondings, true); - - if (UNIT_VTABLE(b->unit)->cgroup_notify_empty) - UNIT_VTABLE(b->unit)->cgroup_notify_empty(b->unit); - } - } - - return 0; -} - -Unit* cgroup_unit_by_pid(Manager *m, pid_t pid) { - CGroupBonding *l, *b; - char *group = NULL; - - assert(m); if (pid <= 1) return NULL; - if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &group) < 0) - return NULL; - - l = hashmap_get(m->cgroup_bondings, group); - - if (!l) { - char *slash; - - while ((slash = strrchr(group, '/'))) { - if (slash == group) - break; - - *slash = 0; - - if ((l = hashmap_get(m->cgroup_bondings, group))) - break; - } - } - - free(group); - - LIST_FOREACH(by_path, b, l) { - - if (!b->unit) - continue; - - if (b->ours) - return b->unit; - } - - return NULL; -} - -CGroupBonding *cgroup_bonding_find_list(CGroupBonding *first, const char *controller) { - CGroupBonding *b; - - if (!controller) - controller = SYSTEMD_CGROUP_CONTROLLER; - - LIST_FOREACH(by_unit, b, first) - if (streq(b->controller, controller)) - return b; - - return NULL; -} - -char *cgroup_bonding_to_string(CGroupBonding *b) { - char *r; - - assert(b); - - if (asprintf(&r, "%s:%s", b->controller, b->path) < 0) + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); + if (r < 0) return NULL; - return r; + return manager_get_unit_by_cgroup(m, cgroup); } -pid_t cgroup_bonding_search_main_pid(CGroupBonding *b) { - FILE *f; - pid_t pid = 0, npid, mypid; - - assert(b); +int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { + Unit *u; + int r; - if (!b->ours) - return 0; + assert(m); + assert(cgroup); - if (cg_enumerate_processes(b->controller, b->path, &f) < 0) + r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, cgroup, true); + if (r == 0) return 0; - mypid = getpid(); + u = manager_get_unit_by_cgroup(m, cgroup); + if (u && UNIT_VTABLE(u)->notify_cgroup_empty) + UNIT_VTABLE(u)->notify_cgroup_empty(u); - while (cg_read_pid(f, &npid) > 0) { - pid_t ppid; - - if (npid == pid) - continue; - - /* Ignore processes that aren't our kids */ - if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid) - continue; - - if (pid != 0) { - /* Dang, there's more than one daemonized PID - in this group, so we don't know what process - is the main process. */ - pid = 0; - break; - } - - pid = npid; - } - - fclose(f); - - return pid; + return 0; } -pid_t cgroup_bonding_search_main_pid_list(CGroupBonding *first) { - CGroupBonding *b; - pid_t pid; - - /* Try to find a main pid from this cgroup, but checking if - * there's only one PID in the cgroup and returning it. Later - * on we might want to add additional, smarter heuristics - * here. */ - - LIST_FOREACH(by_unit, b, first) - if ((pid = cgroup_bonding_search_main_pid(b)) != 0) - return pid; +static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { + [CGROUP_AUTO] = "auto", + [CGROUP_CLOSED] = "closed", + [CGROUP_STRICT] = "strict", +}; - return 0; - -} +DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 6555d89e37..96f1d9f7b6 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -5,7 +5,7 @@ /*** This file is part of systemd. - Copyright 2010 Lennart Poettering + Copyright 2013 Lennart Poettering systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -21,74 +21,96 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -typedef struct CGroupBonding CGroupBonding; +#include "list.h" -#include "unit.h" +typedef struct CGroupContext CGroupContext; +typedef struct CGroupDeviceAllow CGroupDeviceAllow; +typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight; +typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth; -/* Binds a cgroup to a name */ -struct CGroupBonding { - char *controller; - char *path; +typedef enum CGroupDevicePolicy { - Unit *unit; + /* When devices listed, will allow those, plus built-in ones, + if none are listed will allow everything. */ + CGROUP_AUTO, - /* For the Unit::cgroup_bondings list */ - LIST_FIELDS(CGroupBonding, by_unit); + /* Everything forbidden, except built-in ones and listed ones. */ + CGROUP_CLOSED, - /* For the Manager::cgroup_bondings hashmap */ - LIST_FIELDS(CGroupBonding, by_path); + /* Everythings forbidden, except for the listed devices */ + CGROUP_STRICT, - /* When shutting down, remove cgroup? Are our own tasks the - * only ones in this group?*/ - bool ours:1; + _CGROUP_DEVICE_POLICY_MAX, + _CGROUP_DEVICE_POLICY_INVALID = -1 +} CGroupDevicePolicy; - /* If we cannot create this group, or add a process to it, is this fatal? */ - bool essential:1; +struct CGroupDeviceAllow { + LIST_FIELDS(CGroupDeviceAllow, device_allow); + char *path; + bool r:1; + bool w:1; + bool m:1; +}; - /* This cgroup is realized */ - bool realized:1; +struct CGroupBlockIODeviceWeight { + LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights); + char *path; + unsigned long weight; }; -int cgroup_bonding_realize(CGroupBonding *b); -int cgroup_bonding_realize_list(CGroupBonding *first); +struct CGroupBlockIODeviceBandwidth { + LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths); + char *path; + uint64_t bandwidth; + bool read; +}; -void cgroup_bonding_free(CGroupBonding *b, bool trim); -void cgroup_bonding_free_list(CGroupBonding *first, bool trim); +struct CGroupContext { + bool cpu_accounting; + bool blockio_accounting; + bool memory_accounting; -int cgroup_bonding_install(CGroupBonding *b, pid_t pid, const char *suffix); -int cgroup_bonding_install_list(CGroupBonding *first, pid_t pid, const char *suffix); + unsigned long cpu_shares; -int cgroup_bonding_migrate(CGroupBonding *b, CGroupBonding *list); -int cgroup_bonding_migrate_to(CGroupBonding *b, const char *target, bool rem); + unsigned long blockio_weight; + LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights); + LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths); -int cgroup_bonding_set_group_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid); -int cgroup_bonding_set_group_access_list(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid); + uint64_t memory_limit; + uint64_t memory_soft_limit; -int cgroup_bonding_set_task_access(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid, int sticky); -int cgroup_bonding_set_task_access_list(CGroupBonding *b, mode_t mode, uid_t uid, gid_t gid, int sticky); + CGroupDevicePolicy device_policy; + LIST_HEAD(CGroupDeviceAllow, device_allow); +}; -int cgroup_bonding_kill(CGroupBonding *b, int sig, bool sigcont, bool rem, Set *s, const char *suffix); -int cgroup_bonding_kill_list(CGroupBonding *first, int sig, bool sigcont, bool rem, Set *s, const char *suffix); +#include "unit.h" +#include "manager.h" +#include "cgroup-util.h" -void cgroup_bonding_trim(CGroupBonding *first, bool delete_root); -void cgroup_bonding_trim_list(CGroupBonding *first, bool delete_root); +void cgroup_context_init(CGroupContext *c); +void cgroup_context_done(CGroupContext *c); +void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); +void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path); +CGroupControllerMask cgroup_context_get_mask(CGroupContext *c); -int cgroup_bonding_is_empty(CGroupBonding *b); -int cgroup_bonding_is_empty_list(CGroupBonding *first); +void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a); +void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w); +void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); -CGroupBonding *cgroup_bonding_find_list(CGroupBonding *first, const char *controller) _pure_; +void unit_realize_cgroup(Unit *u); +void unit_destroy_cgroup(Unit *u); -char *cgroup_bonding_to_string(CGroupBonding *b); +int manager_setup_cgroup(Manager *m); +void manager_shutdown_cgroup(Manager *m, bool delete); -pid_t cgroup_bonding_search_main_pid(CGroupBonding *b); -pid_t cgroup_bonding_search_main_pid_list(CGroupBonding *b); +unsigned manager_dispatch_cgroup_queue(Manager *m); -#include "manager.h" +Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup); +Unit* manager_get_unit_by_pid(Manager *m, pid_t pid); -int manager_setup_cgroup(Manager *m); -void manager_shutdown_cgroup(Manager *m, bool delete); +pid_t unit_search_main_pid(Unit *u); -int cgroup_bonding_get(Manager *m, const char *cgroup, CGroupBonding **bonding); -int cgroup_notify_empty(Manager *m, const char *group); +int manager_notify_cgroup_empty(Manager *m, const char *group); -Unit* cgroup_unit_by_pid(Manager *m, pid_t pid); +const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_; +CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c new file mode 100644 index 0000000000..08ee9c8db4 --- /dev/null +++ b/src/core/dbus-cgroup.c @@ -0,0 +1,139 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2013 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <dbus/dbus.h> + +#include "dbus-cgroup.h" + +static DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_cgroup_append_device_policy, cgroup_device_policy, CGroupDevicePolicy); + +static int bus_cgroup_append_device_weights(DBusMessageIter *i, const char *property, void *data) { + DBusMessageIter sub, sub2; + CGroupContext *c = data; + CGroupBlockIODeviceWeight *w; + + assert(i); + assert(property); + assert(c); + + if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "(st)", &sub)) + return -ENOMEM; + + LIST_FOREACH(device_weights, w, c->blockio_device_weights) { + + if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) || + !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &w->path) || + !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_UINT64, &w->weight) || + !dbus_message_iter_close_container(&sub, &sub2)) + return -ENOMEM; + } + + if (!dbus_message_iter_close_container(i, &sub)) + return -ENOMEM; + + return 0; +} + +static int bus_cgroup_append_device_bandwidths(DBusMessageIter *i, const char *property, void *data) { + DBusMessageIter sub, sub2; + CGroupContext *c = data; + CGroupBlockIODeviceBandwidth *b; + + assert(i); + assert(property); + assert(c); + + if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "(st)", &sub)) + return -ENOMEM; + + LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { + + if (streq(property, "BlockIOReadBandwidth") != b->read) + continue; + + if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) || + !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &b->path) || + !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_UINT64, &b->bandwidth) || + !dbus_message_iter_close_container(&sub, &sub2)) + return -ENOMEM; + } + + if (!dbus_message_iter_close_container(i, &sub)) + return -ENOMEM; + + return 0; +} + +static int bus_cgroup_append_device_allow(DBusMessageIter *i, const char *property, void *data) { + DBusMessageIter sub, sub2; + CGroupContext *c = data; + CGroupDeviceAllow *a; + + assert(i); + assert(property); + assert(c); + + if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "(ss)", &sub)) + return -ENOMEM; + + LIST_FOREACH(device_allow, a, c->device_allow) { + const char *rwm; + char buf[4]; + unsigned k = 0; + + if (a->r) + buf[k++] = 'r'; + if (a->w) + buf[k++] = 'w'; + if (a->m) + buf[k++] = 'm'; + + buf[k] = 0; + rwm = buf; + + if (!dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) || + !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &a->path) || + !dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &rwm) || + !dbus_message_iter_close_container(&sub, &sub2)) + return -ENOMEM; + } + + if (!dbus_message_iter_close_container(i, &sub)) + return -ENOMEM; + + return 0; +} + +const BusProperty bus_cgroup_context_properties[] = { + { "CPUAccounting", bus_property_append_bool, "b", offsetof(CGroupContext, cpu_accounting) }, + { "CPUShares", bus_property_append_ul, "t", offsetof(CGroupContext, cpu_shares) }, + { "BlockIOAccounting", bus_property_append_bool, "b", offsetof(CGroupContext, blockio_accounting) }, + { "BlockIOWeight", bus_property_append_ul, "t", offsetof(CGroupContext, blockio_weight) }, + { "BlockIODeviceWeight", bus_cgroup_append_device_weights, "a(st)", 0 }, + { "BlockIOReadBandwidth", bus_cgroup_append_device_bandwidths, "a(st)", 0 }, + { "BlockIOWriteBandwidth", bus_cgroup_append_device_bandwidths, "a(st)", 0 }, + { "MemoryAccounting", bus_property_append_bool, "b", offsetof(CGroupContext, memory_accounting) }, + { "MemoryLimit", bus_property_append_uint64, "t", offsetof(CGroupContext, memory_limit) }, + { "MemorySoftLimit", bus_property_append_uint64, "t", offsetof(CGroupContext, memory_soft_limit) }, + { "DevicePolicy", bus_cgroup_append_device_policy, "s", offsetof(CGroupContext, device_policy) }, + { "DeviceAllow", bus_cgroup_append_device_allow, "a(ss)", 0 }, + {} +}; diff --git a/src/core/dbus-cgroup.h b/src/core/dbus-cgroup.h new file mode 100644 index 0000000000..a0a7a7771e --- /dev/null +++ b/src/core/dbus-cgroup.h @@ -0,0 +1,44 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#pragma once + +/*** + This file is part of systemd. + + Copyright 2013 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <dbus/dbus.h> + +#include "manager.h" +#include "dbus-common.h" +#include "cgroup.h" + +#define BUS_CGROUP_CONTEXT_INTERFACE \ + " <property name=\"CPUAccounting\" type=\"b\" access=\"read\"/>\n" \ + " <property name=\"CPUShares\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"BlockIOAccounting\" type=\"b\" access=\"read\"/>\n" \ + " <property name=\"BlockIOWeight\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"BlockIODeviceWeight\" type=\"a(st)\" access=\"read\"/>\n" \ + " <property name=\"BlockIOReadBandwidth=\" type=\"a(st)\" access=\"read\"/>\n" \ + " <property name=\"BlockIOWriteBandwidth=\" type=\"a(st)\" access=\"read\"/>\n" \ + " <property name=\"MemoryAccounting\" type=\"b\" access=\"read\"/>\n" \ + " <property name=\"MemoryLimit\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"MemorySoftLimit\" type=\"t\" access=\"read\"/>\n" \ + " <property name=\"DevicePolicy\" type=\"s\" access=\"read\"/>\n" \ + " <property name=\"DeviceAllow\" type=\"a(ss)\" access=\"read\"/>\n" + +extern const BusProperty bus_cgroup_context_properties[]; diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 2a8a0e1ac5..73590c82be 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -31,10 +31,10 @@ #include "syscall-list.h" #include "fileio.h" -DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_execute_append_input, exec_input, ExecInput); -DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_execute_append_output, exec_output, ExecOutput); +static DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_execute_append_input, exec_input, ExecInput); +static DEFINE_BUS_PROPERTY_APPEND_ENUM(bus_execute_append_output, exec_output, ExecOutput); -int bus_execute_append_env_files(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_env_files(DBusMessageIter *i, const char *property, void *data) { char **env_files = data, **j; DBusMessageIter sub, sub2; @@ -66,7 +66,7 @@ int bus_execute_append_env_files(DBusMessageIter *i, const char *property, void return 0; } -int bus_execute_append_oom_score_adjust(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_oom_score_adjust(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; int32_t n; @@ -92,7 +92,7 @@ int bus_execute_append_oom_score_adjust(DBusMessageIter *i, const char *property return 0; } -int bus_execute_append_nice(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_nice(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; int32_t n; @@ -111,7 +111,7 @@ int bus_execute_append_nice(DBusMessageIter *i, const char *property, void *data return 0; } -int bus_execute_append_ioprio(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_ioprio(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; int32_t n; @@ -130,7 +130,7 @@ int bus_execute_append_ioprio(DBusMessageIter *i, const char *property, void *da return 0; } -int bus_execute_append_cpu_sched_policy(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_cpu_sched_policy(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; int32_t n; @@ -149,7 +149,7 @@ int bus_execute_append_cpu_sched_policy(DBusMessageIter *i, const char *property return 0; } -int bus_execute_append_cpu_sched_priority(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_cpu_sched_priority(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; int32_t n; @@ -174,7 +174,7 @@ int bus_execute_append_cpu_sched_priority(DBusMessageIter *i, const char *proper return 0; } -int bus_execute_append_affinity(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_affinity(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; dbus_bool_t b; DBusMessageIter sub; @@ -200,7 +200,7 @@ int bus_execute_append_affinity(DBusMessageIter *i, const char *property, void * return 0; } -int bus_execute_append_timer_slack_nsec(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_timer_slack_nsec(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; uint64_t u; @@ -219,7 +219,7 @@ int bus_execute_append_timer_slack_nsec(DBusMessageIter *i, const char *property return 0; } -int bus_execute_append_capability_bs(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_capability_bs(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; uint64_t normal, inverted; @@ -236,7 +236,7 @@ int bus_execute_append_capability_bs(DBusMessageIter *i, const char *property, v return bus_property_append_uint64(i, property, &inverted); } -int bus_execute_append_capabilities(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_capabilities(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; char *t = NULL; const char *s; @@ -265,7 +265,7 @@ int bus_execute_append_capabilities(DBusMessageIter *i, const char *property, vo return 0; } -int bus_execute_append_rlimits(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_rlimits(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; int r; uint64_t u; @@ -347,7 +347,7 @@ int bus_execute_append_command(DBusMessageIter *i, const char *property, void *d return 0; } -int bus_execute_append_syscall_filter(DBusMessageIter *i, const char *property, void *data) { +static int bus_execute_append_syscall_filter(DBusMessageIter *i, const char *property, void *data) { ExecContext *c = data; dbus_bool_t b; DBusMessageIter sub; @@ -430,10 +430,8 @@ const BusProperty bus_exec_context_properties[] = { { "PrivateNetwork", bus_property_append_bool, "b", offsetof(ExecContext, private_network) }, { "SameProcessGroup", bus_property_append_bool, "b", offsetof(ExecContext, same_pgrp) }, { "UtmpIdentifier", bus_property_append_string, "s", offsetof(ExecContext, utmp_id), true }, - { "ControlGroupModify", bus_property_append_bool, "b", offsetof(ExecContext, control_group_modify) }, - { "ControlGroupPersistent", bus_property_append_tristate_false, "b", offsetof(ExecContext, control_group_persistent) }, { "IgnoreSIGPIPE", bus_property_append_bool, "b", offsetof(ExecContext, ignore_sigpipe) }, { "NoNewPrivileges", bus_property_append_bool, "b", offsetof(ExecContext, no_new_privileges) }, { "SystemCallFilter", bus_execute_append_syscall_filter, "au", 0 }, - { NULL, } + {} }; diff --git a/src/core/dbus-execute.h b/src/core/dbus-execute.h index 91d70e535f..5a6a559c99 100644 --- a/src/core/dbus-execute.h +++ b/src/core/dbus-execute.h @@ -92,8 +92,6 @@ " <property name=\"PrivateNetwork\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"SameProcessGroup\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"UtmpIdentifier\" type=\"s\" access=\"read\"/>\n" \ - " <property name=\"ControlGroupModify\" type=\"b\" access=\"read\"/>\n" \ - " <property name=\"ControlGroupPersistent\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"IgnoreSIGPIPE\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"NoNewPrivileges\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"SystemCallFilter\" type=\"au\" access=\"read\"/>\n" @@ -106,18 +104,4 @@ extern const BusProperty bus_exec_context_properties[]; #define BUS_EXEC_COMMAND_PROPERTY(name, command, indirect) \ { name, bus_execute_append_command, "a(sasbttttuii)", (command), (indirect), NULL } -int bus_execute_append_output(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_input(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_oom_score_adjust(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_nice(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_ioprio(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_cpu_sched_policy(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_cpu_sched_priority(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_affinity(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_timer_slack_nsec(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_capabilities(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_capability_bs(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_rlimits(DBusMessageIter *i, const char *property, void *data); int bus_execute_append_command(DBusMessageIter *u, const char *property, void *data); -int bus_execute_append_env_files(DBusMessageIter *i, const char *property, void *data); -int bus_execute_append_syscall_filter(DBusMessageIter *i, const char *property, void *data); diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index d41b6ae15f..c081ff5d16 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -613,7 +613,6 @@ static const BusProperty bus_manager_properties[] = { { "ConfirmSpawn", bus_property_append_bool, "b", offsetof(Manager, confirm_spawn) }, { "ShowStatus", bus_property_append_bool, "b", offsetof(Manager, show_status) }, { "UnitPath", bus_property_append_strv, "as", offsetof(Manager, lookup_paths.unit_path), true }, - { "DefaultControllers", bus_property_append_strv, "as", offsetof(Manager, default_controllers), true }, { "DefaultStandardOutput", bus_manager_append_exec_output, "s", offsetof(Manager, default_std_output) }, { "DefaultStandardError", bus_manager_append_exec_output, "s", offsetof(Manager, default_std_error) }, { "RuntimeWatchdogUSec", bus_property_append_usec, "t", offsetof(Manager, runtime_watchdog), false, bus_manager_set_runtime_watchdog_usec }, @@ -683,7 +682,7 @@ static DBusHandlerResult bus_manager_message_handler(DBusConnection *connection, DBUS_TYPE_INVALID)) return bus_send_error_reply(connection, message, &error, -EINVAL); - u = cgroup_unit_by_pid(m, (pid_t) pid); + u = manager_get_unit_by_pid(m, (pid_t) pid); if (!u) { dbus_set_error(&error, BUS_ERROR_NO_SUCH_UNIT, "No unit for PID %lu is loaded.", (unsigned long) pid); return bus_send_error_reply(connection, message, &error, -ENOENT); @@ -896,151 +895,6 @@ static DBusHandlerResult bus_manager_message_handler(DBusConnection *connection, if (!reply) goto oom; - } else if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Manager", "SetUnitControlGroup")) { - const char *name; - Unit *u; - DBusMessageIter iter; - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_iter_get_basic_and_next(&iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - u = manager_get_unit(m, name); - if (!u) { - dbus_set_error(&error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s is not loaded.", name); - return bus_send_error_reply(connection, message, &error, -ENOENT); - } - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "start"); - - r = bus_unit_cgroup_set(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - } else if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Manager", "UnsetUnitControlGroup")) { - const char *name; - Unit *u; - DBusMessageIter iter; - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_iter_get_basic_and_next(&iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - u = manager_get_unit(m, name); - if (!u) { - dbus_set_error(&error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s is not loaded.", name); - return bus_send_error_reply(connection, message, &error, -ENOENT); - } - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "stop"); - - r = bus_unit_cgroup_unset(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - } else if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Manager", "SetUnitControlGroupAttribute")) { - const char *name; - Unit *u; - DBusMessageIter iter; - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_iter_get_basic_and_next(&iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - u = manager_get_unit(m, name); - if (!u) { - dbus_set_error(&error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s is not loaded.", name); - return bus_send_error_reply(connection, message, &error, -ENOENT); - } - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "start"); - - r = bus_unit_cgroup_attribute_set(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - } else if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Manager", "UnsetUnitControlGroupAttribute")) { - const char *name; - Unit *u; - DBusMessageIter iter; - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_iter_get_basic_and_next(&iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - u = manager_get_unit(m, name); - if (!u) { - dbus_set_error(&error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s is not loaded.", name); - return bus_send_error_reply(connection, message, &error, -ENOENT); - } - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "stop"); - - r = bus_unit_cgroup_attribute_unset(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - } else if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Manager", "GetUnitControlGroupAttribute")) { - const char *name; - Unit *u; - DBusMessageIter iter; - _cleanup_strv_free_ char **list = NULL; - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_iter_get_basic_and_next(&iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - u = manager_get_unit(m, name); - if (!u) { - dbus_set_error(&error, BUS_ERROR_NO_SUCH_UNIT, "Unit %s is not loaded.", name); - return bus_send_error_reply(connection, message, &error, -ENOENT); - } - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "status"); - - r = bus_unit_cgroup_attribute_get(u, &iter, &list); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - dbus_message_iter_init_append(reply, &iter); - if (bus_append_strv_iter(&iter, list) < 0) - goto oom; - } else if (dbus_message_is_method_call(message, "org.freedesktop.systemd1.Manager", "ListUnits")) { DBusMessageIter iter, sub; Iterator i; diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c index 0fcceb500d..16b7afe8b7 100644 --- a/src/core/dbus-mount.c +++ b/src/core/dbus-mount.c @@ -22,11 +22,12 @@ #include <errno.h> #include "dbus-unit.h" -#include "dbus-mount.h" -#include "dbus-kill.h" #include "dbus-execute.h" +#include "dbus-kill.h" +#include "dbus-cgroup.h" #include "dbus-common.h" #include "selinux-access.h" +#include "dbus-mount.h" #define BUS_MOUNT_INTERFACE \ " <interface name=\"org.freedesktop.systemd1.Mount\">\n" \ @@ -40,7 +41,6 @@ BUS_EXEC_COMMAND_INTERFACE("ExecRemount") \ BUS_EXEC_CONTEXT_INTERFACE \ BUS_KILL_CONTEXT_INTERFACE \ - BUS_UNIT_CGROUP_INTERFACE \ " <property name=\"ControlPID\" type=\"u\" access=\"read\"/>\n" \ " <property name=\"DirectoryMode\" type=\"u\" access=\"read\"/>\n" \ " <property name=\"Result\" type=\"s\" access=\"read\"/>\n" \ @@ -156,11 +156,11 @@ DBusHandlerResult bus_mount_message_handler(Unit *u, DBusConnection *c, DBusMess Mount *m = MOUNT(u); const BusBoundProperties bps[] = { - { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, - { "org.freedesktop.systemd1.Mount", bus_mount_properties, m }, - { "org.freedesktop.systemd1.Mount", bus_exec_context_properties, &m->exec_context }, - { "org.freedesktop.systemd1.Mount", bus_kill_context_properties, &m->kill_context }, - { "org.freedesktop.systemd1.Mount", bus_unit_cgroup_properties, u }, + { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, + { "org.freedesktop.systemd1.Mount", bus_mount_properties, m }, + { "org.freedesktop.systemd1.Mount", bus_exec_context_properties, &m->exec_context }, + { "org.freedesktop.systemd1.Mount", bus_kill_context_properties, &m->kill_context }, + { "org.freedesktop.systemd1.Mount", bus_cgroup_context_properties, &m->cgroup_context }, { NULL, } }; diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 98bcdcb402..bbac1b8167 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -24,9 +24,10 @@ #include "dbus-unit.h" #include "dbus-execute.h" #include "dbus-kill.h" -#include "dbus-service.h" +#include "dbus-cgroup.h" #include "dbus-common.h" #include "selinux-access.h" +#include "dbus-service.h" #define BUS_SERVICE_INTERFACE \ " <interface name=\"org.freedesktop.systemd1.Service\">\n" \ @@ -50,7 +51,6 @@ BUS_EXEC_COMMAND_INTERFACE("ExecStopPost") \ BUS_EXEC_CONTEXT_INTERFACE \ BUS_KILL_CONTEXT_INTERFACE \ - BUS_UNIT_CGROUP_INTERFACE \ " <property name=\"PermissionsStartOnly\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"RootDirectoryStartOnly\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"RemainAfterExit\" type=\"b\" access=\"read\"/>\n" \ @@ -152,8 +152,8 @@ DBusHandlerResult bus_service_message_handler(Unit *u, DBusConnection *connectio { "org.freedesktop.systemd1.Service", bus_service_properties, s }, { "org.freedesktop.systemd1.Service", bus_exec_context_properties, &s->exec_context }, { "org.freedesktop.systemd1.Service", bus_kill_context_properties, &s->kill_context }, + { "org.freedesktop.systemd1.Service", bus_cgroup_context_properties, &s->cgroup_context }, { "org.freedesktop.systemd1.Service", bus_exec_main_status_properties, &s->main_exec_status }, - { "org.freedesktop.systemd1.Service", bus_unit_cgroup_properties, u }, { NULL, } }; diff --git a/src/core/dbus-slice.c b/src/core/dbus-slice.c index 8a318faa55..8243305848 100644 --- a/src/core/dbus-slice.c +++ b/src/core/dbus-slice.c @@ -22,13 +22,13 @@ #include <errno.h> #include "dbus-unit.h" -#include "dbus-slice.h" #include "dbus-common.h" +#include "dbus-cgroup.h" #include "selinux-access.h" +#include "dbus-slice.h" #define BUS_SLICE_INTERFACE \ " <interface name=\"org.freedesktop.systemd1.Slice\">\n" \ - BUS_UNIT_CGROUP_INTERFACE \ " </interface>\n" #define INTROSPECTION \ @@ -48,9 +48,11 @@ const char bus_slice_interface[] _introspect_("Slice") = BUS_SLICE_INTERFACE; DBusHandlerResult bus_slice_message_handler(Unit *u, DBusConnection *c, DBusMessage *message) { + Slice *s = SLICE(u); + const BusBoundProperties bps[] = { - { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, - { "org.freedesktop.systemd1.Slice", bus_unit_cgroup_properties, u }, + { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, + { "org.freedesktop.systemd1.Slice", bus_cgroup_context_properties, &s->cgroup_context }, { NULL, } }; diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c index 973f905149..7ec4f33924 100644 --- a/src/core/dbus-socket.c +++ b/src/core/dbus-socket.c @@ -22,11 +22,12 @@ #include <errno.h> #include "dbus-unit.h" -#include "dbus-socket.h" #include "dbus-execute.h" #include "dbus-kill.h" +#include "dbus-cgroup.h" #include "dbus-common.h" #include "selinux-access.h" +#include "dbus-socket.h" #define BUS_SOCKET_INTERFACE \ " <interface name=\"org.freedesktop.systemd1.Socket\">\n" \ @@ -39,7 +40,6 @@ BUS_EXEC_COMMAND_INTERFACE("ExecStopPost") \ BUS_EXEC_CONTEXT_INTERFACE \ BUS_KILL_CONTEXT_INTERFACE \ - BUS_UNIT_CGROUP_INTERFACE \ " <property name=\"ControlPID\" type=\"u\" access=\"read\"/>\n" \ " <property name=\"BindToDevice\" type=\"s\" access=\"read\"/>\n" \ " <property name=\"DirectoryMode\" type=\"u\" access=\"read\"/>\n" \ @@ -201,11 +201,11 @@ static const BusProperty bus_socket_properties[] = { DBusHandlerResult bus_socket_message_handler(Unit *u, DBusConnection *c, DBusMessage *message) { Socket *s = SOCKET(u); const BusBoundProperties bps[] = { - { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, - { "org.freedesktop.systemd1.Socket", bus_socket_properties, s }, - { "org.freedesktop.systemd1.Socket", bus_exec_context_properties, &s->exec_context }, - { "org.freedesktop.systemd1.Socket", bus_kill_context_properties, &s->kill_context }, - { "org.freedesktop.systemd1.Socket", bus_unit_cgroup_properties, u }, + { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, + { "org.freedesktop.systemd1.Socket", bus_socket_properties, s }, + { "org.freedesktop.systemd1.Socket", bus_exec_context_properties, &s->exec_context }, + { "org.freedesktop.systemd1.Socket", bus_kill_context_properties, &s->kill_context }, + { "org.freedesktop.systemd1.Socket", bus_cgroup_context_properties, &s->cgroup_context }, { NULL, } }; diff --git a/src/core/dbus-swap.c b/src/core/dbus-swap.c index 2e99fba7db..4ae1cd08e0 100644 --- a/src/core/dbus-swap.c +++ b/src/core/dbus-swap.c @@ -23,11 +23,12 @@ #include <errno.h> #include "dbus-unit.h" -#include "dbus-swap.h" #include "dbus-execute.h" #include "dbus-kill.h" +#include "dbus-cgroup.h" #include "dbus-common.h" #include "selinux-access.h" +#include "dbus-swap.h" #define BUS_SWAP_INTERFACE \ " <interface name=\"org.freedesktop.systemd1.Swap\">\n" \ @@ -38,7 +39,6 @@ BUS_EXEC_COMMAND_INTERFACE("ExecDeactivate") \ BUS_EXEC_CONTEXT_INTERFACE \ BUS_KILL_CONTEXT_INTERFACE \ - BUS_UNIT_CGROUP_INTERFACE \ " <property name=\"ControlPID\" type=\"u\" access=\"read\"/>\n" \ " <property name=\"Result\" type=\"s\" access=\"read\"/>\n" \ " </interface>\n" @@ -103,11 +103,11 @@ static const BusProperty bus_swap_properties[] = { DBusHandlerResult bus_swap_message_handler(Unit *u, DBusConnection *c, DBusMessage *message) { Swap *s = SWAP(u); const BusBoundProperties bps[] = { - { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, - { "org.freedesktop.systemd1.Swap", bus_swap_properties, s }, - { "org.freedesktop.systemd1.Swap", bus_exec_context_properties, &s->exec_context }, - { "org.freedesktop.systemd1.Swap", bus_kill_context_properties, &s->kill_context }, - { "org.freedesktop.systemd1.Swap", bus_unit_cgroup_properties, u }, + { "org.freedesktop.systemd1.Unit", bus_unit_properties, u }, + { "org.freedesktop.systemd1.Swap", bus_swap_properties, s }, + { "org.freedesktop.systemd1.Swap", bus_exec_context_properties, &s->exec_context }, + { "org.freedesktop.systemd1.Swap", bus_kill_context_properties, &s->kill_context }, + { "org.freedesktop.systemd1.Swap", bus_cgroup_context_properties, &s->cgroup_context }, { NULL, } }; diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 8a7ab349d1..cbd41342f4 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -295,90 +295,6 @@ static int bus_unit_append_job(DBusMessageIter *i, const char *property, void *d return 0; } -static int bus_unit_append_default_cgroup(DBusMessageIter *i, const char *property, void *data) { - Unit *u = data; - char *t; - CGroupBonding *cgb; - bool success; - - assert(i); - assert(property); - assert(u); - - cgb = unit_get_default_cgroup(u); - if (cgb) { - t = cgroup_bonding_to_string(cgb); - if (!t) - return -ENOMEM; - } else - t = (char*) ""; - - success = dbus_message_iter_append_basic(i, DBUS_TYPE_STRING, &t); - - if (cgb) - free(t); - - return success ? 0 : -ENOMEM; -} - -static int bus_unit_append_cgroups(DBusMessageIter *i, const char *property, void *data) { - Unit *u = data; - CGroupBonding *cgb; - DBusMessageIter sub; - - if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "s", &sub)) - return -ENOMEM; - - LIST_FOREACH(by_unit, cgb, u->cgroup_bondings) { - _cleanup_free_ char *t = NULL; - bool success; - - t = cgroup_bonding_to_string(cgb); - if (!t) - return -ENOMEM; - - success = dbus_message_iter_append_basic(&sub, DBUS_TYPE_STRING, &t); - if (!success) - return -ENOMEM; - } - - if (!dbus_message_iter_close_container(i, &sub)) - return -ENOMEM; - - return 0; -} - -static int bus_unit_append_cgroup_attrs(DBusMessageIter *i, const char *property, void *data) { - Unit *u = data; - CGroupAttribute *a; - DBusMessageIter sub, sub2; - - if (!dbus_message_iter_open_container(i, DBUS_TYPE_ARRAY, "(sss)", &sub)) - return -ENOMEM; - - LIST_FOREACH(by_unit, a, u->cgroup_attributes) { - _cleanup_free_ char *v = NULL; - bool success; - - if (a->semantics && a->semantics->map_write) - a->semantics->map_write(a->semantics, a->value, &v); - - success = - dbus_message_iter_open_container(&sub, DBUS_TYPE_STRUCT, NULL, &sub2) && - dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &a->controller) && - dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, &a->name) && - dbus_message_iter_append_basic(&sub2, DBUS_TYPE_STRING, v ? &v : &a->value) && - dbus_message_iter_close_container(&sub, &sub2); - if (!success) - return -ENOMEM; - } - - if (!dbus_message_iter_close_container(i, &sub)) - return -ENOMEM; - - return 0; -} - static int bus_unit_append_need_daemon_reload(DBusMessageIter *i, const char *property, void *data) { Unit *u = data; dbus_bool_t b; @@ -488,90 +404,6 @@ static DBusHandlerResult bus_unit_message_dispatch(Unit *u, DBusConnection *conn if (!reply) goto oom; - } else if (streq_ptr(dbus_message_get_member(message), "SetControlGroup")) { - DBusMessageIter iter; - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "start"); - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_unit_cgroup_set(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - } else if (streq_ptr(dbus_message_get_member(message), "UnsetControlGroup")) { - DBusMessageIter iter; - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "stop"); - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_unit_cgroup_unset(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - } else if (streq_ptr(dbus_message_get_member(message), "GetControlGroupAttribute")) { - DBusMessageIter iter; - _cleanup_strv_free_ char **list = NULL; - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "status"); - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_unit_cgroup_attribute_get(u, &iter, &list); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - dbus_message_iter_init_append(reply, &iter); - if (bus_append_strv_iter(&iter, list) < 0) - goto oom; - - } else if (streq_ptr(dbus_message_get_member(message), "SetControlGroupAttribute")) { - DBusMessageIter iter; - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "start"); - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_unit_cgroup_attribute_set(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - - } else if (streq_ptr(dbus_message_get_member(message), "UnsetControlGroupAttribute")) { - DBusMessageIter iter; - - SELINUX_UNIT_ACCESS_CHECK(u, connection, message, "stop"); - - if (!dbus_message_iter_init(message, &iter)) - goto oom; - - r = bus_unit_cgroup_attribute_unset(u, &iter); - if (r < 0) - return bus_send_error_reply(connection, message, NULL, r); - - reply = dbus_message_new_method_return(message); - if (!reply) - goto oom; - } else if (UNIT_VTABLE(u)->bus_message_handler) return UNIT_VTABLE(u)->bus_message_handler(u, connection, message); else @@ -913,360 +745,6 @@ oom: return DBUS_HANDLER_RESULT_NEED_MEMORY; } -static int parse_mode(DBusMessageIter *iter, bool *runtime, bool next) { - const char *mode; - int r; - - assert(iter); - assert(runtime); - - r = bus_iter_get_basic_and_next(iter, DBUS_TYPE_STRING, &mode, next); - if (r < 0) - return r; - - if (streq(mode, "runtime")) - *runtime = true; - else if (streq(mode, "persistent")) - *runtime = false; - else - return -EINVAL; - - return 0; -} - -int bus_unit_cgroup_set(Unit *u, DBusMessageIter *iter) { - _cleanup_free_ char *controller = NULL, *old_path = NULL, *new_path = NULL, *contents = NULL; - const char *name; - CGroupBonding *b; - bool runtime; - int r; - - assert(u); - assert(iter); - - if (!unit_get_exec_context(u)) - return -EINVAL; - - r = bus_iter_get_basic_and_next(iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return r; - - r = parse_mode(iter, &runtime, false); - if (r < 0) - return r; - - r = cg_split_spec(name, &controller, &new_path); - if (r < 0) - return r; - - if (!new_path) { - new_path = unit_default_cgroup_path(u); - if (!new_path) - return -ENOMEM; - } - - if (!controller || streq(controller, SYSTEMD_CGROUP_CONTROLLER)) - return -EINVAL; - - b = cgroup_bonding_find_list(u->cgroup_bondings, controller); - if (b) { - if (streq(b->path, new_path)) - return 0; - - if (b->essential) - return -EINVAL; - - old_path = strdup(b->path); - if (!old_path) - return -ENOMEM; - } - - r = unit_add_cgroup_from_text(u, name, true, &b); - if (r < 0) - return r; - if (r > 0) { - CGroupAttribute *a; - - /* Try to move things to the new place, and clean up the old place */ - cgroup_bonding_realize(b); - cgroup_bonding_migrate(b, u->cgroup_bondings); - - if (old_path) - cg_trim(controller, old_path, true); - - /* Apply the attributes to the new group */ - LIST_FOREACH(by_unit, a, u->cgroup_attributes) - if (streq(a->controller, controller)) - cgroup_attribute_apply(a, b); - } - - contents = strjoin("[", UNIT_VTABLE(u)->exec_section, "]\n" - "ControlGroup=", name, "\n", NULL); - if (!contents) - return -ENOMEM; - - return unit_write_drop_in(u, runtime, controller, contents); -} - -int bus_unit_cgroup_unset(Unit *u, DBusMessageIter *iter) { - _cleanup_free_ char *controller = NULL, *path = NULL, *target = NULL; - const char *name; - CGroupAttribute *a, *n; - CGroupBonding *b; - bool runtime; - int r; - - assert(u); - assert(iter); - - if (!unit_get_exec_context(u)) - return -EINVAL; - - r = bus_iter_get_basic_and_next(iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return r; - - r = parse_mode(iter, &runtime, false); - if (r < 0) - return r; - - r = cg_split_spec(name, &controller, &path); - if (r < 0) - return r; - - if (!controller || streq(controller, SYSTEMD_CGROUP_CONTROLLER)) - return -EINVAL; - - b = cgroup_bonding_find_list(u->cgroup_bondings, controller); - if (!b) - return -ENOENT; - - if (path && !path_equal(path, b->path)) - return -ENOENT; - - if (b->essential) - return -EINVAL; - - unit_remove_drop_in(u, runtime, controller); - - /* Try to migrate the old group away */ - if (cg_pid_get_path(controller, 0, &target) >= 0) - cgroup_bonding_migrate_to(u->cgroup_bondings, target, false); - - cgroup_bonding_free(b, true); - - /* Drop all attributes of this controller */ - LIST_FOREACH_SAFE(by_unit, a, n, u->cgroup_attributes) { - if (!streq(a->controller, controller)) - continue; - - unit_remove_drop_in(u, runtime, a->name); - cgroup_attribute_free(a); - } - - return 0; -} - -int bus_unit_cgroup_attribute_get(Unit *u, DBusMessageIter *iter, char ***_result) { - _cleanup_free_ char *controller = NULL; - CGroupAttribute *a; - CGroupBonding *b; - const char *name; - char **l = NULL; - int r; - - assert(u); - assert(iter); - assert(_result); - - if (!unit_get_exec_context(u)) - return -EINVAL; - - r = bus_iter_get_basic_and_next(iter, DBUS_TYPE_STRING, &name, false); - if (r < 0) - return r; - - r = cg_controller_from_attr(name, &controller); - if (r < 0) - return r; - - /* First attempt, read the value from the kernel */ - b = cgroup_bonding_find_list(u->cgroup_bondings, controller); - if (b) { - _cleanup_free_ char *p = NULL, *v = NULL; - - r = cg_get_path(b->controller, b->path, name, &p); - if (r < 0) - return r; - - r = read_full_file(p, &v, NULL); - if (r >= 0) { - /* Split on new lines */ - l = strv_split_newlines(v); - if (!l) - return -ENOMEM; - - *_result = l; - return 0; - - } - } - - /* If that didn't work, read our cached value */ - LIST_FOREACH(by_unit, a, u->cgroup_attributes) { - - if (!cgroup_attribute_matches(a, controller, name)) - continue; - - r = strv_extend(&l, a->value); - if (r < 0) { - strv_free(l); - return r; - } - } - - if (!l) - return -ENOENT; - - *_result = l; - return 0; -} - -static int update_attribute_drop_in(Unit *u, bool runtime, const char *name) { - _cleanup_free_ char *buf = NULL; - CGroupAttribute *a; - - assert(u); - assert(name); - - LIST_FOREACH(by_unit, a, u->cgroup_attributes) { - if (!cgroup_attribute_matches(a, NULL, name)) - continue; - - if (!buf) { - buf = strjoin("[", UNIT_VTABLE(u)->exec_section, "]\n" - "ControlGroupAttribute=", a->name, " ", a->value, "\n", NULL); - - if (!buf) - return -ENOMEM; - } else { - char *b; - - b = strjoin(buf, - "ControlGroupAttribute=", a->name, " ", a->value, "\n", NULL); - - if (!b) - return -ENOMEM; - - free(buf); - buf = b; - } - } - - if (buf) - return unit_write_drop_in(u, runtime, name, buf); - else - return unit_remove_drop_in(u, runtime, name); -} - -int bus_unit_cgroup_attribute_set(Unit *u, DBusMessageIter *iter) { - _cleanup_strv_free_ char **l = NULL; - int r; - bool runtime = false; - char **value; - const char *name; - - assert(u); - assert(iter); - - if (!unit_get_exec_context(u)) - return -EINVAL; - - r = bus_iter_get_basic_and_next(iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return r; - - r = bus_parse_strv_iter(iter, &l); - if (r < 0) - return r; - - if (!dbus_message_iter_next(iter)) - return -EINVAL; - - r = parse_mode(iter, &runtime, false); - if (r < 0) - return r; - - STRV_FOREACH(value, l) { - _cleanup_free_ char *v = NULL; - CGroupAttribute *a; - const CGroupSemantics *s; - - r = cgroup_semantics_find(NULL, name, *value, &v, &s); - if (r < 0) - return r; - - if (s && !s->multiple && l[1]) - return -EINVAL; - - r = unit_add_cgroup_attribute(u, s, NULL, name, v ? v : *value, &a); - if (r < 0) - return r; - - if (r > 0) { - CGroupBonding *b; - - b = cgroup_bonding_find_list(u->cgroup_bondings, a->controller); - if (!b) { - /* Doesn't exist yet? Then let's add it */ - r = unit_add_cgroup_from_text(u, a->controller, false, &b); - if (r < 0) - return r; - - if (r > 0) { - cgroup_bonding_realize(b); - cgroup_bonding_migrate(b, u->cgroup_bondings); - } - } - - /* Make it count */ - cgroup_attribute_apply(a, u->cgroup_bondings); - } - - } - - r = update_attribute_drop_in(u, runtime, name); - if (r < 0) - return r; - - return 0; -} - -int bus_unit_cgroup_attribute_unset(Unit *u, DBusMessageIter *iter) { - const char *name; - bool runtime; - int r; - - assert(u); - assert(iter); - - if (!unit_get_exec_context(u)) - return -EINVAL; - - r = bus_iter_get_basic_and_next(iter, DBUS_TYPE_STRING, &name, true); - if (r < 0) - return r; - - r = parse_mode(iter, &runtime, false); - if (r < 0) - return r; - - cgroup_attribute_free_some(u->cgroup_attributes, NULL, name); - update_attribute_drop_in(u, runtime, name); - - return 0; -} - const BusProperty bus_unit_properties[] = { { "Id", bus_property_append_string, "s", offsetof(Unit, id), true }, { "Names", bus_unit_append_names, "as", 0 }, @@ -1330,12 +808,6 @@ const BusProperty bus_unit_properties[] = { { "ConditionTimestampMonotonic", bus_property_append_usec, "t", offsetof(Unit, condition_timestamp.monotonic) }, { "ConditionResult", bus_property_append_bool, "b", offsetof(Unit, condition_result) }, { "LoadError", bus_unit_append_load_error, "(ss)", 0 }, - { NULL, } -}; - -const BusProperty bus_unit_cgroup_properties[] = { - { "DefaultControlGroup", bus_unit_append_default_cgroup, "s", 0 }, - { "ControlGroups", bus_unit_append_cgroups, "as", 0 }, - { "ControlGroupAttributes", bus_unit_append_cgroup_attrs, "a(sss)", 0 }, + { "ControlGroup", bus_property_append_string, "s", offsetof(Unit, cgroup_path), true }, { NULL, } }; diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index 83932c5a6c..1e226ef451 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -123,40 +123,14 @@ " <property name=\"ConditionTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \ " <property name=\"ConditionResult\" type=\"b\" access=\"read\"/>\n" \ " <property name=\"LoadError\" type=\"(ss)\" access=\"read\"/>\n" \ + " <property name=\"ControlGroup\" type=\"s\" access=\"read\"/>\n" \ " </interface>\n" -#define BUS_UNIT_CGROUP_INTERFACE \ - " <property name=\"DefaultControlGroup\" type=\"s\" access=\"read\"/>\n" \ - " <property name=\"ControlGroups\" type=\"as\" access=\"read\"/>\n" \ - " <property name=\"ControlGroupAttributes\" type=\"a(sss)\" access=\"read\"/>\n" \ - " <method name=\"SetControlGroup\">\n" \ - " <arg name=\"group\" type=\"s\" direction=\"in\"/>\n" \ - " <arg name=\"mode\" type=\"s\" direction=\"in\"/>\n" \ - " </method>\n" \ - " <method name=\"UnsetControlGroup\">\n" \ - " <arg name=\"group\" type=\"s\" direction=\"in\"/>\n" \ - " <arg name=\"mode\" type=\"s\" direction=\"in\"/>\n" \ - " </method>\n" \ - " <method name=\"GetControlGroupAttribute\">\n" \ - " <arg name=\"attribute\" type=\"s\" direction=\"in\"/>\n" \ - " <arg name=\"values\" type=\"as\" direction=\"out\"/>\n" \ - " </method>\n" \ - " <method name=\"SetControlGroupAttribute\">\n" \ - " <arg name=\"attribute\" type=\"s\" direction=\"in\"/>\n" \ - " <arg name=\"values\" type=\"as\" direction=\"in\"/>\n" \ - " <arg name=\"mode\" type=\"s\" direction=\"in\"/>\n" \ - " </method>\n" \ - " <method name=\"UnsetControlGroupAttribute\">\n" \ - " <arg name=\"attribute\" type=\"s\" direction=\"in\"/>\n" \ - " <arg name=\"mode\" type=\"s\" direction=\"in\"/>\n" \ - " </method>\n" - #define BUS_UNIT_INTERFACES_LIST \ BUS_GENERIC_INTERFACES_LIST \ "org.freedesktop.systemd1.Unit\0" extern const BusProperty bus_unit_properties[]; -extern const BusProperty bus_unit_cgroup_properties[]; void bus_unit_send_change_signal(Unit *u); void bus_unit_send_removed_signal(Unit *u); diff --git a/src/core/dbus.c b/src/core/dbus.c index 1272c938cf..c2097a4dbf 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -28,7 +28,6 @@ #include "dbus.h" #include "log.h" #include "strv.h" -#include "cgroup.h" #include "mkdir.h" #include "missing.h" #include "dbus-unit.h" @@ -453,7 +452,7 @@ static DBusHandlerResult system_bus_message_filter(DBusConnection *connection, D DBUS_TYPE_INVALID)) log_error("Failed to parse Released message: %s", bus_error_message(&error)); else - cgroup_notify_empty(m, cgroup); + manager_notify_cgroup_empty(m, cgroup); } dbus_error_free(&error); @@ -489,7 +488,7 @@ static DBusHandlerResult private_bus_message_filter(DBusConnection *connection, DBUS_TYPE_INVALID)) log_error("Failed to parse Released message: %s", bus_error_message(&error)); else - cgroup_notify_empty(m, cgroup); + manager_notify_cgroup_empty(m, cgroup); /* Forward the message to the system bus, so that user * instances are notified as well */ diff --git a/src/core/execute.c b/src/core/execute.c index 9148d06df4..5e342f8d47 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -55,7 +55,6 @@ #include "sd-messages.h" #include "ioprio.h" #include "securebits.h" -#include "cgroup.h" #include "namespace.h" #include "tcpwrap.h" #include "exit-status.h" @@ -67,6 +66,7 @@ #include "syscall-list.h" #include "env-util.h" #include "fileio.h" +#include "unit.h" #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC) @@ -986,18 +986,17 @@ int exec_spawn(ExecCommand *command, bool apply_chroot, bool apply_tty_stdin, bool confirm_spawn, - CGroupBonding *cgroup_bondings, - CGroupAttribute *cgroup_attributes, - const char *cgroup_suffix, + CGroupControllerMask cgroup_mask, + const char *cgroup_path, const char *unit_id, int idle_pipe[2], pid_t *ret) { + _cleanup_strv_free_ char **files_env = NULL; + int socket_fd; + char *line; pid_t pid; int r; - char *line; - int socket_fd; - _cleanup_strv_free_ char **files_env = NULL; assert(command); assert(context); @@ -1042,17 +1041,6 @@ int exec_spawn(ExecCommand *command, NULL); free(line); - r = cgroup_bonding_realize_list(cgroup_bondings); - if (r < 0) - return r; - - /* We must initialize the attributes in the parent, before we - fork, because we really need them initialized before making - the process a member of the group (which we do in both the - child and the parent), and we cannot really apply them twice - (due to 'append' style attributes) */ - cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings); - if (context->private_tmp && !context->tmp_dir && !context->var_tmp_dir) { r = setup_tmpdirs(&context->tmp_dir, &context->var_tmp_dir); if (r < 0) @@ -1072,7 +1060,6 @@ int exec_spawn(ExecCommand *command, _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL; unsigned n_env = 0; - bool set_access = false; /* child */ @@ -1185,8 +1172,8 @@ int exec_spawn(ExecCommand *command, goto fail_child; } - if (cgroup_bondings) { - err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix); + if (cgroup_path) { + err = cg_attach_with_mask(cgroup_mask, cgroup_path, 0); if (err < 0) { r = EXIT_CGROUP; goto fail_child; @@ -1269,36 +1256,6 @@ int exec_spawn(ExecCommand *command, goto fail_child; } } - - if (cgroup_bondings && context->control_group_modify) { - err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid); - if (err >= 0) - err = cgroup_bonding_set_task_access_list( - cgroup_bondings, - 0644, - uid, - gid, - context->control_group_persistent); - if (err < 0) { - r = EXIT_CGROUP; - goto fail_child; - } - - set_access = true; - } - } - - if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) { - err = cgroup_bonding_set_task_access_list( - cgroup_bondings, - (mode_t) -1, - (uid_t) -1, - (uid_t) -1, - context->control_group_persistent); - if (err < 0) { - r = EXIT_CGROUP; - goto fail_child; - } } if (apply_permissions) { @@ -1562,7 +1519,8 @@ int exec_spawn(ExecCommand *command, * outside of the cgroup) and in the parent (so that we can be * sure that when we kill the cgroup the process will be * killed too). */ - cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix); + if (cgroup_path) + cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid); exec_status_start(&command->exec_status, pid); @@ -1578,7 +1536,6 @@ void exec_context_init(ExecContext *c) { c->cpu_sched_policy = SCHED_OTHER; c->syslog_priority = LOG_DAEMON|LOG_INFO; c->syslog_level_prefix = true; - c->control_group_persistent = -1; c->ignore_sigpipe = true; c->timer_slack_nsec = (nsec_t) -1; } @@ -1843,8 +1800,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { assert(c); assert(f); - if (!prefix) - prefix = ""; + prefix = strempty(prefix); fprintf(f, "%sUMask: %04o\n" @@ -1852,8 +1808,6 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sRootDirectory: %s\n" "%sNonBlocking: %s\n" "%sPrivateTmp: %s\n" - "%sControlGroupModify: %s\n" - "%sControlGroupPersistent: %s\n" "%sPrivateNetwork: %s\n" "%sIgnoreSIGPIPE: %s\n", prefix, c->umask, @@ -1861,8 +1815,6 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, c->root_directory ? c->root_directory : "/", prefix, yes_no(c->non_blocking), prefix, yes_no(c->private_tmp), - prefix, yes_no(c->control_group_modify), - prefix, yes_no(c->control_group_persistent), prefix, yes_no(c->private_network), prefix, yes_no(c->ignore_sigpipe)); diff --git a/src/core/execute.h b/src/core/execute.h index 15574dc97e..c1e9717dc8 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -33,14 +33,11 @@ typedef struct ExecContext ExecContext; #include <stdio.h> #include <sched.h> -struct CGroupBonding; -struct CGroupAttribute; - -typedef struct Unit Unit; - #include "list.h" #include "util.h" +typedef struct Unit Unit; + typedef enum ExecInput { EXEC_INPUT_NULL, EXEC_INPUT_TTY, @@ -148,9 +145,6 @@ struct ExecContext { bool no_new_privileges; - bool control_group_modify; - int control_group_persistent; - /* This is not exposed to the user but available * internally. We need it to make sure that whenever we spawn * /bin/mount it is run in the same process group as us so @@ -166,6 +160,8 @@ struct ExecContext { bool cpu_sched_set:1; }; +#include "cgroup.h" + int exec_spawn(ExecCommand *command, char **argv, ExecContext *context, @@ -175,9 +171,8 @@ int exec_spawn(ExecCommand *command, bool apply_chroot, bool apply_tty_stdin, bool confirm_spawn, - struct CGroupBonding *cgroup_bondings, - struct CGroupAttribute *cgroup_attributes, - const char *cgroup_suffix, + CGroupControllerMask cgroup_mask, + const char *cgroup_path, const char *unit_id, int pipe_fd[2], pid_t *ret); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 9e5a408a30..aa07de0517 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -75,9 +75,7 @@ $1.MountFlags, config_parse_exec_mount_flags, 0, $1.TCPWrapName, config_parse_unit_string_printf, 0, offsetof($1, exec_context.tcpwrap_name) $1.PAMName, config_parse_unit_string_printf, 0, offsetof($1, exec_context.pam_name) $1.IgnoreSIGPIPE, config_parse_bool, 0, offsetof($1, exec_context.ignore_sigpipe) -$1.UtmpIdentifier, config_parse_unit_string_printf, 0, offsetof($1, exec_context.utmp_id) -$1.ControlGroupModify, config_parse_bool, 0, offsetof($1, exec_context.control_group_modify) -$1.ControlGroupPersistent, config_parse_tristate, 0, offsetof($1, exec_context.control_group_persistent)' +$1.UtmpIdentifier, config_parse_unit_string_printf, 0, offsetof($1, exec_context.utmp_id)' )m4_dnl m4_define(`KILL_CONTEXT_CONFIG_ITEMS', `$1.SendSIGKILL, config_parse_bool, 0, offsetof($1, kill_context.send_sigkill) @@ -85,16 +83,17 @@ $1.KillMode, config_parse_kill_mode, 0, $1.KillSignal, config_parse_kill_signal, 0, offsetof($1, kill_context.kill_signal)' )m4_dnl m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS', -`$1.ControlGroup, config_parse_unit_cgroup, 0, 0 -$1.ControlGroupAttribute, config_parse_unit_cgroup_attr, 0, 0 -$1.CPUShares, config_parse_unit_cgroup_attr_pretty, 0, 0 -$1.MemoryLimit, config_parse_unit_cgroup_attr_pretty, 0, 0 -$1.MemorySoftLimit, config_parse_unit_cgroup_attr_pretty, 0, 0 -$1.DeviceAllow, config_parse_unit_cgroup_attr_pretty, 0, 0 -$1.DeviceDeny, config_parse_unit_cgroup_attr_pretty, 0, 0 -$1.BlockIOWeight, config_parse_unit_cgroup_attr_pretty, 0, 0 -$1.BlockIOReadBandwidth, config_parse_unit_cgroup_attr_pretty, 0, 0 -$1.BlockIOWriteBandwidth, config_parse_unit_cgroup_attr_pretty, 0, 0' +`$1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting) +$1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context) +$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) +$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.MemorySoftLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) +$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) +$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) +$1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting) +$1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context) +$1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context) +$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)' )m4_dnl Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description) Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 15fabe860e..57c8156fdd 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -51,6 +51,7 @@ #include "path-util.h" #include "syscall-list.h" #include "env-util.h" +#include "cgroup.h" #ifndef HAVE_SYSV_COMPAT int config_parse_warn_compat(const char *unit, @@ -996,58 +997,6 @@ int config_parse_limit(const char *unit, return 0; } -int config_parse_unit_cgroup(const char *unit, - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { - - Unit *u = userdata; - char *w; - size_t l; - char *state; - - if (isempty(rvalue)) { - /* An empty assignment resets the list */ - cgroup_bonding_free_list(u->cgroup_bondings, false); - u->cgroup_bondings = NULL; - return 0; - } - - FOREACH_WORD_QUOTED(w, l, rvalue, state) { - _cleanup_free_ char *t = NULL, *k = NULL, *ku = NULL; - int r; - - t = strndup(w, l); - if (!t) - return log_oom(); - - k = unit_full_printf(u, t); - if (!k) - log_syntax(unit, LOG_ERR, filename, line, EINVAL, - "Failed to resolve unit specifiers on %s. Ignoring.", - t); - - ku = cunescape(k ? k : t); - if (!ku) - return log_oom(); - - r = unit_add_cgroup_from_text(u, ku, true, NULL); - if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, -r, - "Failed to parse cgroup value %s, ignoring: %s", - k, rvalue); - return 0; - } - } - - return 0; -} - #ifdef HAVE_SYSV_COMPAT int config_parse_sysv_priority(const char *unit, const char *filename, @@ -1793,108 +1742,6 @@ int config_parse_unit_condition_null(const char *unit, DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier"); DEFINE_CONFIG_PARSE_ENUM(config_parse_start_limit_action, start_limit_action, StartLimitAction, "Failed to parse start limit action specifier"); -int config_parse_unit_cgroup_attr(const char *unit, - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { - - Unit *u = data; - size_t a, b; - _cleanup_free_ char *n = NULL, *v = NULL; - const CGroupSemantics *s; - int r; - - assert(filename); - assert(lvalue); - assert(rvalue); - assert(data); - - if (isempty(rvalue)) { - /* Empty assignment clears the list */ - cgroup_attribute_free_list(u->cgroup_attributes); - u->cgroup_attributes = NULL; - return 0; - } - - a = strcspn(rvalue, WHITESPACE); - b = strspn(rvalue + a, WHITESPACE); - if (a <= 0 || b <= 0) { - log_syntax(unit, LOG_ERR, filename, line, EINVAL, - "Failed to parse cgroup attribute value, ignoring: %s", - rvalue); - return 0; - } - - n = strndup(rvalue, a); - if (!n) - return log_oom(); - - r = cgroup_semantics_find(NULL, n, rvalue + a + b, &v, &s); - if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, -r, - "Failed to parse cgroup attribute value, ignoring: %s", - rvalue); - return 0; - } - - r = unit_add_cgroup_attribute(u, s, NULL, n, v ? v : rvalue + a + b, NULL); - if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, -r, - "Failed to add cgroup attribute value, ignoring: %s", rvalue); - return 0; - } - - return 0; -} - -int config_parse_unit_cgroup_attr_pretty(const char *unit, - const char *filename, - unsigned line, - const char *section, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { - - Unit *u = data; - _cleanup_free_ char *v = NULL; - const CGroupSemantics *s; - int r; - - assert(filename); - assert(lvalue); - assert(rvalue); - assert(data); - - r = cgroup_semantics_find(NULL, lvalue, rvalue, &v, &s); - if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, -r, - "Failed to parse cgroup attribute value, ignoring: %s", - rvalue); - return 0; - } else if (r == 0) { - log_syntax(unit, LOG_ERR, filename, line, ENOTSUP, - "Unknown or unsupported cgroup attribute %s, ignoring: %s", - lvalue, rvalue); - return 0; - } - - r = unit_add_cgroup_attribute(u, s, NULL, NULL, v, NULL); - if (r < 0) { - log_syntax(unit, LOG_ERR, filename, line, -r, - "Failed to add cgroup attribute value, ignoring: %s", rvalue); - return 0; - } - - return 0; -} - int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, @@ -2104,6 +1951,285 @@ int config_parse_unit_slice( return 0; } +DEFINE_CONFIG_PARSE_ENUM(config_parse_device_policy, cgroup_device_policy, CGroupDevicePolicy, "Failed to parse device policy"); + +int config_parse_cpu_shares( + const char *unit, + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + CGroupContext *c = data; + unsigned long lu; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + c->cpu_shares = 1024; + return 0; + } + + r = safe_atolu(rvalue, &lu); + if (r < 0 || lu <= 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "CPU shares '%s' invalid. Ignoring.", rvalue); + return 0; + } + + c->cpu_shares = lu; + return 0; +} + +int config_parse_memory_limit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + CGroupContext *c = data; + uint64_t *limit; + off_t bytes; + int r; + + limit = streq(lvalue, "MemoryLimit") ? &c->memory_limit : &c->memory_soft_limit; + + if (isempty(rvalue)) { + *limit = (uint64_t) -1; + return 0; + } + + assert_cc(sizeof(uint64_t) == sizeof(off_t)); + + r = parse_bytes(rvalue, &bytes); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Memory limit '%s' invalid. Ignoring.", rvalue); + return 0; + } + + *limit = (uint64_t) bytes; + return 0; +} + +int config_parse_device_allow( + const char *unit, + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL; + CGroupContext *c = data; + CGroupDeviceAllow *a; + const char *m; + size_t n; + + if (isempty(rvalue)) { + while (c->device_allow) + cgroup_context_free_device_allow(c, c->device_allow); + + return 0; + } + + n = strcspn(rvalue, WHITESPACE); + path = strndup(rvalue, n); + if (!path) + return log_oom(); + + if (!path_startswith(path, "/dev")) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Invalid device node path '%s'. Ignoring.", path); + return 0; + } + + m = rvalue + n + strspn(rvalue + n, WHITESPACE); + if (isempty(m)) + m = "rwm"; + + if (!in_charset(m, "rwm")) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Invalid device rights '%s'. Ignoring.", m); + return 0; + } + + a = new0(CGroupDeviceAllow, 1); + if (!a) + return log_oom(); + + a->path = path; + path = NULL; + a->r = !!strchr(m, 'r'); + a->w = !!strchr(m, 'w'); + a->m = !!strchr(m, 'm'); + + LIST_PREPEND(CGroupDeviceAllow, device_allow, c->device_allow, a); + return 0; +} + +int config_parse_blockio_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL; + CGroupContext *c = data; + unsigned long lu; + const char *weight; + size_t n; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + c->blockio_weight = 1000; + + while (c->blockio_device_weights) + cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights); + + return 0; + } + + n = strcspn(rvalue, WHITESPACE); + weight = rvalue + n; + if (*weight) { + /* Two params, first device name, then weight */ + path = strndup(rvalue, n); + if (!path) + return log_oom(); + + if (!path_startswith(path, "/dev")) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Invalid device node path '%s'. Ignoring.", path); + return 0; + } + + weight += strspn(weight, WHITESPACE); + } else + /* One param, only weight */ + weight = rvalue; + + r = safe_atolu(weight, &lu); + if (r < 0 || lu < 10 || lu > 1000) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Block IO weight '%s' invalid. Ignoring.", rvalue); + return 0; + } + + if (!path) + c->blockio_weight = lu; + else { + CGroupBlockIODeviceWeight *w; + + w = new0(CGroupBlockIODeviceWeight, 1); + if (!w) + return log_oom(); + + w->path = path; + path = NULL; + + w->weight = lu; + + LIST_PREPEND(CGroupBlockIODeviceWeight, device_weights, c->blockio_device_weights, w); + } + + return 0; +} + +int config_parse_blockio_bandwidth( + const char *unit, + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL; + CGroupBlockIODeviceBandwidth *b; + CGroupContext *c = data; + const char *bandwidth; + off_t bytes; + size_t n; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + while (c->blockio_device_bandwidths) + cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths); + + return 0; + } + + n = strcspn(rvalue, WHITESPACE); + bandwidth = rvalue + n; + bandwidth += strspn(bandwidth, WHITESPACE); + + if (!*bandwidth) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Expected space separated pair of device node and bandwidth. Ignoring."); + return 0; + } + + path = strndup(rvalue, n); + if (!path) + return log_oom(); + + if (!path_startswith(path, "/dev")) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Invalid device node path '%s'. Ignoring.", path); + return 0; + } + + r = parse_bytes(bandwidth, &bytes); + if (r < 0 || bytes <= 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Block IO Bandwidth '%s' invalid. Ignoring.", rvalue); + return 0; + } + + b = new0(CGroupBlockIODeviceBandwidth, 1); + if (!b) + return log_oom(); + + b->path = path; + path = NULL; + b->bandwidth = (uint64_t) bytes; + + LIST_PREPEND(CGroupBlockIODeviceBandwidth, device_bandwidths, c->blockio_device_bandwidths, b); + + return 0; +} + #define FOLLOW_MAX 8 static int open_follow(char **filename, FILE **_f, Set *names, char **_final) { @@ -2463,7 +2589,6 @@ void unit_dump_config_items(FILE *f) { { config_parse_exec_secure_bits, "SECUREBITS" }, { config_parse_bounding_set, "BOUNDINGSET" }, { config_parse_limit, "LIMIT" }, - { config_parse_unit_cgroup, "CGROUP [...]" }, { config_parse_unit_deps, "UNIT [...]" }, { config_parse_exec, "PATH [ARGUMENT [...]]" }, { config_parse_service_type, "SERVICETYPE" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index f9677baa0f..5e36f3538a 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -55,7 +55,6 @@ int config_parse_exec_capabilities(const char *unit, const char *filename, unsig int config_parse_exec_secure_bits(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_bounding_set(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_limit(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); -int config_parse_unit_cgroup(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_sysv_priority(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_fsck_passno(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_kill_signal(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); @@ -73,12 +72,16 @@ int config_parse_unit_condition_null(const char *unit, const char *filename, uns int config_parse_kill_mode(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_notify_access(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_start_limit_action(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); -int config_parse_unit_cgroup_attr(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); -int config_parse_unit_cgroup_attr_pretty(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_memory_limit(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_device_policy(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_device_allow(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_blockio_weight(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_blockio_bandwidth(const char *unit, const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/core/main.c b/src/core/main.c index c123de91ce..3c6fccf527 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -88,7 +88,6 @@ static int arg_crash_chvt = -1; static bool arg_confirm_spawn = false; static bool arg_show_status = true; static bool arg_switched_root = false; -static char **arg_default_controllers = NULL; static char ***arg_join_controllers = NULL; static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL; static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; @@ -642,7 +641,6 @@ static int parse_config_file(void) { { "Manager", "ShowStatus", config_parse_bool, 0, &arg_show_status }, { "Manager", "CrashChVT", config_parse_int, 0, &arg_crash_chvt }, { "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL }, - { "Manager", "DefaultControllers", config_parse_strv, 0, &arg_default_controllers }, { "Manager", "DefaultStandardOutput", config_parse_output, 0, &arg_default_std_output }, { "Manager", "DefaultStandardError", config_parse_output, 0, &arg_default_std_error }, { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers }, @@ -1632,9 +1630,6 @@ int main(int argc, char *argv[]) { manager_set_default_rlimits(m, arg_default_rlimit); - if (arg_default_controllers) - manager_set_default_controllers(m, arg_default_controllers); - if (arg_default_environment) manager_set_default_environment(m, arg_default_environment); @@ -1807,7 +1802,6 @@ finish: free(arg_default_rlimit[j]); free(arg_default_unit); - strv_free(arg_default_controllers); free_join_controllers(); dbus_shutdown(); diff --git a/src/core/manager.c b/src/core/manager.c index 2416dd0f1e..6ba51a4116 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -55,7 +55,6 @@ #include "util.h" #include "mkdir.h" #include "ratelimit.h" -#include "cgroup.h" #include "mount-setup.h" #include "unit-name.h" #include "dbus-unit.h" @@ -467,12 +466,6 @@ int manager_new(SystemdRunningAs running_as, Manager **_m) { manager_strip_environment(m); - if (running_as == SYSTEMD_SYSTEM) { - m->default_controllers = strv_new("cpu", NULL); - if (!m->default_controllers) - goto fail; - } - if (!(m->units = hashmap_new(string_hash_func, string_compare_func))) goto fail; @@ -482,7 +475,8 @@ int manager_new(SystemdRunningAs running_as, Manager **_m) { if (!(m->watch_pids = hashmap_new(trivial_hash_func, trivial_compare_func))) goto fail; - if (!(m->cgroup_bondings = hashmap_new(string_hash_func, string_compare_func))) + m->cgroup_unit = hashmap_new(string_hash_func, string_compare_func); + if (!m->cgroup_unit) goto fail; if (!(m->watch_bus = hashmap_new(string_hash_func, string_compare_func))) @@ -712,9 +706,7 @@ void manager_free(Manager *m) { lookup_paths_free(&m->lookup_paths); strv_free(m->environment); - strv_free(m->default_controllers); - - hashmap_free(m->cgroup_bondings); + hashmap_free(m->cgroup_unit); set_free_free(m->unit_path_cache); close_pipe(m->idle_pipe); @@ -1220,7 +1212,7 @@ static int manager_process_notify_fd(Manager *m) { u = hashmap_get(m->watch_pids, LONG_TO_PTR(ucred->pid)); if (!u) { - u = cgroup_unit_by_pid(m, ucred->pid); + u = manager_get_unit_by_pid(m, ucred->pid); if (!u) { log_warning("Cannot find unit for notify message of PID %lu.", (unsigned long) ucred->pid); continue; @@ -1285,7 +1277,7 @@ static int manager_dispatch_sigchld(Manager *m) { /* And now figure out the unit this belongs to */ u = hashmap_get(m->watch_pids, LONG_TO_PTR(si.si_pid)); if (!u) - u = cgroup_unit_by_pid(m, si.si_pid); + u = manager_get_unit_by_pid(m, si.si_pid); /* And now, we actually reap the zombie. */ if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) { @@ -1741,6 +1733,9 @@ int manager_loop(Manager *m) { if (manager_dispatch_gc_queue(m) > 0) continue; + if (manager_dispatch_cgroup_queue(m) > 0) + continue; + if (manager_dispatch_dbus_queue(m) > 0) continue; @@ -2586,23 +2581,6 @@ int manager_set_default_environment(Manager *m, char **environment) { return 0; } -int manager_set_default_controllers(Manager *m, char **controllers) { - char **l; - - assert(m); - - l = strv_copy(controllers); - if (!l) - return -ENOMEM; - - strv_free(m->default_controllers); - m->default_controllers = l; - - cg_shorten_controllers(m->default_controllers); - - return 0; -} - int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) { int i; diff --git a/src/core/manager.h b/src/core/manager.h index f0bb2eb035..68cb2e4a3d 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -27,6 +27,7 @@ #include <dbus/dbus.h> #include "fdset.h" +#include "cgroup-util.h" /* Enforce upper limit how many names we allow */ #define MANAGER_MAX_NAMES 131072 /* 128K */ @@ -86,6 +87,7 @@ struct Watch { #include "dbus.h" #include "path-lookup.h" #include "execute.h" +#include "unit-name.h" struct Manager { /* Note that the set of units we know of is allowed to be @@ -122,6 +124,9 @@ struct Manager { /* Units to check when doing GC */ LIST_HEAD(Unit, gc_queue); + /* Units that should be realized */ + LIST_HEAD(Unit, cgroup_queue); + Hashmap *watch_pids; /* pid => Unit object n:1 */ char *notify_socket; @@ -139,7 +144,6 @@ struct Manager { Set *unit_path_cache; char **environment; - char **default_controllers; usec_t runtime_watchdog; usec_t shutdown_watchdog; @@ -198,7 +202,8 @@ struct Manager { int dev_autofs_fd; /* Data specific to the cgroup subsystem */ - Hashmap *cgroup_bondings; /* path string => CGroupBonding object 1:n */ + Hashmap *cgroup_unit; + CGroupControllerMask cgroup_supported; char *cgroup_root; usec_t gc_queue_timestamp; @@ -273,7 +278,6 @@ unsigned manager_dispatch_run_queue(Manager *m); unsigned manager_dispatch_dbus_queue(Manager *m); int manager_set_default_environment(Manager *m, char **environment); -int manager_set_default_controllers(Manager *m, char **controllers); int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit); int manager_loop(Manager *m); diff --git a/src/core/mount.c b/src/core/mount.c index e21e774d4d..c71d51bfa4 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -82,6 +82,7 @@ static void mount_init(Unit *u) { } kill_context_init(&m->kill_context); + cgroup_context_init(&m->cgroup_context); /* We need to make sure that /bin/mount is always called in * the same process group as us, so that the autofs kernel @@ -127,6 +128,7 @@ static void mount_done(Unit *u) { mount_parameters_done(&m->parameters_proc_self_mountinfo); mount_parameters_done(&m->parameters_fragment); + cgroup_context_done(&m->cgroup_context); exec_context_done(&m->exec_context, manager_is_reloading_or_reexecuting(u->manager)); exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX); m->control_command = NULL; @@ -651,10 +653,6 @@ static int mount_add_extras(Mount *m) { if (r < 0) return r; - r = unit_add_default_cgroups(u); - if (r < 0) - return r; - r = mount_fix_timeouts(m); if (r < 0) return r; @@ -848,28 +846,31 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { assert(c); assert(_pid); + unit_realize_cgroup(UNIT(m)); + r = unit_watch_timer(UNIT(m), CLOCK_MONOTONIC, true, m->timeout_usec, &m->timer_watch); if (r < 0) goto fail; - if ((r = exec_spawn(c, - NULL, - &m->exec_context, - NULL, 0, - UNIT(m)->manager->environment, - true, - true, - true, - UNIT(m)->manager->confirm_spawn, - UNIT(m)->cgroup_bondings, - UNIT(m)->cgroup_attributes, - NULL, - UNIT(m)->id, - NULL, - &pid)) < 0) + r = exec_spawn(c, + NULL, + &m->exec_context, + NULL, 0, + UNIT(m)->manager->environment, + true, + true, + true, + UNIT(m)->manager->confirm_spawn, + UNIT(m)->cgroup_mask, + UNIT(m)->cgroup_path, + UNIT(m)->id, + NULL, + &pid); + if (r < 0) goto fail; - if ((r = unit_watch_pid(UNIT(m), pid)) < 0) + r = unit_watch_pid(UNIT(m), pid); + if (r < 0) /* FIXME: we need to do something here */ goto fail; @@ -1878,8 +1879,9 @@ const UnitVTable mount_vtable = { "Mount\0" "Install\0", + .private_section = "Mount", .exec_context_offset = offsetof(Mount, exec_context), - .exec_section = "Mount", + .cgroup_context_offset = offsetof(Mount, cgroup_context), .no_alias = true, .no_instances = true, diff --git a/src/core/mount.h b/src/core/mount.h index bcc10ee0d4..7cd4320d94 100644 --- a/src/core/mount.h +++ b/src/core/mount.h @@ -25,6 +25,8 @@ typedef struct Mount Mount; #include "unit.h" #include "kill.h" +#include "execute.h" +#include "cgroup.h" typedef enum MountState { MOUNT_DEAD, @@ -95,8 +97,10 @@ struct Mount { usec_t timeout_usec; ExecCommand exec_command[_MOUNT_EXEC_COMMAND_MAX]; + ExecContext exec_context; KillContext kill_context; + CGroupContext cgroup_context; MountState state, deserialized_state; diff --git a/src/core/service.c b/src/core/service.c index a0c648a85b..5fdbdb13a3 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -141,6 +141,7 @@ static void service_init(Unit *u) { exec_context_init(&s->exec_context); kill_context_init(&s->kill_context); + cgroup_context_init(&s->cgroup_context); RATELIMIT_INIT(s->start_limit, 10*USEC_PER_SEC, 5); @@ -283,6 +284,7 @@ static void service_done(Unit *u) { free(s->status_text); s->status_text = NULL; + cgroup_context_done(&s->cgroup_context); exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager)); exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX); s->control_command = NULL; @@ -1229,10 +1231,6 @@ static int service_load(Unit *u) { if (r < 0) return r; - r = unit_add_default_cgroups(u); - if (r < 0) - return r; - #ifdef HAVE_SYSV_COMPAT r = sysv_fix_order(s); if (r < 0) @@ -1457,7 +1455,7 @@ static int service_search_main_pid(Service *s) { assert(s->main_pid <= 0); - pid = cgroup_bonding_search_main_pid_list(UNIT(s)->cgroup_bondings); + pid = unit_search_main_pid(UNIT(s)); if (pid <= 0) return -ENOENT; @@ -1582,7 +1580,7 @@ static void service_set_state(Service *s, ServiceState state) { /* For the inactive states unit_notify() will trim the cgroup, * but for exit we have to do that ourselves... */ if (state == SERVICE_EXITED && UNIT(s)->manager->n_reloading <= 0) - cgroup_bonding_trim_list(UNIT(s)->cgroup_bondings, true); + unit_destroy_cgroup(UNIT(s)); if (old_state != state) log_debug_unit(UNIT(s)->id, @@ -1751,11 +1749,14 @@ static int service_spawn( unsigned n_fds = 0, n_env = 0; _cleanup_strv_free_ char **argv = NULL, **final_env = NULL, **our_env = NULL; + const char *path; assert(s); assert(c); assert(_pid); + unit_realize_cgroup(UNIT(s)); + if (pass_fds || s->exec_context.std_input == EXEC_INPUT_SOCKET || s->exec_context.std_output == EXEC_OUTPUT_SOCKET || @@ -1811,7 +1812,7 @@ static int service_spawn( goto fail; } - if (s->meta.manager->running_as != SYSTEMD_SYSTEM) + if (UNIT(s)->manager->running_as != SYSTEMD_SYSTEM) if (asprintf(our_env + n_env++, "MANAGERPID=%lu", (unsigned long) getpid()) < 0) { r = -ENOMEM; goto fail; @@ -1823,6 +1824,12 @@ static int service_spawn( goto fail; } + if (is_control && UNIT(s)->cgroup_path) { + path = strappenda(UNIT(s)->cgroup_path, "/control"); + cg_create(SYSTEMD_CGROUP_CONTROLLER, path); + } else + path = UNIT(s)->cgroup_path; + r = exec_spawn(c, argv, &s->exec_context, @@ -1832,9 +1839,8 @@ static int service_spawn( apply_chroot, apply_tty_stdin, UNIT(s)->manager->confirm_spawn, - UNIT(s)->cgroup_bondings, - UNIT(s)->cgroup_attributes, - is_control ? "control" : NULL, + UNIT(s)->cgroup_mask, + path, UNIT(s)->id, s->type == SERVICE_IDLE ? UNIT(s)->manager->idle_pipe : NULL, &pid); @@ -1893,7 +1899,10 @@ static int cgroup_good(Service *s) { assert(s); - r = cgroup_bonding_is_empty_list(UNIT(s)->cgroup_bondings); + if (!UNIT(s)->cgroup_path) + return 0; + + r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path, true); if (r < 0) return r; @@ -2123,10 +2132,21 @@ fail: service_enter_stop(s, SERVICE_FAILURE_RESOURCES); } +static void service_kill_control_processes(Service *s) { + char *p; + + if (!UNIT(s)->cgroup_path) + return; + + p = strappenda(UNIT(s)->cgroup_path, "/control"); + + cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, true, true, true, NULL); +} + static void service_enter_start(Service *s) { + ExecCommand *c; pid_t pid; int r; - ExecCommand *c; assert(s); @@ -2141,7 +2161,7 @@ static void service_enter_start(Service *s) { /* We want to ensure that nobody leaks processes from * START_PRE here, so let's go on a killing spree, People * should not spawn long running processes from START_PRE. */ - cgroup_bonding_kill_list(UNIT(s)->cgroup_bondings, SIGKILL, true, true, NULL, "control"); + service_kill_control_processes(s); if (s->type == SERVICE_FORKING) { s->control_command_id = SERVICE_EXEC_START; @@ -2217,11 +2237,9 @@ static void service_enter_start_pre(Service *s) { s->control_command = s->exec_command[SERVICE_EXEC_START_PRE]; if (s->control_command) { - /* Before we start anything, let's clear up what might * be left from previous runs. */ - cgroup_bonding_kill_list(UNIT(s)->cgroup_bondings, SIGKILL, - true,true, NULL, "control"); + service_kill_control_processes(s); s->control_command_id = SERVICE_EXEC_START_PRE; @@ -3045,7 +3063,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { } } else if (s->control_pid == pid) { - s->control_pid = 0; if (s->control_command) { @@ -3066,8 +3083,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* Immediately get rid of the cgroup, so that the * kernel doesn't delay the cgroup empty messages for * the service cgroup any longer than necessary */ - cgroup_bonding_kill_list(UNIT(s)->cgroup_bondings, SIGKILL, - true, true, NULL, "control"); + service_kill_control_processes(s); if (s->control_command && s->control_command->command_next && @@ -3296,7 +3312,7 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) { } } -static void service_cgroup_notify_event(Unit *u) { +static void service_notify_cgroup_empty_event(Unit *u) { Service *s = SERVICE(u); assert(u); @@ -3823,8 +3839,9 @@ const UnitVTable service_vtable = { "Service\0" "Install\0", + .private_section = "Service", .exec_context_offset = offsetof(Service, exec_context), - .exec_section = "Service", + .cgroup_context_offset = offsetof(Service, cgroup_context), .init = service_init, .done = service_done, @@ -3857,7 +3874,7 @@ const UnitVTable service_vtable = { .reset_failed = service_reset_failed, - .cgroup_notify_empty = service_cgroup_notify_event, + .notify_cgroup_empty = service_notify_cgroup_empty_event, .notify_message = service_notify_message, .bus_name_owner_change = service_bus_name_owner_change, diff --git a/src/core/service.h b/src/core/service.h index 703d3faa45..182cba1333 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -135,6 +135,7 @@ struct Service { ExecContext exec_context; KillContext kill_context; + CGroupContext cgroup_context; ServiceState state, deserialized_state; diff --git a/src/core/slice.c b/src/core/slice.c index c1c33fe5c6..df2d91e473 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -36,6 +36,23 @@ static const UnitActiveState state_translation_table[_SLICE_STATE_MAX] = { [SLICE_ACTIVE] = UNIT_ACTIVE }; +static void slice_init(Unit *u) { + Slice *s = SLICE(u); + + assert(u); + assert(u->load_state == UNIT_STUB); + + cgroup_context_init(&s->cgroup_context); +} + +static void slice_done(Unit *u) { + Slice *s = SLICE(u); + + assert(u); + + cgroup_context_done(&s->cgroup_context); +} + static void slice_set_state(Slice *t, SliceState state) { SliceState old_state; assert(t); @@ -52,23 +69,25 @@ static void slice_set_state(Slice *t, SliceState state) { unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], true); } -static int slice_add_slice_link(Slice *s) { +static int slice_add_parent_slice(Slice *s) { char *a, *dash; - int r; Unit *parent; + int r; assert(s); - if (UNIT_DEREF(UNIT(s)->slice)) + if (UNIT_ISSET(UNIT(s)->slice)) return 0; - a = strdupa(UNIT(s)->id); - - dash = strrchr(a, '-'); - if (!dash) + if (unit_has_name(UNIT(s), SPECIAL_ROOT_SLICE)) return 0; - strcpy(dash, ".slice"); + a = strdupa(UNIT(s)->id); + dash = strrchr(a, '-'); + if (dash) + strcpy(dash, ".slice"); + else + a = (char*) SPECIAL_ROOT_SLICE; r = manager_load_unit(UNIT(s)->manager, a, NULL, NULL, &parent); if (r < 0) @@ -102,14 +121,15 @@ static int slice_verify(Slice *s) { a = strdupa(UNIT(s)->id); dash = strrchr(a, '-'); - if (dash) { + if (dash) strcpy(dash, ".slice"); + else + a = (char*) SPECIAL_ROOT_SLICE; - if (!unit_has_name(UNIT_DEREF(UNIT(s)->slice), a)) { - log_error_unit(UNIT(s)->id, - "%s located outside its parent slice. Refusing.", UNIT(s)->id); - return -EINVAL; - } + if (!unit_has_name(UNIT_DEREF(UNIT(s)->slice), a)) { + log_error_unit(UNIT(s)->id, + "%s located outside its parent slice. Refusing.", UNIT(s)->id); + return -EINVAL; } } @@ -122,14 +142,14 @@ static int slice_load(Unit *u) { assert(s); - r = unit_load_fragment_and_dropin(u); + r = unit_load_fragment_and_dropin_optional(u); if (r < 0) return r; /* This is a new unit? Then let's add in some extras */ if (u->load_state == UNIT_LOADED) { - r = slice_add_slice_link(s); + r = slice_add_parent_slice(s); if (r < 0) return r; @@ -138,10 +158,6 @@ static int slice_load(Unit *u) { if (r < 0) return r; } - - r = unit_add_default_cgroups(UNIT(s)); - if (r < 0) - return r; } return slice_verify(s); @@ -168,20 +184,17 @@ static void slice_dump(Unit *u, FILE *f, const char *prefix) { fprintf(f, "%sSlice State: %s\n", prefix, slice_state_to_string(t->state)); + + cgroup_context_dump(&t->cgroup_context, f, prefix); } static int slice_start(Unit *u) { Slice *t = SLICE(u); - int r; assert(t); assert(t->state == SLICE_DEAD); - r = cgroup_bonding_realize_list(u->cgroup_bondings); - if (r < 0) - return r; - - cgroup_attribute_apply_list(u->cgroup_attributes, u->cgroup_bondings); + unit_realize_cgroup(u); slice_set_state(t, SLICE_ACTIVE); return 0; @@ -193,8 +206,8 @@ static int slice_stop(Unit *u) { assert(t); assert(t->state == SLICE_ACTIVE); - /* We do not need to trim the cgroup explicitly, unit_notify() - * will do that for us anyway. */ + /* We do not need to destroy the cgroup explicitly, + * unit_notify() will do that for us anyway. */ slice_set_state(t, SLICE_DEAD); return 0; @@ -264,10 +277,16 @@ const UnitVTable slice_vtable = { "Slice\0" "Install\0", + .private_section = "Slice", + .cgroup_context_offset = offsetof(Slice, cgroup_context), + .no_alias = true, .no_instances = true, + .init = slice_init, .load = slice_load, + .done = slice_done, + .coldplug = slice_coldplug, .dump = slice_dump, @@ -288,11 +307,11 @@ const UnitVTable slice_vtable = { .status_message_formats = { .finished_start_job = { - [JOB_DONE] = "Installed slice %s.", + [JOB_DONE] = "Created slice %s.", [JOB_DEPENDENCY] = "Dependency failed for %s.", }, .finished_stop_job = { - [JOB_DONE] = "Deinstalled slice %s.", + [JOB_DONE] = "Removed slice %s.", }, }, }; diff --git a/src/core/slice.h b/src/core/slice.h index 4320a6354b..ad0c63902b 100644 --- a/src/core/slice.h +++ b/src/core/slice.h @@ -36,6 +36,8 @@ struct Slice { Unit meta; SliceState state, deserialized_state; + + CGroupContext cgroup_context; }; extern const UnitVTable slice_vtable; diff --git a/src/core/socket.c b/src/core/socket.c index 2f25e25aa6..c1bbaec447 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -88,6 +88,7 @@ static void socket_init(Unit *u) { s->exec_context.std_output = u->manager->default_std_output; s->exec_context.std_error = u->manager->default_std_error; kill_context_init(&s->kill_context); + cgroup_context_init(&s->cgroup_context); s->control_command_id = _SOCKET_EXEC_COMMAND_INVALID; } @@ -128,6 +129,8 @@ static void socket_done(Unit *u) { socket_free_ports(s); exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager)); + cgroup_context_init(&s->cgroup_context); + exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX); s->control_command = NULL; @@ -399,10 +402,6 @@ static int socket_load(Unit *u) { if (r < 0) return r; - r = unit_add_default_cgroups(u); - if (r < 0) - return r; - if (UNIT(s)->default_dependencies) if ((r = socket_add_default_dependencies(s)) < 0) return r; @@ -1210,6 +1209,8 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { assert(c); assert(_pid); + unit_realize_cgroup(UNIT(s)); + r = unit_watch_timer(UNIT(s), CLOCK_MONOTONIC, true, s->timeout_usec, &s->timer_watch); if (r < 0) goto fail; @@ -1229,9 +1230,8 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { true, true, UNIT(s)->manager->confirm_spawn, - UNIT(s)->cgroup_bondings, - UNIT(s)->cgroup_attributes, - NULL, + UNIT(s)->cgroup_mask, + UNIT(s)->cgroup_path, UNIT(s)->id, NULL, &pid); @@ -2361,8 +2361,9 @@ const UnitVTable socket_vtable = { "Socket\0" "Install\0", + .private_section = "Socket", .exec_context_offset = offsetof(Socket, exec_context), - .exec_section = "Socket", + .cgroup_context_offset = offsetof(Socket, cgroup_context), .init = socket_init, .done = socket_done, diff --git a/src/core/socket.h b/src/core/socket.h index 9d48cde0a6..15942c1c90 100644 --- a/src/core/socket.h +++ b/src/core/socket.h @@ -102,6 +102,7 @@ struct Socket { ExecCommand* exec_command[_SOCKET_EXEC_COMMAND_MAX]; ExecContext exec_context; KillContext kill_context; + CGroupContext cgroup_context; /* For Accept=no sockets refers to the one service we'll activate. For Accept=yes sockets is either NULL, or filled diff --git a/src/core/special.h b/src/core/special.h index 337a0a43e9..6d252e7baa 100644 --- a/src/core/special.h +++ b/src/core/special.h @@ -118,3 +118,4 @@ #define SPECIAL_SYSTEM_SLICE "system.slice" #define SPECIAL_USER_SLICE "user.slice" #define SPECIAL_MACHINE_SLICE "machine.slice" +#define SPECIAL_ROOT_SLICE "-.slice" diff --git a/src/core/swap.c b/src/core/swap.c index d6721a6b31..0d4b4fa4f9 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -92,6 +92,7 @@ static void swap_init(Unit *u) { s->exec_context.std_output = u->manager->default_std_output; s->exec_context.std_error = u->manager->default_std_error; kill_context_init(&s->kill_context); + cgroup_context_init(&s->cgroup_context); s->parameters_proc_swaps.priority = s->parameters_fragment.priority = -1; @@ -129,6 +130,8 @@ static void swap_done(Unit *u) { exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX); s->control_command = NULL; + cgroup_context_done(&s->cgroup_context); + swap_unwatch_control_pid(s); unit_unwatch_timer(u, &s->timer_watch); @@ -291,10 +294,6 @@ static int swap_load(Unit *u) { if (r < 0) return r; - r = unit_add_default_cgroups(u); - if (r < 0) - return r; - if (UNIT(s)->default_dependencies) { r = swap_add_default_dependencies(s); if (r < 0) @@ -593,6 +592,8 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { assert(c); assert(_pid); + unit_realize_cgroup(UNIT(s)); + r = unit_watch_timer(UNIT(s), CLOCK_MONOTONIC, true, s->timeout_usec, &s->timer_watch); if (r < 0) goto fail; @@ -606,9 +607,8 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { true, true, UNIT(s)->manager->confirm_spawn, - UNIT(s)->cgroup_bondings, - UNIT(s)->cgroup_attributes, - NULL, + UNIT(s)->cgroup_mask, + UNIT(s)->cgroup_path, UNIT(s)->id, NULL, &pid); @@ -1327,8 +1327,9 @@ const UnitVTable swap_vtable = { "Swap\0" "Install\0", + .private_section = "Swap", .exec_context_offset = offsetof(Swap, exec_context), - .exec_section = "Swap", + .cgroup_context_offset = offsetof(Swap, cgroup_context), .no_alias = true, .no_instances = true, diff --git a/src/core/swap.h b/src/core/swap.h index 121889d1d5..7e48c0ea3b 100644 --- a/src/core/swap.h +++ b/src/core/swap.h @@ -88,6 +88,7 @@ struct Swap { ExecCommand exec_command[_SWAP_EXEC_COMMAND_MAX]; ExecContext exec_context; KillContext kill_context; + CGroupContext cgroup_context; SwapState state, deserialized_state; diff --git a/src/core/unit.c b/src/core/unit.c index f75045dc48..0dcf85b5e0 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -44,7 +44,6 @@ #include "special.h" #include "cgroup-util.h" #include "missing.h" -#include "cgroup-attr.h" #include "mkdir.h" #include "label.h" #include "fileio-label.h" @@ -402,9 +401,10 @@ void unit_free(Unit *u) { u->manager->n_in_gc_queue--; } - cgroup_bonding_free_list(u->cgroup_bondings, u->manager->n_reloading <= 0); - cgroup_attribute_free_list(u->cgroup_attributes); + if (u->in_cgroup_queue) + LIST_REMOVE(Unit, cgroup_queue, u->manager->cgroup_queue, u); + free(u->cgroup_path); free(u->description); strv_free(u->documentation); free(u->fragment_path); @@ -673,7 +673,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { "%s\tInactive Enter Timestamp: %s\n" "%s\tGC Check Good: %s\n" "%s\tNeed Daemon Reload: %s\n" - "%s\tSlice: %s\n", + "%s\tSlice: %s\n" + "%s\tCGroup: %s\n" + "%s\tCGroup realized: %s\n" + "%s\tCGroup mask: 0x%x\n", prefix, u->id, prefix, unit_description(u), prefix, strna(u->instance), @@ -685,7 +688,10 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { prefix, strna(format_timestamp(timestamp4, sizeof(timestamp4), u->inactive_enter_timestamp.realtime)), prefix, yes_no(unit_check_gc(u)), prefix, yes_no(unit_need_daemon_reload(u)), - prefix, strna(unit_slice_name(u))); + prefix, strna(unit_slice_name(u)), + prefix, strna(u->cgroup_path), + prefix, yes_no(u->cgroup_realized), + prefix, u->cgroup_mask); SET_FOREACH(t, u->names, i) fprintf(f, "%s\tName: %s\n", prefix, t); @@ -735,8 +741,6 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { } if (u->load_state == UNIT_LOADED) { - CGroupBonding *b; - CGroupAttribute *a; fprintf(f, "%s\tStopWhenUnneeded: %s\n" @@ -754,20 +758,6 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) { prefix, yes_no(u->ignore_on_isolate), prefix, yes_no(u->ignore_on_snapshot)); - LIST_FOREACH(by_unit, b, u->cgroup_bondings) - fprintf(f, "%s\tControlGroup: %s:%s\n", - prefix, b->controller, b->path); - - LIST_FOREACH(by_unit, a, u->cgroup_attributes) { - _cleanup_free_ char *v = NULL; - - if (a->semantics && a->semantics->map_write) - a->semantics->map_write(a->semantics, a->value, &v); - - fprintf(f, "%s\tControlGroupAttribute: %s %s \"%s\"\n", - prefix, a->controller, a->name, v ? v : a->value); - } - if (UNIT_VTABLE(u)->dump) UNIT_VTABLE(u)->dump(u, f, prefix2); @@ -795,14 +785,16 @@ int unit_load_fragment_and_dropin(Unit *u) { assert(u); /* Load a .service file */ - if ((r = unit_load_fragment(u)) < 0) + r = unit_load_fragment(u); + if (r < 0) return r; if (u->load_state == UNIT_STUB) return -ENOENT; /* Load drop-in directory data */ - if ((r = unit_load_dropin(unit_follow_merge(u))) < 0) + r = unit_load_dropin(unit_follow_merge(u)); + if (r < 0) return r; return 0; @@ -818,14 +810,16 @@ int unit_load_fragment_and_dropin_optional(Unit *u) { * something can be loaded or not doesn't matter. */ /* Load a .service file */ - if ((r = unit_load_fragment(u)) < 0) + r = unit_load_fragment(u); + if (r < 0) return r; if (u->load_state == UNIT_STUB) u->load_state = UNIT_LOADED; /* Load drop-in directory data */ - if ((r = unit_load_dropin(unit_follow_merge(u))) < 0) + r = unit_load_dropin(unit_follow_merge(u)); + if (r < 0) return r; return 0; @@ -880,8 +874,12 @@ static int unit_add_default_dependencies(Unit *u) { return r; } - if (u->default_dependencies && UNIT_ISSET(u->slice)) { - r = unit_add_two_dependencies(u, UNIT_AFTER, UNIT_WANTS, UNIT_DEREF(u->slice), true); + if (u->default_dependencies && unit_get_cgroup_context(u)) { + if (UNIT_ISSET(u->slice)) + r = unit_add_two_dependencies(u, UNIT_AFTER, UNIT_WANTS, UNIT_DEREF(u->slice), true); + else + r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_WANTS, SPECIAL_ROOT_SLICE, NULL, true); + if (r < 0) return r; } @@ -1382,7 +1380,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su } if (UNIT_IS_INACTIVE_OR_FAILED(ns)) - cgroup_bonding_trim_list(u->cgroup_bondings, true); + unit_destroy_cgroup(u); if (UNIT_IS_INACTIVE_OR_FAILED(os) != UNIT_IS_INACTIVE_OR_FAILED(ns)) { ExecContext *ec = unit_get_exec_context(u); @@ -1952,51 +1950,16 @@ char *unit_dbus_path(Unit *u) { return unit_dbus_path_from_name(u->id); } -static int unit_add_cgroup(Unit *u, CGroupBonding *b) { - int r; - - assert(u); - assert(b); - - assert(b->path); - - if (!b->controller) { - b->controller = strdup(SYSTEMD_CGROUP_CONTROLLER); - if (!b->controller) - return log_oom(); - - b->ours = true; - } - - /* Ensure this hasn't been added yet */ - assert(!b->unit); - - if (streq(b->controller, SYSTEMD_CGROUP_CONTROLLER)) { - CGroupBonding *l; - - l = hashmap_get(u->manager->cgroup_bondings, b->path); - LIST_PREPEND(CGroupBonding, by_path, l, b); - - r = hashmap_replace(u->manager->cgroup_bondings, b->path, l); - if (r < 0) { - LIST_REMOVE(CGroupBonding, by_path, l, b); - return r; - } - } - - LIST_PREPEND(CGroupBonding, by_unit, u->cgroup_bondings, b); - b->unit = u; - - return 0; -} - char *unit_default_cgroup_path(Unit *u) { _cleanup_free_ char *escaped_instance = NULL, *slice = NULL; int r; assert(u); - if (UNIT_ISSET(u->slice)) { + if (unit_has_name(u, SPECIAL_ROOT_SLICE)) + return strdup(u->manager->cgroup_root); + + if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) { r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice); if (r < 0) return NULL; @@ -2026,148 +1989,6 @@ char *unit_default_cgroup_path(Unit *u) { escaped_instance, NULL); } -int unit_add_cgroup_from_text(Unit *u, const char *name, bool overwrite, CGroupBonding **ret) { - char *controller = NULL, *path = NULL; - CGroupBonding *b = NULL; - bool ours = false; - int r; - - assert(u); - assert(name); - - r = cg_split_spec(name, &controller, &path); - if (r < 0) - return r; - - if (!path) { - path = unit_default_cgroup_path(u); - ours = true; - } - - if (!controller) { - controller = strdup("systemd"); - ours = true; - } - - if (!path || !controller) { - free(path); - free(controller); - return log_oom(); - } - - if (streq(controller, "systemd")) { - /* Within the systemd unit hierarchy we do not allow changes. */ - if (path_startswith(path, "/system")) { - log_warning_unit(u->id, "Manipulating the systemd:/system cgroup hierarchy is not permitted."); - free(path); - free(controller); - return -EPERM; - } - } - - b = cgroup_bonding_find_list(u->cgroup_bondings, controller); - if (b) { - if (streq(path, b->path)) { - free(path); - free(controller); - - if (ret) - *ret = b; - return 0; - } - - if (overwrite && !b->essential) { - free(controller); - - free(b->path); - b->path = path; - - b->ours = ours; - b->realized = false; - - if (ret) - *ret = b; - - return 1; - } - - r = -EEXIST; - b = NULL; - goto fail; - } - - b = new0(CGroupBonding, 1); - if (!b) { - r = -ENOMEM; - goto fail; - } - - b->controller = controller; - b->path = path; - b->ours = ours; - b->essential = streq(controller, SYSTEMD_CGROUP_CONTROLLER); - - r = unit_add_cgroup(u, b); - if (r < 0) - goto fail; - - if (ret) - *ret = b; - - return 1; - -fail: - free(path); - free(controller); - free(b); - - return r; -} - -static int unit_add_one_default_cgroup(Unit *u, const char *controller) { - CGroupBonding *b = NULL; - int r = -ENOMEM; - - assert(u); - - if (controller && !cg_controller_is_valid(controller, true)) - return -EINVAL; - - if (!controller) - controller = SYSTEMD_CGROUP_CONTROLLER; - - if (cgroup_bonding_find_list(u->cgroup_bondings, controller)) - return 0; - - b = new0(CGroupBonding, 1); - if (!b) - return -ENOMEM; - - b->controller = strdup(controller); - if (!b->controller) - goto fail; - - b->path = unit_default_cgroup_path(u); - if (!b->path) - goto fail; - - b->ours = true; - b->essential = streq(controller, SYSTEMD_CGROUP_CONTROLLER); - - r = unit_add_cgroup(u, b); - if (r < 0) - goto fail; - - return 1; - -fail: - free(b->path); - free(b->controller); - free(b); - - return r; -} - int unit_add_default_slice(Unit *u) { Unit *slice; int r; @@ -2177,10 +1998,10 @@ int unit_add_default_slice(Unit *u) { if (UNIT_ISSET(u->slice)) return 0; - if (u->manager->running_as != SYSTEMD_SYSTEM) + if (!unit_get_cgroup_context(u)) return 0; - r = manager_load_unit(u->manager, SPECIAL_SYSTEM_SLICE, NULL, NULL, &slice); + r = manager_load_unit(u->manager, u->manager->running_as == SYSTEMD_SYSTEM ? SPECIAL_SYSTEM_SLICE : SPECIAL_ROOT_SLICE, NULL, NULL, &slice); if (r < 0) return r; @@ -2197,148 +2018,6 @@ const char *unit_slice_name(Unit *u) { return UNIT_DEREF(u->slice)->id; } -int unit_add_default_cgroups(Unit *u) { - CGroupAttribute *a; - char **c; - int r; - - assert(u); - - /* Adds in the default cgroups, if they weren't specified - * otherwise. */ - - if (!u->manager->cgroup_root) - return 0; - - r = unit_add_one_default_cgroup(u, NULL); - if (r < 0) - return r; - - STRV_FOREACH(c, u->manager->default_controllers) - unit_add_one_default_cgroup(u, *c); - - LIST_FOREACH(by_unit, a, u->cgroup_attributes) - unit_add_one_default_cgroup(u, a->controller); - - return 0; -} - -CGroupBonding* unit_get_default_cgroup(Unit *u) { - assert(u); - - return cgroup_bonding_find_list(u->cgroup_bondings, NULL); -} - -int unit_add_cgroup_attribute( - Unit *u, - const CGroupSemantics *semantics, - const char *controller, - const char *name, - const char *value, - CGroupAttribute **ret) { - - _cleanup_free_ char *c = NULL; - CGroupAttribute *a; - int r; - - assert(u); - assert(value); - - if (semantics) { - /* Semantics always take precedence */ - if (semantics->name) - name = semantics->name; - - if (semantics->controller) - controller = semantics->controller; - } - - if (!name) - return -EINVAL; - - if (!controller) { - r = cg_controller_from_attr(name, &c); - if (r < 0) - return -EINVAL; - - controller = c; - } - - if (!controller || - streq(controller, SYSTEMD_CGROUP_CONTROLLER) || - streq(controller, "systemd")) - return -EINVAL; - - if (!filename_is_safe(name)) - return -EINVAL; - - if (!cg_controller_is_valid(controller, false)) - return -EINVAL; - - /* Check if this attribute already exists. Note that we will - * explicitly check for the value here too, as there are - * attributes which accept multiple values. */ - a = cgroup_attribute_find_list(u->cgroup_attributes, controller, name); - if (a) { - if (streq(value, a->value)) { - /* Exactly the same value is always OK, let's ignore this */ - if (ret) - *ret = a; - - return 0; - } - - if (semantics && !semantics->multiple) { - char *v; - - /* If this is a single-item entry, we can - * simply patch the existing attribute */ - - v = strdup(value); - if (!v) - return -ENOMEM; - - free(a->value); - a->value = v; - - if (ret) - *ret = a; - return 1; - } - } - - a = new0(CGroupAttribute, 1); - if (!a) - return -ENOMEM; - - if (c) { - a->controller = c; - c = NULL; - } else - a->controller = strdup(controller); - - a->name = strdup(name); - a->value = strdup(value); - - if (!a->controller || !a->name || !a->value) { - free(a->controller); - free(a->name); - free(a->value); - free(a); - return -ENOMEM; - } - - a->semantics = semantics; - a->unit = u; - - LIST_PREPEND(CGroupAttribute, by_unit, u->cgroup_attributes, a); - - if (ret) - *ret = a; - - return 1; -} - int unit_load_related_unit(Unit *u, const char *type, Unit **_found) { _cleanup_free_ char *t = NULL; int r; @@ -2804,7 +2483,7 @@ int unit_kill_common( if (kill(main_pid, signo) < 0) r = -errno; - if (who == KILL_ALL) { + if (who == KILL_ALL && u->cgroup_path) { _cleanup_set_free_ Set *pid_set = NULL; int q; @@ -2825,7 +2504,7 @@ int unit_kill_common( return q; } - q = cgroup_bonding_kill_list(u->cgroup_bondings, signo, false, false, pid_set, NULL); + q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, true, false, pid_set); if (q < 0 && q != -EAGAIN && q != -ESRCH && q != -ENOENT) r = q; } @@ -2924,7 +2603,6 @@ int unit_exec_context_defaults(Unit *u, ExecContext *c) { assert(c); /* This only copies in the ones that need memory */ - for (i = 0; i < RLIMIT_NLIMITS; i++) if (u->manager->rlimit[i] && !c->rlimit[i]) { c->rlimit[i] = newdup(struct rlimit, u->manager->rlimit[i], 1); @@ -2954,6 +2632,16 @@ ExecContext *unit_get_exec_context(Unit *u) { return (ExecContext*) ((uint8_t*) u + offset); } +CGroupContext *unit_get_cgroup_context(Unit *u) { + size_t offset; + + offset = UNIT_VTABLE(u)->cgroup_context_offset; + if (offset <= 0) + return NULL; + + return (CGroupContext*) ((uint8_t*) u + offset); +} + static int drop_in_file(Unit *u, bool runtime, const char *name, char **_p, char **_q) { char *p, *q; int r; @@ -3072,7 +2760,7 @@ int unit_kill_context( wait_for_exit = true; } - if (c->kill_mode == KILL_CONTROL_GROUP) { + if (c->kill_mode == KILL_CONTROL_GROUP && u->cgroup_path) { _cleanup_set_free_ Set *pid_set = NULL; pid_set = set_new(trivial_hash_func, trivial_compare_func); @@ -3092,7 +2780,7 @@ int unit_kill_context( return r; } - r = cgroup_bonding_kill_list(u->cgroup_bondings, sig, true, false, pid_set, NULL); + r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, true, false, pid_set); if (r < 0) { if (r != -EAGAIN && r != -ESRCH && r != -ENOENT) log_warning_unit(u->id, "Failed to kill control group: %s", strerror(-r)); diff --git a/src/core/unit.h b/src/core/unit.h index da52101bd2..fbcaabe167 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -37,10 +37,10 @@ typedef struct UnitStatusMessageFormats UnitStatusMessageFormats; #include "list.h" #include "socket-util.h" #include "execute.h" +#include "cgroup.h" #include "condition.h" #include "install.h" #include "unit-name.h" -#include "cgroup-semantics.h" enum UnitActiveState { UNIT_ACTIVE, @@ -115,8 +115,6 @@ enum UnitDependency { #include "manager.h" #include "job.h" -#include "cgroup.h" -#include "cgroup-attr.h" struct UnitRef { /* Keeps tracks of references to a unit. This is useful so @@ -174,8 +172,9 @@ struct Unit { dual_timestamp inactive_enter_timestamp; /* Counterparts in the cgroup filesystem */ - CGroupBonding *cgroup_bondings; - CGroupAttribute *cgroup_attributes; + char *cgroup_path; + bool cgroup_realized; + CGroupControllerMask cgroup_mask; UnitRef slice; @@ -197,6 +196,9 @@ struct Unit { /* GC queue */ LIST_FIELDS(Unit, gc_queue); + /* CGroup realize members queue */ + LIST_FIELDS(Unit, cgroup_queue); + /* Used during GC sweeps */ unsigned gc_marker; @@ -243,6 +245,7 @@ struct Unit { bool in_dbus_queue:1; bool in_cleanup_queue:1; bool in_gc_queue:1; + bool in_cgroup_queue:1; bool sent_dbus_new_signal:1; @@ -277,8 +280,12 @@ struct UnitVTable { * ExecContext is found, if the unit type has that */ size_t exec_context_offset; - /* The name of the section with the exec settings of ExecContext */ - const char *exec_section; + /* If greater than 0, the offset into the object where + * CGroupContext is found, if the unit type has that */ + size_t cgroup_context_offset; + + /* The name of the configuration file section with the private settings of this unit*/ + const char *private_section; /* Config file sections this unit type understands, separated * by NUL chars */ @@ -350,7 +357,7 @@ struct UnitVTable { /* Called whenever any of the cgroups this unit watches for * ran empty */ - void (*cgroup_notify_empty)(Unit *u); + void (*notify_cgroup_empty)(Unit *u); /* Called whenever a process of this unit sends us a message */ void (*notify_message)(Unit *u, pid_t pid, char **tags); @@ -454,11 +461,6 @@ int unit_add_two_dependencies_by_name_inverse(Unit *u, UnitDependency d, UnitDep int unit_add_exec_dependencies(Unit *u, ExecContext *c); -int unit_add_cgroup_from_text(Unit *u, const char *name, bool overwrite, CGroupBonding **ret); -int unit_add_default_cgroups(Unit *u); -CGroupBonding* unit_get_default_cgroup(Unit *u); -int unit_add_cgroup_attribute(Unit *u, const CGroupSemantics *semantics, const char *controller, const char *name, const char *value, CGroupAttribute **ret); - int unit_choose_id(Unit *u, const char *name); int unit_set_description(Unit *u, const char *description); @@ -573,6 +575,7 @@ int unit_add_mount_links(Unit *u); int unit_exec_context_defaults(Unit *u, ExecContext *c); ExecContext *unit_get_exec_context(Unit *u) _pure_; +CGroupContext *unit_get_cgroup_context(Unit *u) _pure_; int unit_write_drop_in(Unit *u, bool runtime, const char *name, const char *data); int unit_remove_drop_in(Unit *u, bool runtime, const char *name); diff --git a/src/login/logind-machine.c b/src/login/logind-machine.c index 347e5aa022..0b35a9e2d0 100644 --- a/src/login/logind-machine.c +++ b/src/login/logind-machine.c @@ -223,7 +223,7 @@ static int machine_create_one_group(Machine *m, const char *controller, const ch r = -EINVAL; if (r < 0) { - r = cg_create(controller, path, NULL); + r = cg_create(controller, path); if (r < 0) return r; } diff --git a/src/login/logind-session.c b/src/login/logind-session.c index aba517d1f7..760425329b 100644 --- a/src/login/logind-session.c +++ b/src/login/logind-session.c @@ -472,12 +472,12 @@ static int session_create_one_group(Session *s, const char *controller, const ch r = -EINVAL; if (r < 0) { - r = cg_create(controller, path, NULL); + r = cg_create(controller, path); if (r < 0) return r; } - r = cg_set_task_access(controller, path, 0644, s->user->uid, s->user->gid, -1); + r = cg_set_task_access(controller, path, 0644, s->user->uid, s->user->gid); if (r >= 0) r = cg_set_group_access(controller, path, 0755, s->user->uid, s->user->gid); diff --git a/src/login/logind-user.c b/src/login/logind-user.c index fb0c9b75d7..9f7b924a24 100644 --- a/src/login/logind-user.c +++ b/src/login/logind-user.c @@ -349,7 +349,7 @@ static int user_create_cgroup(User *u) { return log_oom(); } - r = cg_create(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL); + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path); if (r < 0) { log_error("Failed to create cgroup "SYSTEMD_CGROUP_CONTROLLER":%s: %s", u->cgroup_path, strerror(-r)); return r; @@ -360,7 +360,7 @@ static int user_create_cgroup(User *u) { if (strv_contains(u->manager->reset_controllers, *k)) continue; - r = cg_create(*k, u->cgroup_path, NULL); + r = cg_create(*k, u->cgroup_path); if (r < 0) log_warning("Failed to create cgroup %s:%s: %s", *k, u->cgroup_path, strerror(-r)); } diff --git a/src/shared/cgroup-label.c b/src/shared/cgroup-label.c index 5b5163c250..574a7be3ee 100644 --- a/src/shared/cgroup-label.c +++ b/src/shared/cgroup-label.c @@ -36,15 +36,18 @@ #include "util.h" #include "mkdir.h" -int cg_create(const char *controller, const char *path, const char *suffix) { +/* This is split out since it needs label calls, either directly or + * indirectly. */ + +int cg_create(const char *controller, const char *path) { _cleanup_free_ char *fs = NULL; int r; - r = cg_get_path_and_check(controller, path, suffix, &fs); + r = cg_get_path_and_check(controller, path, NULL, &fs); if (r < 0) return r; - r = mkdir_parents_label(fs, 0755); + r = mkdir_parents_prefix("/sys/fs/cgroup", fs, 0755); if (r < 0) return r; @@ -64,7 +67,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { assert(pid >= 0); - r = cg_create(controller, path, NULL); + r = cg_create(controller, path); if (r < 0) return r; diff --git a/src/shared/cgroup-show.c b/src/shared/cgroup-show.c index 83cc0731b8..e971f36190 100644 --- a/src/shared/cgroup-show.c +++ b/src/shared/cgroup-show.c @@ -241,7 +241,6 @@ static int show_extra_pids(const char *controller, const char *path, const char unsigned i, j; int r; - assert(controller); assert(path); if (n_pids <= 0) diff --git a/src/shared/cgroup-util.c b/src/shared/cgroup-util.c index 9cbc64a541..5816b7d4d6 100644 --- a/src/shared/cgroup-util.c +++ b/src/shared/cgroup-util.c @@ -132,7 +132,7 @@ int cg_read_subgroup(DIR *d, char **fn) { return 0; } -int cg_rmdir(const char *controller, const char *path, bool honour_sticky) { +int cg_rmdir(const char *controller, const char *path) { _cleanup_free_ char *p = NULL; int r; @@ -140,34 +140,6 @@ int cg_rmdir(const char *controller, const char *path, bool honour_sticky) { if (r < 0) return r; - if (honour_sticky) { - char *fn; - - /* If the sticky bit is set on cgroup.procs, don't - * remove the directory */ - - fn = strappend(p, "/cgroup.procs"); - if (!fn) - return -ENOMEM; - - r = file_is_priv_sticky(fn); - free(fn); - - if (r > 0) - return 0; - - /* Compatibility ... */ - fn = strappend(p, "/tasks"); - if (!fn) - return -ENOMEM; - - r = file_is_priv_sticky(fn); - free(fn); - - if (r > 0) - return 0; - } - r = rmdir(p); if (r < 0 && errno != ENOENT) return -errno; @@ -298,7 +270,7 @@ int cg_kill_recursive(const char *controller, const char *path, int sig, bool si ret = r; if (rem) { - r = cg_rmdir(controller, path, true); + r = cg_rmdir(controller, path); if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY) return r; } @@ -407,7 +379,14 @@ int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char return ret; } -int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) { +int cg_migrate_recursive( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + bool ignore_self, + bool rem) { + _cleanup_closedir_ DIR *d = NULL; int r, ret = 0; char *fn; @@ -448,7 +427,7 @@ int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, ret = r; if (rem) { - r = cg_rmdir(cfrom, pfrom, true); + r = cg_rmdir(cfrom, pfrom); if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY) return r; } @@ -558,8 +537,9 @@ int cg_get_path_and_check(const char *controller, const char *path, const char * } static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { - char *p; - bool is_sticky; + assert(path); + assert(sb); + assert(ftwbuf); if (typeflag != FTW_DP) return 0; @@ -567,31 +547,6 @@ static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct if (ftwbuf->level < 1) return 0; - p = strappend(path, "/cgroup.procs"); - if (!p) { - errno = ENOMEM; - return 1; - } - - is_sticky = file_is_priv_sticky(p) > 0; - free(p); - - if (is_sticky) - return 0; - - /* Compatibility */ - p = strappend(path, "/tasks"); - if (!p) { - errno = ENOMEM; - return 1; - } - - is_sticky = file_is_priv_sticky(p) > 0; - free(p); - - if (is_sticky) - return 0; - rmdir(path); return 0; } @@ -611,28 +566,8 @@ int cg_trim(const char *controller, const char *path, bool delete_root) { r = errno ? -errno : -EIO; if (delete_root) { - bool is_sticky; - char *p; - - p = strappend(fs, "/cgroup.procs"); - if (!p) - return -ENOMEM; - - is_sticky = file_is_priv_sticky(p) > 0; - free(p); - - if (!is_sticky) { - p = strappend(fs, "/tasks"); - if (!p) - return -ENOMEM; - - is_sticky = file_is_priv_sticky(p) > 0; - free(p); - } - - if (!is_sticky) - if (rmdir(fs) < 0 && errno != ENOENT && r == 0) - return -errno; + if (rmdir(fs) < 0 && errno != ENOENT) + return -errno; } return r; @@ -699,15 +634,14 @@ int cg_set_task_access( const char *path, mode_t mode, uid_t uid, - gid_t gid, - int sticky) { + gid_t gid) { _cleanup_free_ char *fs = NULL, *procs = NULL; int r; assert(path); - if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0) + if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1) return 0; if (mode != (mode_t) -1) @@ -717,28 +651,6 @@ int cg_set_task_access( if (r < 0) return r; - if (sticky >= 0 && mode != (mode_t) -1) - /* Both mode and sticky param are passed */ - mode |= (sticky ? S_ISVTX : 0); - else if ((sticky >= 0 && mode == (mode_t) -1) || - (mode != (mode_t) -1 && sticky < 0)) { - struct stat st; - - /* Only one param is passed, hence read the current - * mode from the file itself */ - - r = lstat(fs, &st); - if (r < 0) - return -errno; - - if (mode == (mode_t) -1) - /* No mode set, we just shall set the sticky bit */ - mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0); - else - /* Only mode set, leave sticky bit untouched */ - mode = (st.st_mode & ~0777) | mode; - } - r = chmod_and_chown(fs, mode, uid, gid); if (r < 0) return r; @@ -1688,3 +1600,148 @@ int cg_slice_to_path(const char *unit, char **ret) { return 0; } + +int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) { + _cleanup_free_ char *p = NULL; + int r; + + r = cg_get_path(controller, path, attribute, &p); + if (r < 0) + return r; + + return write_string_file(p, value); +} + +static const char mask_names[] = + "cpu\0" + "cpuacct\0" + "blkio\0" + "memory\0" + "devices\0"; + +int cg_create_with_mask(CGroupControllerMask mask, const char *path) { + CGroupControllerMask bit = 1; + const char *n; + int r; + + /* This one will create a cgroup in our private tree, but also + * duplicate it in the trees specified in mask, and remove it + * in all others */ + + /* First create the cgroup in our own hierarchy. */ + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path); + if (r < 0) + return r; + + /* Then, do the same in the other hierarchies */ + NULSTR_FOREACH(n, mask_names) { + if (bit & mask) + cg_create(n, path); + else + cg_trim(n, path, true); + + bit <<= 1; + } + + return r; +} + +int cg_attach_with_mask(CGroupControllerMask mask, const char *path, pid_t pid) { + CGroupControllerMask bit = 1; + const char *n; + int r; + + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); + + NULSTR_FOREACH(n, mask_names) { + if (bit & mask) + cg_attach(n, path, pid); + else { + char prefix[strlen(path) + 1], *slash; + + /* OK, this one is a bit harder... Now we need + * to add to the closest parent cgroup we + * can find */ + strcpy(prefix, path); + while ((slash = strrchr(prefix, '/'))) { + int q; + *slash = 0; + + q = cg_attach(n, prefix, pid); + if (q >= 0) + break; + } + } + + bit <<= 1; + } + + return r; +} + +int cg_migrate_with_mask(CGroupControllerMask mask, const char *from, const char *to) { + CGroupControllerMask bit = 1; + const char *n; + int r; + + if (path_equal(from, to)) + return 0; + + r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true); + + NULSTR_FOREACH(n, mask_names) { + if (bit & mask) + cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, to, n, to, false, false); + else { + char prefix[strlen(to) + 1], *slash; + + strcpy(prefix, to); + while ((slash = strrchr(prefix, '/'))) { + int q; + + *slash = 0; + + q = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, to, n, prefix, false, false); + if (q >= 0) + break; + } + } + + bit <<= 1; + } + + return r; +} + +int cg_trim_with_mask(CGroupControllerMask mask, const char *path, bool delete_root) { + CGroupControllerMask bit = 1; + const char *n; + int r; + + r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); + if (r < 0) + return r; + + NULSTR_FOREACH(n, mask_names) { + if (bit & mask) + cg_trim(n, path, delete_root); + + bit <<= 1; + } + + return r; +} + +CGroupControllerMask cg_mask_supported(void) { + CGroupControllerMask bit = 1, mask = 0; + const char *n; + + NULSTR_FOREACH(n, mask_names) { + if (check_hierarchy(n) >= 0) + mask |= bit; + + bit <<= 1; + } + + return mask; +} diff --git a/src/shared/cgroup-util.h b/src/shared/cgroup-util.h index 2d00bb3fff..9883d941c2 100644 --- a/src/shared/cgroup-util.h +++ b/src/shared/cgroup-util.h @@ -28,6 +28,15 @@ #include "set.h" #include "def.h" +/* A bit mask of well known cgroup controllers */ +typedef enum CGroupControllerMask { + CGROUP_CPU = 1, + CGROUP_CPUACCT = 2, + CGROUP_BLKIO = 4, + CGROUP_MEMORY = 8, + CGROUP_DEVICE = 16 +} CGroupControllerMask; + /* * General rules: * @@ -67,15 +76,17 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path); int cg_trim(const char *controller, const char *path, bool delete_root); -int cg_rmdir(const char *controller, const char *path, bool honour_sticky); +int cg_rmdir(const char *controller, const char *path); int cg_delete(const char *controller, const char *path); -int cg_create(const char *controller, const char *path, const char *suffix); +int cg_create(const char *controller, const char *path); int cg_attach(const char *controller, const char *path, pid_t pid); int cg_create_and_attach(const char *controller, const char *path, pid_t pid); +int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); + int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid); -int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky); +int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid); int cg_install_release_agent(const char *controller, const char *agent); @@ -113,3 +124,10 @@ char *cg_unescape(const char *p) _pure_; bool cg_controller_is_valid(const char *p, bool allow_named); int cg_slice_to_path(const char *unit, char **ret); + +int cg_create_with_mask(CGroupControllerMask mask, const char *path); +int cg_attach_with_mask(CGroupControllerMask mask, const char *path, pid_t pid); +int cg_migrate_with_mask(CGroupControllerMask mask, const char *from, const char *to); +int cg_trim_with_mask(CGroupControllerMask mask, const char *path, bool delete_root); + +CGroupControllerMask cg_mask_supported(void); diff --git a/src/shared/fileio.c b/src/shared/fileio.c index ad068bf30d..dc13c9ee63 100644 --- a/src/shared/fileio.c +++ b/src/shared/fileio.c @@ -24,7 +24,6 @@ #include "util.h" #include "strv.h" - int write_string_to_file(FILE *f, const char *line) { errno = 0; fputs(line, f); diff --git a/src/shared/mkdir.c b/src/shared/mkdir.c index 0e51b64f69..e21a0f3989 100644 --- a/src/shared/mkdir.c +++ b/src/shared/mkdir.c @@ -26,15 +26,16 @@ #include <stdlib.h> #include <stdio.h> -#include "mkdir.h" #include "label.h" #include "util.h" +#include "path-util.h" +#include "mkdir.h" int mkdir_label(const char *path, mode_t mode) { return label_mkdir(path, mode, true); } -static int makedir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, bool apply) { +static int mkdir_safe_internal(const char *path, mode_t mode, uid_t uid, gid_t gid, bool apply) { struct stat st; if (label_mkdir(path, mode, apply) >= 0) @@ -56,36 +57,50 @@ static int makedir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid, boo } int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid) { - return makedir_safe(path, mode, uid, gid, false); + return mkdir_safe_internal(path, mode, uid, gid, false); } int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid) { - return makedir_safe(path, mode, uid, gid, true); + return mkdir_safe_internal(path, mode, uid, gid, true); } -static int makedir_parents(const char *path, mode_t mode, bool apply) { +static int is_dir(const char* path) { struct stat st; + + if (stat(path, &st) < 0) + return -errno; + + return S_ISDIR(st.st_mode); +} + +static int mkdir_parents_internal(const char *prefix, const char *path, mode_t mode, bool apply) { const char *p, *e; + int r; assert(path); + if (prefix && !path_startswith(path, prefix)) + return -ENOTDIR; + /* return immediately if directory exists */ e = strrchr(path, '/'); if (!e) return -EINVAL; + + if (e == path) + return 0; + p = strndupa(path, e - path); - if (stat(p, &st) >= 0) { - if ((st.st_mode & S_IFMT) == S_IFDIR) - return 0; - else - return -ENOTDIR; - } + r = is_dir(p); + if (r > 0) + return 0; + if (r == 0) + return -ENOTDIR; /* create every parent directory in the path, except the last component */ p = path + strspn(path, "/"); for (;;) { - int r; - char *t; + char t[strlen(path) + 1]; e = p + strcspn(p, "/"); p = e + strspn(e, "/"); @@ -95,39 +110,36 @@ static int makedir_parents(const char *path, mode_t mode, bool apply) { if (*p == 0) return 0; - t = strndup(path, e - path); - if (!t) - return -ENOMEM; + memcpy(t, path, e - path); + t[e-path] = 0; - r = label_mkdir(t, mode, apply); - free(t); + if (prefix && path_startswith(prefix, t)) + continue; + r = label_mkdir(t, mode, apply); if (r < 0 && errno != EEXIST) return -errno; } } int mkdir_parents(const char *path, mode_t mode) { - return makedir_parents(path, mode, false); + return mkdir_parents_internal(NULL, path, mode, false); } int mkdir_parents_label(const char *path, mode_t mode) { - return makedir_parents(path, mode, true); + return mkdir_parents_internal(NULL, path, mode, true); } -static int is_dir(const char* path) { - struct stat st; - if (stat(path, &st) < 0) - return -errno; - return S_ISDIR(st.st_mode); +int mkdir_parents_prefix(const char *prefix, const char *path, mode_t mode) { + return mkdir_parents_internal(prefix, path, mode, true); } -static int makedir_p(const char *path, mode_t mode, bool apply) { +static int mkdir_p_internal(const char *prefix, const char *path, mode_t mode, bool apply) { int r; /* Like mkdir -p */ - r = makedir_parents(path, mode, apply); + r = mkdir_parents_internal(prefix, path, mode, apply); if (r < 0) return r; @@ -139,9 +151,13 @@ static int makedir_p(const char *path, mode_t mode, bool apply) { } int mkdir_p(const char *path, mode_t mode) { - return makedir_p(path, mode, false); + return mkdir_p_internal(NULL, path, mode, false); } int mkdir_p_label(const char *path, mode_t mode) { - return makedir_p(path, mode, true); + return mkdir_p_internal(NULL, path, mode, true); +} + +int mkdir_p_prefix(const char *prefix, const char *path, mode_t mode) { + return mkdir_p_internal(prefix, path, mode, false); } diff --git a/src/shared/mkdir.h b/src/shared/mkdir.h index ce1c35e9ba..3d39b2910f 100644 --- a/src/shared/mkdir.h +++ b/src/shared/mkdir.h @@ -22,11 +22,19 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ +#include <sys/types.h> + int mkdir_label(const char *path, mode_t mode); + int mkdir_safe(const char *path, mode_t mode, uid_t uid, gid_t gid); int mkdir_safe_label(const char *path, mode_t mode, uid_t uid, gid_t gid); + int mkdir_parents(const char *path, mode_t mode); int mkdir_parents_label(const char *path, mode_t mode); +int mkdir_parents_prefix(const char *prefix, const char *path, mode_t mode); + int mkdir_p(const char *path, mode_t mode); int mkdir_p_label(const char *path, mode_t mode); +int mkdir_p_prefix(const char *prefix, const char *path, mode_t mode); + #endif diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index 24543ee06d..a4f8f2326e 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -2378,150 +2378,6 @@ static int kill_unit(DBusConnection *bus, char **args) { return 0; } -static int set_cgroup(DBusConnection *bus, char **args) { - _cleanup_free_ char *n = NULL; - const char *method, *runtime; - char **argument; - int r; - - assert(bus); - assert(args); - - method = - streq(args[0], "set-cgroup") ? "SetUnitControlGroup" : - streq(args[0], "unset-cgroup") ? "UnsetUnitControlGroup" - : "UnsetUnitControlGroupAttribute"; - - runtime = arg_runtime ? "runtime" : "persistent"; - - n = unit_name_mangle(args[1]); - if (!n) - return log_oom(); - - STRV_FOREACH(argument, args + 2) { - - r = bus_method_call_with_reply( - bus, - "org.freedesktop.systemd1", - "/org/freedesktop/systemd1", - "org.freedesktop.systemd1.Manager", - method, - NULL, - NULL, - DBUS_TYPE_STRING, &n, - DBUS_TYPE_STRING, argument, - DBUS_TYPE_STRING, &runtime, - DBUS_TYPE_INVALID); - if (r < 0) - return r; - } - - return 0; -} - -static int set_cgroup_attr(DBusConnection *bus, char **args) { - _cleanup_dbus_message_unref_ DBusMessage *m = NULL, *reply = NULL; - DBusError error; - DBusMessageIter iter; - _cleanup_free_ char *n = NULL; - const char *runtime; - int r; - - assert(bus); - assert(args); - - dbus_error_init(&error); - - runtime = arg_runtime ? "runtime" : "persistent"; - - n = unit_name_mangle(args[1]); - if (!n) - return log_oom(); - - m = dbus_message_new_method_call( - "org.freedesktop.systemd1", - "/org/freedesktop/systemd1", - "org.freedesktop.systemd1.Manager", - "SetUnitControlGroupAttribute"); - if (!m) - return log_oom(); - - dbus_message_iter_init_append(m, &iter); - if (!dbus_message_iter_append_basic(&iter, DBUS_TYPE_STRING, &n) || - !dbus_message_iter_append_basic(&iter, DBUS_TYPE_STRING, &args[2])) - return log_oom(); - - r = bus_append_strv_iter(&iter, args + 3); - if (r < 0) - return log_oom(); - - if (!dbus_message_iter_append_basic(&iter, DBUS_TYPE_STRING, &runtime)) - return log_oom(); - - reply = dbus_connection_send_with_reply_and_block(bus, m, -1, &error); - if (!reply) { - log_error("Failed to issue method call: %s", bus_error_message(&error)); - dbus_error_free(&error); - return -EIO; - } - - return 0; -} - -static int get_cgroup_attr(DBusConnection *bus, char **args) { - _cleanup_dbus_message_unref_ DBusMessage *reply = NULL; - _cleanup_free_ char *n = NULL; - char **argument; - int r; - - assert(bus); - assert(args); - - n = unit_name_mangle(args[1]); - if (!n) - return log_oom(); - - STRV_FOREACH(argument, args + 2) { - _cleanup_strv_free_ char **list = NULL; - DBusMessageIter iter; - char **a; - - r = bus_method_call_with_reply( - bus, - "org.freedesktop.systemd1", - "/org/freedesktop/systemd1", - "org.freedesktop.systemd1.Manager", - "GetUnitControlGroupAttribute", - &reply, - NULL, - DBUS_TYPE_STRING, &n, - DBUS_TYPE_STRING, argument, - DBUS_TYPE_INVALID); - if (r < 0) - return r; - - if (!dbus_message_iter_init(reply, &iter)) { - log_error("Failed to initialize iterator."); - return -EIO; - } - - r = bus_parse_strv_iter(&iter, &list); - if (r < 0) { - log_error("Failed to parse value list."); - return r; - } - - STRV_FOREACH(a, list) { - if (endswith(*a, "\n")) - fputs(*a, stdout); - else - puts(*a); - } - } - - return 0; -} - typedef struct ExecStatusInfo { char *name; @@ -2639,7 +2495,7 @@ typedef struct UnitStatusInfo { const char *fragment_path; const char *source_path; - const char *default_control_group; + const char *control_group; char **dropin_paths; @@ -2922,11 +2778,11 @@ static void print_status_info(UnitStatusInfo *i) { if (i->status_text) printf(" Status: \"%s\"\n", i->status_text); - if (i->default_control_group && - (i->main_pid > 0 || i->control_pid > 0 || cg_is_empty_by_spec(i->default_control_group, false) == 0)) { + if (i->control_group && + (i->main_pid > 0 || i->control_pid > 0 || cg_is_empty_by_spec(i->control_group, false) == 0)) { unsigned c; - printf(" CGroup: %s\n", i->default_control_group); + printf(" CGroup: %s\n", i->control_group); if (arg_transport != TRANSPORT_SSH) { unsigned k = 0; @@ -2945,7 +2801,7 @@ static void print_status_info(UnitStatusInfo *i) { if (i->control_pid > 0) extra[k++] = i->control_pid; - show_cgroup_and_extra_by_spec(i->default_control_group, prefix, + show_cgroup_and_extra_by_spec(i->control_group, prefix, c, false, extra, k, flags); } } @@ -3054,8 +2910,12 @@ static int status_property(const char *name, DBusMessageIter *iter, UnitStatusIn i->fragment_path = s; else if (streq(name, "SourcePath")) i->source_path = s; +#ifndef LEGACY else if (streq(name, "DefaultControlGroup")) - i->default_control_group = s; + i->control_group = s; +#endif + else if (streq(name, "ControlGroup")) + i->control_group = s; else if (streq(name, "StatusText")) i->status_text = s; else if (streq(name, "PIDFile")) @@ -3457,8 +3317,44 @@ static int print_property(const char *name, DBusMessageIter *iter) { } return 0; + + } else if (dbus_message_iter_get_element_type(iter) == DBUS_TYPE_STRUCT && streq(name, "DeviceAllow")) { + DBusMessageIter sub, sub2; + + dbus_message_iter_recurse(iter, &sub); + while (dbus_message_iter_get_arg_type(&sub) == DBUS_TYPE_STRUCT) { + const char *path, *rwm; + + dbus_message_iter_recurse(&sub, &sub2); + + if (bus_iter_get_basic_and_next(&sub2, DBUS_TYPE_STRING, &path, true) >= 0 && + bus_iter_get_basic_and_next(&sub2, DBUS_TYPE_STRING, &rwm, false) >= 0) + printf("%s=%s %s\n", name, strna(path), strna(rwm)); + + dbus_message_iter_next(&sub); + } + return 0; + + } else if (dbus_message_iter_get_element_type(iter) == DBUS_TYPE_STRUCT && (streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth"))) { + DBusMessageIter sub, sub2; + + dbus_message_iter_recurse(iter, &sub); + while (dbus_message_iter_get_arg_type(&sub) == DBUS_TYPE_STRUCT) { + const char *path; + uint64_t bandwidth; + + dbus_message_iter_recurse(&sub, &sub2); + + if (bus_iter_get_basic_and_next(&sub2, DBUS_TYPE_STRING, &path, true) >= 0 && + bus_iter_get_basic_and_next(&sub2, DBUS_TYPE_UINT64, &bandwidth, false) >= 0) + printf("%s=%s %" PRIu64 "\n", name, strna(path), bandwidth); + + dbus_message_iter_next(&sub); + } + return 0; } + break; } @@ -4667,14 +4563,6 @@ static int systemctl_help(void) { " help [NAME...|PID...] Show manual for one or more units\n" " reset-failed [NAME...] Reset failed state for all, one, or more\n" " units\n" - " get-cgroup-attr [NAME] [ATTR] ...\n" - " Get control group attrubute\n" - " set-cgroup-attr [NAME] [ATTR] [VALUE] ...\n" - " Set control group attribute\n" - " unset-cgroup-attr [NAME] [ATTR...]\n" - " Unset control group attribute\n" - " set-cgroup [NAME] [CGROUP...] Add unit to a control group\n" - " unset-cgroup [NAME] [CGROUP...] Remove unit from a control group\n" " load [NAME...] Load one or more units\n" " list-dependencies [NAME] Recursively show units which are required\n" " or wanted by this unit or by which this\n" @@ -5711,11 +5599,6 @@ static int systemctl_main(DBusConnection *bus, int argc, char *argv[], DBusError { "condreload", MORE, 2, start_unit }, /* For compatibility with ALTLinux */ { "condrestart", MORE, 2, start_unit }, /* For compatibility with RH */ { "isolate", EQUAL, 2, start_unit }, - { "set-cgroup", MORE, 3, set_cgroup }, - { "unset-cgroup", MORE, 3, set_cgroup }, - { "get-cgroup-attr", MORE, 3, get_cgroup_attr }, - { "set-cgroup-attr", MORE, 4, set_cgroup_attr }, - { "unset-cgroup-attr", MORE, 3, set_cgroup }, { "kill", MORE, 2, kill_unit }, { "is-active", MORE, 2, check_unit_active }, { "check", MORE, 2, check_unit_active }, diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c index 3a3489d6a2..2a0ce27206 100644 --- a/src/test/test-cgroup.c +++ b/src/test/test-cgroup.c @@ -31,10 +31,10 @@ int main(int argc, char*argv[]) { char *path; char *c, *p; - assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a", NULL) == 0); - assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a", NULL) == 0); - assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b", NULL) == 0); - assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-c", NULL) == 0); + assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0); + assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-a") == 0); + assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b") == 0); + assert_se(cg_create(SYSTEMD_CGROUP_CONTROLLER, "/test-b/test-c") == 0); assert_se(cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, "/test-b", 0) == 0); assert_se(cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, getpid(), &path) == 0); diff --git a/units/-.slice b/units/-.slice new file mode 100644 index 0000000000..ac82c35874 --- /dev/null +++ b/units/-.slice @@ -0,0 +1,12 @@ +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Root Slice +Documentation=man:systemd.special(7) +DefaultDependencies=no +Before=slices.target diff --git a/units/slices.target b/units/slices.target index cbfdba0055..a29310c047 100644 --- a/units/slices.target +++ b/units/slices.target @@ -8,5 +8,5 @@ [Unit] Description=Slices Documentation=man:systemd.special(7) -Wants=system.slice -After=system.slice +Wants=-.slice system.slice +After=-.slice system.slice diff --git a/units/system.slice b/units/system.slice index 8281fe58f6..c0e3df9d0f 100644 --- a/units/system.slice +++ b/units/system.slice @@ -10,3 +10,5 @@ Description=System Slice Documentation=man:systemd.special(7) DefaultDependencies=no Before=slices.target +Wants=-.slice +After=-.slice |