diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/cgroup.c | 360 | ||||
-rw-r--r-- | src/core/cgroup.h | 30 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 331 | ||||
-rw-r--r-- | src/core/dbus-execute.c | 56 | ||||
-rw-r--r-- | src/core/dbus-job.c | 2 | ||||
-rw-r--r-- | src/core/job.c | 24 | ||||
-rw-r--r-- | src/core/job.h | 2 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 8 | ||||
-rw-r--r-- | src/core/load-fragment.c | 232 | ||||
-rw-r--r-- | src/core/load-fragment.h | 3 | ||||
-rw-r--r-- | src/core/main.c | 3 | ||||
-rw-r--r-- | src/core/manager.c | 2 | ||||
-rw-r--r-- | src/core/manager.h | 1 | ||||
-rw-r--r-- | src/core/namespace.c | 12 | ||||
-rw-r--r-- | src/core/system.conf | 1 | ||||
-rw-r--r-- | src/core/transaction.c | 2 | ||||
-rw-r--r-- | src/core/unit.c | 18 |
17 files changed, 955 insertions, 132 deletions
diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 996efde22b..0fb63b1bd1 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -32,6 +32,7 @@ #include "special.h" #include "string-table.h" #include "string-util.h" +#include "stdio-util.h" #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) @@ -47,6 +48,9 @@ void cgroup_context_init(CGroupContext *c) { c->memory_limit = (uint64_t) -1; + c->io_weight = CGROUP_WEIGHT_INVALID; + c->startup_io_weight = CGROUP_WEIGHT_INVALID; + c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID; c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID; @@ -62,6 +66,24 @@ void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { free(a); } +void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w) { + assert(c); + assert(w); + + LIST_REMOVE(device_weights, c->io_device_weights, w); + free(w->path); + free(w); +} + +void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) { + assert(c); + assert(l); + + LIST_REMOVE(device_limits, c->io_device_limits, l); + free(l->path); + free(l); +} + void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) { assert(c); assert(w); @@ -83,6 +105,12 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI void cgroup_context_done(CGroupContext *c) { assert(c); + while (c->io_device_weights) + cgroup_context_free_io_device_weight(c, c->io_device_weights); + + while (c->io_device_limits) + cgroup_context_free_io_device_limit(c, c->io_device_limits); + while (c->blockio_device_weights) cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights); @@ -94,6 +122,8 @@ void cgroup_context_done(CGroupContext *c) { } void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { + CGroupIODeviceLimit *il; + CGroupIODeviceWeight *iw; CGroupBlockIODeviceBandwidth *b; CGroupBlockIODeviceWeight *w; CGroupDeviceAllow *a; @@ -106,12 +136,15 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { fprintf(f, "%sCPUAccounting=%s\n" + "%sIOAccounting=%s\n" "%sBlockIOAccounting=%s\n" "%sMemoryAccounting=%s\n" "%sTasksAccounting=%s\n" "%sCPUShares=%" PRIu64 "\n" "%sStartupCPUShares=%" PRIu64 "\n" "%sCPUQuotaPerSecSec=%s\n" + "%sIOWeight=%" PRIu64 "\n" + "%sStartupIOWeight=%" PRIu64 "\n" "%sBlockIOWeight=%" PRIu64 "\n" "%sStartupBlockIOWeight=%" PRIu64 "\n" "%sMemoryLimit=%" PRIu64 "\n" @@ -119,12 +152,15 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sDevicePolicy=%s\n" "%sDelegate=%s\n", prefix, yes_no(c->cpu_accounting), + prefix, yes_no(c->io_accounting), prefix, yes_no(c->blockio_accounting), prefix, yes_no(c->memory_accounting), prefix, yes_no(c->tasks_accounting), prefix, c->cpu_shares, prefix, c->startup_cpu_shares, prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1), + prefix, c->io_weight, + prefix, c->startup_io_weight, prefix, c->blockio_weight, prefix, c->startup_blockio_weight, prefix, c->memory_limit, @@ -139,6 +175,27 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { a->path, a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : ""); + LIST_FOREACH(device_weights, iw, c->io_device_weights) + fprintf(f, + "%sIODeviceWeight=%s %" PRIu64, + prefix, + iw->path, + iw->weight); + + LIST_FOREACH(device_limits, il, c->io_device_limits) { + char buf[FORMAT_BYTES_MAX]; + CGroupIOLimitType type; + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + if (il->limits[type] != cgroup_io_limit_defaults[type]) + fprintf(f, + "%s%s=%s %s\n", + prefix, + cgroup_io_limit_type_to_string(type), + il->path, + format_bytes(buf, sizeof(buf), il->limits[type])); + } + LIST_FOREACH(device_weights, w, c->blockio_device_weights) fprintf(f, "%sBlockIODeviceWeight=%s %" PRIu64, @@ -149,16 +206,22 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { char buf[FORMAT_BYTES_MAX]; - fprintf(f, - "%s%s=%s %s\n", - prefix, - b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth", - b->path, - format_bytes(buf, sizeof(buf), b->bandwidth)); + if (b->rbps != CGROUP_LIMIT_MAX) + fprintf(f, + "%sBlockIOReadBandwidth=%s %s\n", + prefix, + b->path, + format_bytes(buf, sizeof(buf), b->rbps)); + if (b->wbps != CGROUP_LIMIT_MAX) + fprintf(f, + "%sBlockIOWriteBandwidth=%s %s\n", + prefix, + b->path, + format_bytes(buf, sizeof(buf), b->wbps)); } } -static int lookup_blkio_device(const char *p, dev_t *dev) { +static int lookup_block_device(const char *p, dev_t *dev) { struct stat st; int r; @@ -295,6 +358,144 @@ fail: return -errno; } +static bool cgroup_context_has_io_config(CGroupContext *c) { + return c->io_accounting || + c->io_weight != CGROUP_WEIGHT_INVALID || + c->startup_io_weight != CGROUP_WEIGHT_INVALID || + c->io_device_weights || + c->io_device_limits; +} + +static bool cgroup_context_has_blockio_config(CGroupContext *c) { + return c->blockio_accounting || + c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || + c->blockio_device_weights || + c->blockio_device_bandwidths; +} + +static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_io_weight != CGROUP_WEIGHT_INVALID) + return c->startup_io_weight; + else if (c->io_weight != CGROUP_WEIGHT_INVALID) + return c->io_weight; + else + return CGROUP_WEIGHT_DEFAULT; +} + +static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state) { + if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && + c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID) + return c->startup_blockio_weight; + else if (c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID) + return c->blockio_weight; + else + return CGROUP_BLKIO_WEIGHT_DEFAULT; +} + +static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) { + return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT, + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) { + return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT, + CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX); +} + +static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) { + char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return; + + xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight); + r = cg_set_attribute("io", path, "io.weight", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.weight on %s: %m", path); +} + +static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_path, uint64_t blkio_weight) { + char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return; + + xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight); + r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.weight_device on %s: %m", path); +} + +static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_path, uint64_t *limits) { + char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)]; + char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4]; + CGroupIOLimitType type; + dev_t dev; + unsigned n = 0; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return 0; + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) { + if (limits[type] != cgroup_io_limit_defaults[type]) { + xsprintf(limit_bufs[type], "%" PRIu64, limits[type]); + n++; + } else { + xsprintf(limit_bufs[type], "%s", limits[type] == CGROUP_LIMIT_MAX ? "max" : "0"); + } + } + + xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev), + limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX], + limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]); + r = cg_set_attribute("io", path, "io.max", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set io.max on %s: %m", path); + return n; +} + +static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *dev_path, uint64_t rbps, uint64_t wbps) { + char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1]; + dev_t dev; + unsigned n = 0; + int r; + + r = lookup_block_device(dev_path, &dev); + if (r < 0) + return 0; + + if (rbps != CGROUP_LIMIT_MAX) + n++; + sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps); + r = cg_set_attribute("blkio", path, "blkio.throttle.read_bps_device", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.read_bps_device on %s: %m", path); + + if (wbps != CGROUP_LIMIT_MAX) + n++; + sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps); + r = cg_set_attribute("blkio", path, "blkio.throttle.write_bps_device", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set blkio.throttle.write_bps_device on %s: %m", path); + + return n; +} + void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { bool is_root; int r; @@ -343,53 +544,120 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M "Failed to set cpu.cfs_quota_us on %s: %m", path); } - if (mask & CGROUP_MASK_BLKIO) { - char buf[MAX(DECIMAL_STR_MAX(uint64_t)+1, - DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)]; - CGroupBlockIODeviceWeight *w; - CGroupBlockIODeviceBandwidth *b; + if (mask & CGROUP_MASK_IO) { + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); if (!is_root) { - sprintf(buf, "%" PRIu64 "\n", - IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->startup_blockio_weight : - c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->blockio_weight : CGROUP_BLKIO_WEIGHT_DEFAULT); - r = cg_set_attribute("blkio", path, "blkio.weight", buf); + char buf[8+DECIMAL_STR_MAX(uint64_t)+1]; + uint64_t weight; + + if (has_io) + weight = cgroup_context_io_weight(c, state); + else if (has_blockio) + weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state)); + else + weight = CGROUP_WEIGHT_DEFAULT; + + xsprintf(buf, "default %" PRIu64 "\n", weight); + r = cg_set_attribute("io", path, "io.weight", buf); if (r < 0) log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight on %s: %m", path); + "Failed to set io.weight on %s: %m", path); - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_weights, w, c->blockio_device_weights) { - dev_t dev; + if (has_io) { + CGroupIODeviceWeight *w; - r = lookup_blkio_device(w->path, &dev); - if (r < 0) - continue; + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_io_device_weight(path, w->path, w->weight); + } else if (has_blockio) { + CGroupBlockIODeviceWeight *w; - sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), w->weight); - r = cg_set_attribute("blkio", path, "blkio.weight_device", buf); - if (r < 0) - log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set blkio.weight_device on %s: %m", path); + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight)); } } - /* FIXME: no way to reset this list */ - LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { - const char *a; - dev_t dev; + /* Apply limits and free ones without config. */ + if (has_io) { + CGroupIODeviceLimit *l, *next; - r = lookup_blkio_device(b->path, &dev); - if (r < 0) - continue; + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_io_device_limit(path, l->path, l->limits)) + cgroup_context_free_io_device_limit(c, l); + } + } else if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; + CGroupIOLimitType type; + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + limits[type] = cgroup_io_limit_defaults[type]; + + limits[CGROUP_IO_RBPS_MAX] = b->rbps; + limits[CGROUP_IO_WBPS_MAX] = b->wbps; + + if (!cgroup_apply_io_device_limit(path, b->path, limits)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } + } + } + + if (mask & CGROUP_MASK_BLKIO) { + bool has_io = cgroup_context_has_io_config(c); + bool has_blockio = cgroup_context_has_blockio_config(c); + + if (!is_root) { + char buf[DECIMAL_STR_MAX(uint64_t)+1]; + uint64_t weight; - a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device"; + if (has_blockio) + weight = cgroup_context_blkio_weight(c, state); + else if (has_io) + weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state)); + else + weight = CGROUP_BLKIO_WEIGHT_DEFAULT; - sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth); - r = cg_set_attribute("blkio", path, a, buf); + xsprintf(buf, "%" PRIu64 "\n", weight); + r = cg_set_attribute("blkio", path, "blkio.weight", buf); if (r < 0) log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set %s on %s: %m", a, path); + "Failed to set blkio.weight on %s: %m", path); + + if (has_blockio) { + CGroupBlockIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->blockio_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, w->weight); + } else if (has_io) { + CGroupIODeviceWeight *w; + + /* FIXME: no way to reset this list */ + LIST_FOREACH(device_weights, w, c->io_device_weights) + cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight)); + } + } + + /* Apply limits and free ones without config. */ + if (has_blockio) { + CGroupBlockIODeviceBandwidth *b, *next; + + LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) { + if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps)) + cgroup_context_free_blockio_device_bandwidth(c, b); + } + } else if (has_io) { + CGroupIODeviceLimit *l, *next; + + LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) { + if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX])) + cgroup_context_free_io_device_limit(c, l); + } } } @@ -506,12 +774,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { c->cpu_quota_per_sec_usec != USEC_INFINITY) mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU; - if (c->blockio_accounting || - c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID || - c->blockio_device_weights || - c->blockio_device_bandwidths) - mask |= CGROUP_MASK_BLKIO; + if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c)) + mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; if (c->memory_accounting || c->memory_limit != (uint64_t) -1) @@ -1601,6 +1865,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) { if (m == 0) return; + /* always invalidate compat pairs together */ + if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)) + m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO; + if ((u->cgroup_realized_mask & m) == 0) return; @@ -1615,7 +1883,7 @@ void manager_invalidate_startup_units(Manager *m) { assert(m); SET_FOREACH(u, m->startup_units, i) - unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_BLKIO); + unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO); } static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = { diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 360bbca30f..2b1edbafc4 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -23,9 +23,12 @@ #include "list.h" #include "time-util.h" +#include "cgroup-util.h" typedef struct CGroupContext CGroupContext; typedef struct CGroupDeviceAllow CGroupDeviceAllow; +typedef struct CGroupIODeviceWeight CGroupIODeviceWeight; +typedef struct CGroupIODeviceLimit CGroupIODeviceLimit; typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight; typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth; @@ -53,6 +56,18 @@ struct CGroupDeviceAllow { bool m:1; }; +struct CGroupIODeviceWeight { + LIST_FIELDS(CGroupIODeviceWeight, device_weights); + char *path; + uint64_t weight; +}; + +struct CGroupIODeviceLimit { + LIST_FIELDS(CGroupIODeviceLimit, device_limits); + char *path; + uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX]; +}; + struct CGroupBlockIODeviceWeight { LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights); char *path; @@ -62,16 +77,24 @@ struct CGroupBlockIODeviceWeight { struct CGroupBlockIODeviceBandwidth { LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths); char *path; - uint64_t bandwidth; - bool read; + uint64_t rbps; + uint64_t wbps; }; struct CGroupContext { bool cpu_accounting; + bool io_accounting; bool blockio_accounting; bool memory_accounting; bool tasks_accounting; + /* For unified hierarchy */ + uint64_t io_weight; + uint64_t startup_io_weight; + LIST_HEAD(CGroupIODeviceWeight, io_device_weights); + LIST_HEAD(CGroupIODeviceLimit, io_device_limits); + + /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; usec_t cpu_quota_per_sec_usec; @@ -86,6 +109,7 @@ struct CGroupContext { CGroupDevicePolicy device_policy; LIST_HEAD(CGroupDeviceAllow, device_allow); + /* Common */ uint64_t tasks_max; bool delegate; @@ -102,6 +126,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M CGroupMask cgroup_context_get_mask(CGroupContext *c); void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a); +void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w); +void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l); void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w); void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 859d155ec1..eef1c47c14 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -28,6 +28,72 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy); +static int property_get_io_device_weight( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + CGroupIODeviceWeight *w; + int r; + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'a', "(st)"); + if (r < 0) + return r; + + LIST_FOREACH(device_weights, w, c->io_device_weights) { + r = sd_bus_message_append(reply, "(st)", w->path, w->weight); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + +static int property_get_io_device_limits( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + CGroupIODeviceLimit *l; + int r; + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'a', "(st)"); + if (r < 0) + return r; + + LIST_FOREACH(device_limits, l, c->io_device_limits) { + CGroupIOLimitType type; + + type = cgroup_io_limit_type_from_string(property); + if (type < 0 || l->limits[type] == cgroup_io_limit_defaults[type]) + continue; + + r = sd_bus_message_append(reply, "(st)", l->path, l->limits[type]); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + static int property_get_blockio_device_weight( sd_bus *bus, const char *path, @@ -80,11 +146,17 @@ static int property_get_blockio_device_bandwidths( return r; LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { + uint64_t v; - if (streq(property, "BlockIOReadBandwidth") != b->read) + if (streq(property, "BlockIOReadBandwidth")) + v = b->rbps; + else + v = b->wbps; + + if (v == CGROUP_LIMIT_MAX) continue; - r = sd_bus_message_append(reply, "(st)", b->path, b->bandwidth); + r = sd_bus_message_append(reply, "(st)", b->path, v); if (r < 0) return r; } @@ -141,6 +213,14 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0), SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0), SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0), + SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0), + SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0), + SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0), + SD_BUS_PROPERTY("IODeviceWeight", "a(st)", property_get_io_device_weight, 0, 0), + SD_BUS_PROPERTY("IOReadBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), + SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0), @@ -197,6 +277,7 @@ int bus_cgroup_set_property( UnitSetPropertiesMode mode, sd_bus_error *error) { + CGroupIOLimitType iol_type; int r; assert(u); @@ -281,6 +362,223 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "IOAccounting")) { + int b; + + r = sd_bus_message_read(message, "b", &b); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + c->io_accounting = b; + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + unit_write_drop_in_private(u, mode, name, b ? "IOAccounting=yes" : "IOAccounting=no"); + } + + return 1; + + } else if (streq(name, "IOWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (!CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "IOWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->io_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + if (weight == CGROUP_WEIGHT_INVALID) + unit_write_drop_in_private(u, mode, name, "IOWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "IOWeight=%" PRIu64, weight); + } + + return 1; + + } else if (streq(name, "StartupIOWeight")) { + uint64_t weight; + + r = sd_bus_message_read(message, "t", &weight); + if (r < 0) + return r; + + if (CGROUP_WEIGHT_IS_OK(weight)) + return sd_bus_error_set_errnof(error, EINVAL, "StartupIOWeight value out of range"); + + if (mode != UNIT_CHECK) { + c->startup_io_weight = weight; + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + if (weight == CGROUP_WEIGHT_INVALID) + unit_write_drop_in_private(u, mode, name, "StartupIOWeight="); + else + unit_write_drop_in_private_format(u, mode, name, "StartupIOWeight=%" PRIu64, weight); + } + + return 1; + + } else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) { + const char *path; + unsigned n = 0; + uint64_t u64; + + r = sd_bus_message_enter_container(message, 'a', "(st)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_read(message, "(st)", &path, &u64)) > 0) { + + if (mode != UNIT_CHECK) { + CGroupIODeviceLimit *a = NULL, *b; + + LIST_FOREACH(device_limits, b, c->io_device_limits) { + if (path_equal(path, b->path)) { + a = b; + break; + } + } + + if (!a) { + CGroupIOLimitType type; + + a = new0(CGroupIODeviceLimit, 1); + if (!a) + return -ENOMEM; + + a->path = strdup(path); + if (!a->path) { + free(a); + return -ENOMEM; + } + + for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) + a->limits[type] = cgroup_io_limit_defaults[type]; + + LIST_PREPEND(device_limits, c->io_device_limits, a); + } + + a->limits[iol_type] = u64; + } + + n++; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + CGroupIODeviceLimit *a; + _cleanup_free_ char *buf = NULL; + _cleanup_fclose_ FILE *f = NULL; + size_t size = 0; + + if (n == 0) { + LIST_FOREACH(device_limits, a, c->io_device_limits) + a->limits[iol_type] = cgroup_io_limit_defaults[iol_type]; + } + + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + f = open_memstream(&buf, &size); + if (!f) + return -ENOMEM; + + fprintf(f, "%s=\n", name); + LIST_FOREACH(device_limits, a, c->io_device_limits) + if (a->limits[iol_type] != cgroup_io_limit_defaults[iol_type]) + fprintf(f, "%s=%s %" PRIu64 "\n", name, a->path, a->limits[iol_type]); + + r = fflush_and_check(f); + if (r < 0) + return r; + unit_write_drop_in_private(u, mode, name, buf); + } + + return 1; + + } else if (streq(name, "IODeviceWeight")) { + const char *path; + uint64_t weight; + unsigned n = 0; + + r = sd_bus_message_enter_container(message, 'a', "(st)"); + if (r < 0) + return r; + + while ((r = sd_bus_message_read(message, "(st)", &path, &weight)) > 0) { + + if (!CGROUP_WEIGHT_IS_OK(weight) || weight == CGROUP_WEIGHT_INVALID) + return sd_bus_error_set_errnof(error, EINVAL, "IODeviceWeight out of range"); + + if (mode != UNIT_CHECK) { + CGroupIODeviceWeight *a = NULL, *b; + + LIST_FOREACH(device_weights, b, c->io_device_weights) { + if (path_equal(b->path, path)) { + a = b; + break; + } + } + + if (!a) { + a = new0(CGroupIODeviceWeight, 1); + if (!a) + return -ENOMEM; + + a->path = strdup(path); + if (!a->path) { + free(a); + return -ENOMEM; + } + LIST_PREPEND(device_weights,c->io_device_weights, a); + } + + a->weight = weight; + } + + n++; + } + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + _cleanup_free_ char *buf = NULL; + _cleanup_fclose_ FILE *f = NULL; + CGroupIODeviceWeight *a; + size_t size = 0; + + if (n == 0) { + while (c->io_device_weights) + cgroup_context_free_io_device_weight(c, c->io_device_weights); + } + + unit_invalidate_cgroup(u, CGROUP_MASK_IO); + + f = open_memstream(&buf, &size); + if (!f) + return -ENOMEM; + + fputs("IODeviceWeight=\n", f); + LIST_FOREACH(device_weights, a, c->io_device_weights) + fprintf(f, "IODeviceWeight=%s %" PRIu64 "\n", a->path, a->weight); + + r = fflush_and_check(f); + if (r < 0) + return r; + unit_write_drop_in_private(u, mode, name, buf); + } + + return 1; + } else if (streq(name, "BlockIOAccounting")) { int b; @@ -359,7 +657,7 @@ int bus_cgroup_set_property( CGroupBlockIODeviceBandwidth *a = NULL, *b; LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { - if (path_equal(path, b->path) && read == b->read) { + if (path_equal(path, b->path)) { a = b; break; } @@ -370,7 +668,8 @@ int bus_cgroup_set_property( if (!a) return -ENOMEM; - a->read = read; + a->rbps = CGROUP_LIMIT_MAX; + a->wbps = CGROUP_LIMIT_MAX; a->path = strdup(path); if (!a->path) { free(a); @@ -380,7 +679,10 @@ int bus_cgroup_set_property( LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, a); } - a->bandwidth = u64; + if (read) + a->rbps = u64; + else + a->wbps = u64; } n++; @@ -393,15 +695,18 @@ int bus_cgroup_set_property( return r; if (mode != UNIT_CHECK) { - CGroupBlockIODeviceBandwidth *a, *next; + CGroupBlockIODeviceBandwidth *a; _cleanup_free_ char *buf = NULL; _cleanup_fclose_ FILE *f = NULL; size_t size = 0; if (n == 0) { - LIST_FOREACH_SAFE(device_bandwidths, a, next, c->blockio_device_bandwidths) - if (a->read == read) - cgroup_context_free_blockio_device_bandwidth(c, a); + LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) { + if (read) + a->rbps = CGROUP_LIMIT_MAX; + else + a->wbps = CGROUP_LIMIT_MAX; + } } unit_invalidate_cgroup(u, CGROUP_MASK_BLKIO); @@ -413,13 +718,13 @@ int bus_cgroup_set_property( if (read) { fputs("BlockIOReadBandwidth=\n", f); LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) - if (a->read) - fprintf(f, "BlockIOReadBandwidth=%s %" PRIu64 "\n", a->path, a->bandwidth); + if (a->rbps != CGROUP_LIMIT_MAX) + fprintf(f, "BlockIOReadBandwidth=%s %" PRIu64 "\n", a->path, a->rbps); } else { fputs("BlockIOWriteBandwidth=\n", f); LIST_FOREACH(device_bandwidths, a, c->blockio_device_bandwidths) - if (!a->read) - fprintf(f, "BlockIOWriteBandwidth=%s %" PRIu64 "\n", a->path, a->bandwidth); + if (a->wbps != CGROUP_LIMIT_MAX) + fprintf(f, "BlockIOWriteBandwidth=%s %" PRIu64 "\n", a->path, a->wbps); } r = fflush_and_check(f); diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 9dfca14914..06943c6365 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -839,16 +839,8 @@ int bus_exec_context_set_transient_property( if (isempty(uu)) c->user = mfree(c->user); - else { - char *t; - - t = strdup(uu); - if (!t) - return -ENOMEM; - - free(c->user); - c->user = t; - } + else if (free_and_strdup(&c->user, uu) < 0) + return -ENOMEM; unit_write_drop_in_private_format(u, mode, name, "User=%s\n", uu); } @@ -866,16 +858,8 @@ int bus_exec_context_set_transient_property( if (isempty(gg)) c->group = mfree(c->group); - else { - char *t; - - t = strdup(gg); - if (!t) - return -ENOMEM; - - free(c->group); - c->group = t; - } + else if (free_and_strdup(&c->group, gg) < 0) + return -ENOMEM; unit_write_drop_in_private_format(u, mode, name, "Group=%s\n", gg); } @@ -890,18 +874,10 @@ int bus_exec_context_set_transient_property( if (mode != UNIT_CHECK) { - if (isempty(id)) { + if (isempty(id)) c->syslog_identifier = mfree(c->syslog_identifier); - } else { - char *t; - - t = strdup(id); - if (!t) - return -ENOMEM; - - free(c->syslog_identifier); - c->syslog_identifier = t; - } + else if (free_and_strdup(&c->syslog_identifier, id) < 0) + return -ENOMEM; unit_write_drop_in_private_format(u, mode, name, "SyslogIdentifier=%s\n", id); } @@ -1478,6 +1454,24 @@ int bus_exec_context_set_transient_property( return 1; + } else if (streq(name, "SELinuxContext")) { + const char *s; + + r = sd_bus_message_read(message, "s", &s); + if (r < 0) + return r; + + if (mode != UNIT_CHECK) { + if (isempty(s)) + c->selinux_context = mfree(c->selinux_context); + else if (free_and_strdup(&c->selinux_context, s) < 0) + return -ENOMEM; + + unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, strempty(s)); + } + + return 1; + } ri = rlimit_from_string(name); diff --git a/src/core/dbus-job.c b/src/core/dbus-job.c index 97a93fb2f1..ccf7453d47 100644 --- a/src/core/dbus-job.c +++ b/src/core/dbus-job.c @@ -75,7 +75,7 @@ int bus_job_method_cancel(sd_bus_message *message, void *userdata, sd_bus_error return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */ } - job_finish_and_invalidate(j, JOB_CANCELED, true); + job_finish_and_invalidate(j, JOB_CANCELED, true, false); return sd_bus_reply_method_return(message, NULL); } diff --git a/src/core/job.c b/src/core/job.c index d9c5669c9f..7557874d4d 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -191,7 +191,7 @@ Job* job_install(Job *j) { if (uj) { if (job_type_is_conflicting(uj->type, j->type)) - job_finish_and_invalidate(uj, JOB_CANCELED, false); + job_finish_and_invalidate(uj, JOB_CANCELED, false, false); else { /* not conflicting, i.e. mergeable */ @@ -614,19 +614,19 @@ int job_run_and_invalidate(Job *j) { if (j) { if (r == -EALREADY) - r = job_finish_and_invalidate(j, JOB_DONE, true); + r = job_finish_and_invalidate(j, JOB_DONE, true, true); else if (r == -EBADR) - r = job_finish_and_invalidate(j, JOB_SKIPPED, true); + r = job_finish_and_invalidate(j, JOB_SKIPPED, true, false); else if (r == -ENOEXEC) - r = job_finish_and_invalidate(j, JOB_INVALID, true); + r = job_finish_and_invalidate(j, JOB_INVALID, true, false); else if (r == -EPROTO) - r = job_finish_and_invalidate(j, JOB_ASSERT, true); + r = job_finish_and_invalidate(j, JOB_ASSERT, true, false); else if (r == -EOPNOTSUPP) - r = job_finish_and_invalidate(j, JOB_UNSUPPORTED, true); + r = job_finish_and_invalidate(j, JOB_UNSUPPORTED, true, false); else if (r == -EAGAIN) job_set_state(j, JOB_WAITING); else if (r < 0) - r = job_finish_and_invalidate(j, JOB_FAILED, true); + r = job_finish_and_invalidate(j, JOB_FAILED, true, false); } return r; @@ -827,11 +827,11 @@ static void job_fail_dependencies(Unit *u, UnitDependency d) { if (!IN_SET(j->type, JOB_START, JOB_VERIFY_ACTIVE)) continue; - job_finish_and_invalidate(j, JOB_DEPENDENCY, true); + job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false); } } -int job_finish_and_invalidate(Job *j, JobResult result, bool recursive) { +int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) { Unit *u; Unit *other; JobType t; @@ -848,7 +848,9 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive) { log_unit_debug(u, "Job %s/%s finished, result=%s", u->id, job_type_to_string(t), job_result_to_string(result)); - job_emit_status_message(u, t, result); + /* If this job did nothing to respective unit we don't log the status message */ + if (!already) + job_emit_status_message(u, t, result); job_add_to_dbus_queue(j); @@ -923,7 +925,7 @@ static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *user log_unit_warning(j->unit, "Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type)); u = j->unit; - job_finish_and_invalidate(j, JOB_TIMEOUT, true); + job_finish_and_invalidate(j, JOB_TIMEOUT, true, false); failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg); diff --git a/src/core/job.h b/src/core/job.h index 856b0ce829..d359e8bb3e 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -219,7 +219,7 @@ void job_add_to_dbus_queue(Job *j); int job_start_timer(Job *j); int job_run_and_invalidate(Job *j); -int job_finish_and_invalidate(Job *j, JobResult result, bool recursive); +int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already); char *job_dbus_path(Job *j); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 928b913c7b..8193418980 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -120,6 +120,14 @@ $1.MemoryAccounting, config_parse_bool, 0, $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) $1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy) +$1.IOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.io_accounting) +$1.IOWeight, config_parse_io_weight, 0, offsetof($1, cgroup_context.io_weight) +$1.StartupIOWeight, config_parse_io_weight, 0, offsetof($1, cgroup_context.startup_io_weight) +$1.IODeviceWeight, config_parse_io_device_weight, 0, offsetof($1, cgroup_context) +$1.IOReadBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) +$1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context) $1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting) $1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight) $1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index a12dd38c60..86b4fb071b 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2910,6 +2910,193 @@ int config_parse_device_allow( return 0; } +int config_parse_io_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *weight = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + r = cg_weight_parse(rvalue, weight); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "IO weight '%s' invalid. Ignoring.", rvalue); + return 0; + } + + return 0; +} + +int config_parse_io_device_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL; + CGroupIODeviceWeight *w; + CGroupContext *c = data; + const char *weight; + uint64_t u; + size_t n; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + while (c->io_device_weights) + cgroup_context_free_io_device_weight(c, c->io_device_weights); + + return 0; + } + + n = strcspn(rvalue, WHITESPACE); + weight = rvalue + n; + weight += strspn(weight, WHITESPACE); + + if (isempty(weight)) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Expected block device and device weight. Ignoring."); + return 0; + } + + path = strndup(rvalue, n); + if (!path) + return log_oom(); + + if (!path_startswith(path, "/dev")) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path); + return 0; + } + + r = cg_weight_parse(weight, &u); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "IO weight '%s' invalid. Ignoring.", weight); + return 0; + } + + assert(u != CGROUP_WEIGHT_INVALID); + + w = new0(CGroupIODeviceWeight, 1); + if (!w) + return log_oom(); + + w->path = path; + path = NULL; + + w->weight = u; + + LIST_PREPEND(device_weights, c->io_device_weights, w); + return 0; +} + +int config_parse_io_limit( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *path = NULL; + CGroupIODeviceLimit *l = NULL, *t; + CGroupContext *c = data; + CGroupIOLimitType type; + const char *limit; + uint64_t num; + size_t n; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + type = cgroup_io_limit_type_from_string(lvalue); + assert(type >= 0); + + if (isempty(rvalue)) { + LIST_FOREACH(device_limits, l, c->io_device_limits) + l->limits[type] = cgroup_io_limit_defaults[type]; + return 0; + } + + n = strcspn(rvalue, WHITESPACE); + limit = rvalue + n; + limit += strspn(limit, WHITESPACE); + + if (!*limit) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Expected space separated pair of device node and bandwidth. Ignoring."); + return 0; + } + + path = strndup(rvalue, n); + if (!path) + return log_oom(); + + if (!path_startswith(path, "/dev")) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Invalid device node path '%s'. Ignoring.", path); + return 0; + } + + if (streq("max", limit)) { + num = CGROUP_LIMIT_MAX; + } else { + r = parse_size(limit, 1000, &num); + if (r < 0 || num <= 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "IO Limit '%s' invalid. Ignoring.", rvalue); + return 0; + } + } + + LIST_FOREACH(device_limits, t, c->io_device_limits) { + if (path_equal(path, t->path)) { + l = t; + break; + } + } + + if (!l) { + CGroupIOLimitType ttype; + + l = new0(CGroupIODeviceLimit, 1); + if (!l) + return log_oom(); + + l->path = path; + path = NULL; + for (ttype = 0; ttype < _CGROUP_IO_LIMIT_TYPE_MAX; ttype++) + l->limits[ttype] = cgroup_io_limit_defaults[ttype]; + + LIST_PREPEND(device_limits, c->io_device_limits, l); + } + + l->limits[type] = num; + + return 0; +} + int config_parse_blockio_weight( const char *unit, const char *filename, @@ -3021,7 +3208,7 @@ int config_parse_blockio_bandwidth( void *userdata) { _cleanup_free_ char *path = NULL; - CGroupBlockIODeviceBandwidth *b; + CGroupBlockIODeviceBandwidth *b = NULL, *t; CGroupContext *c = data; const char *bandwidth; uint64_t bytes; @@ -3036,12 +3223,10 @@ int config_parse_blockio_bandwidth( read = streq("BlockIOReadBandwidth", lvalue); if (isempty(rvalue)) { - CGroupBlockIODeviceBandwidth *next; - - LIST_FOREACH_SAFE (device_bandwidths, b, next, c->blockio_device_bandwidths) - if (b->read == read) - cgroup_context_free_blockio_device_bandwidth(c, b); - + LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) { + b->rbps = CGROUP_LIMIT_MAX; + b->wbps = CGROUP_LIMIT_MAX; + } return 0; } @@ -3069,16 +3254,30 @@ int config_parse_blockio_bandwidth( return 0; } - b = new0(CGroupBlockIODeviceBandwidth, 1); - if (!b) - return log_oom(); + LIST_FOREACH(device_bandwidths, t, c->blockio_device_bandwidths) { + if (path_equal(path, t->path)) { + b = t; + break; + } + } - b->path = path; - path = NULL; - b->bandwidth = bytes; - b->read = read; + if (!t) { + b = new0(CGroupBlockIODeviceBandwidth, 1); + if (!b) + return log_oom(); - LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, b); + b->path = path; + path = NULL; + b->rbps = CGROUP_LIMIT_MAX; + b->wbps = CGROUP_LIMIT_MAX; + + LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, b); + } + + if (read) + b->rbps = bytes; + else + b->wbps = bytes; return 0; } @@ -3830,6 +4029,9 @@ void unit_dump_config_items(FILE *f) { { config_parse_memory_limit, "LIMIT" }, { config_parse_device_allow, "DEVICE" }, { config_parse_device_policy, "POLICY" }, + { config_parse_io_limit, "LIMIT" }, + { config_parse_io_weight, "WEIGHT" }, + { config_parse_io_device_weight, "DEVICEWEIGHT" }, { config_parse_blockio_bandwidth, "BANDWIDTH" }, { config_parse_blockio_weight, "WEIGHT" }, { config_parse_blockio_device_weight, "DEVICEWEIGHT" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 34f15afa62..b36a2e3a02 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -86,6 +86,9 @@ int config_parse_memory_limit(const char *unit, const char *filename, unsigned l int config_parse_tasks_max(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_device_policy(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_device_allow(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_io_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_io_device_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_io_limit(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_device_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_bandwidth(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/core/main.c b/src/core/main.c index ed4d42c8cc..6397aadc73 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -122,6 +122,7 @@ static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE; static Set* arg_syscall_archs = NULL; static FILE* arg_serialization = NULL; static bool arg_default_cpu_accounting = false; +static bool arg_default_io_accounting = false; static bool arg_default_blockio_accounting = false; static bool arg_default_memory_accounting = false; static bool arg_default_tasks_accounting = true; @@ -691,6 +692,7 @@ static int parse_config_file(void) { { "Manager", "DefaultLimitRTPRIO", config_parse_limit, RLIMIT_RTPRIO, arg_default_rlimit }, { "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit }, { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting }, + { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting }, { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting }, { "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting }, { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting }, @@ -733,6 +735,7 @@ static void manager_set_defaults(Manager *m) { m->default_start_limit_interval = arg_default_start_limit_interval; m->default_start_limit_burst = arg_default_start_limit_burst; m->default_cpu_accounting = arg_default_cpu_accounting; + m->default_io_accounting = arg_default_io_accounting; m->default_blockio_accounting = arg_default_blockio_accounting; m->default_memory_accounting = arg_default_memory_accounting; m->default_tasks_accounting = arg_default_tasks_accounting; diff --git a/src/core/manager.c b/src/core/manager.c index e192cd475d..7838f56fd2 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1494,7 +1494,7 @@ void manager_clear_jobs(Manager *m) { while ((j = hashmap_first(m->jobs))) /* No need to recurse. We're cancelling all jobs. */ - job_finish_and_invalidate(j, JOB_CANCELED, false); + job_finish_and_invalidate(j, JOB_CANCELED, false, false); } static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) { diff --git a/src/core/manager.h b/src/core/manager.h index 4bccca75cb..6ed15c1a41 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -255,6 +255,7 @@ struct Manager { bool default_cpu_accounting; bool default_memory_accounting; + bool default_io_accounting; bool default_blockio_accounting; bool default_tasks_accounting; diff --git a/src/core/namespace.c b/src/core/namespace.c index b573f008b9..203d122810 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -44,6 +44,8 @@ #include "user-util.h" #include "util.h" +#define DEV_MOUNT_OPTIONS (MS_NOSUID|MS_STRICTATIME|MS_NOEXEC) + typedef enum MountMode { /* This is ordered by priority! */ INACCESSIBLE, @@ -153,7 +155,7 @@ static int mount_dev(BindMount *m) { dev = strjoina(temporary_mount, "/dev"); (void) mkdir(dev, 0755); - if (mount("tmpfs", dev, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755") < 0) { + if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=755") < 0) { r = -errno; goto fail; } @@ -237,6 +239,8 @@ static int mount_dev(BindMount *m) { */ (void) mkdir_p_label(m->path, 0755); + /* Unmount everything in old /dev */ + umount_recursive(m->path, 0); if (mount(dev, m->path, NULL, MS_MOVE, NULL) < 0) { r = -errno; goto fail; @@ -328,9 +332,11 @@ static int make_read_only(BindMount *m) { if (IN_SET(m->mode, INACCESSIBLE, READONLY)) r = bind_remount_recursive(m->path, true); - else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) + else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) { r = bind_remount_recursive(m->path, false); - else + if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/ + r = mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL); + } else r = 0; if (m->ignore && r == -ENOENT) diff --git a/src/core/system.conf b/src/core/system.conf index eacd7ee282..db8b7acd78 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -38,6 +38,7 @@ #DefaultStartLimitBurst=5 #DefaultEnvironment= #DefaultCPUAccounting=no +#DefaultIOAccounting=no #DefaultBlockIOAccounting=no #DefaultMemoryAccounting=no #DefaultTasksAccounting=yes diff --git a/src/core/transaction.c b/src/core/transaction.c index d5370b2a14..e06a48a2f1 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -597,7 +597,7 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) { /* Not invalidating recursively. Avoids triggering * OnFailure= actions of dependent jobs. Also avoids * invalidating our iterator. */ - job_finish_and_invalidate(j, JOB_CANCELED, false); + job_finish_and_invalidate(j, JOB_CANCELED, false, false); } } diff --git a/src/core/unit.c b/src/core/unit.c index d8ab5781b0..2fff3f2d8b 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -132,6 +132,7 @@ static void unit_init(Unit *u) { * been initialized */ cc->cpu_accounting = u->manager->default_cpu_accounting; + cc->io_accounting = u->manager->default_io_accounting; cc->blockio_accounting = u->manager->default_blockio_accounting; cc->memory_accounting = u->manager->default_memory_accounting; cc->tasks_accounting = u->manager->default_tasks_accounting; @@ -1213,6 +1214,7 @@ static int unit_add_startup_units(Unit *u) { return 0; if (c->startup_cpu_shares == CGROUP_CPU_SHARES_INVALID && + c->startup_io_weight == CGROUP_WEIGHT_INVALID && c->startup_blockio_weight == CGROUP_BLKIO_WEIGHT_INVALID) return 0; @@ -1916,12 +1918,12 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su case JOB_VERIFY_ACTIVE: if (UNIT_IS_ACTIVE_OR_RELOADING(ns)) - job_finish_and_invalidate(u->job, JOB_DONE, true); + job_finish_and_invalidate(u->job, JOB_DONE, true, false); else if (u->job->state == JOB_RUNNING && ns != UNIT_ACTIVATING) { unexpected = true; if (UNIT_IS_INACTIVE_OR_FAILED(ns)) - job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true); + job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false); } break; @@ -1932,12 +1934,12 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su if (u->job->state == JOB_RUNNING) { if (ns == UNIT_ACTIVE) - job_finish_and_invalidate(u->job, reload_success ? JOB_DONE : JOB_FAILED, true); + job_finish_and_invalidate(u->job, reload_success ? JOB_DONE : JOB_FAILED, true, false); else if (ns != UNIT_ACTIVATING && ns != UNIT_RELOADING) { unexpected = true; if (UNIT_IS_INACTIVE_OR_FAILED(ns)) - job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true); + job_finish_and_invalidate(u->job, ns == UNIT_FAILED ? JOB_FAILED : JOB_DONE, true, false); } } @@ -1948,10 +1950,10 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su case JOB_TRY_RESTART: if (UNIT_IS_INACTIVE_OR_FAILED(ns)) - job_finish_and_invalidate(u->job, JOB_DONE, true); + job_finish_and_invalidate(u->job, JOB_DONE, true, false); else if (u->job->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) { unexpected = true; - job_finish_and_invalidate(u->job, JOB_FAILED, true); + job_finish_and_invalidate(u->job, JOB_FAILED, true, false); } break; @@ -2400,9 +2402,11 @@ int unit_set_slice(Unit *u, Unit *slice) { if (UNIT_DEREF(u->slice) == slice) return 0; - if (UNIT_ISSET(u->slice)) + /* Disallow slice changes if @u is already bound to cgroups */ + if (UNIT_ISSET(u->slice) && u->cgroup_realized) return -EBUSY; + unit_ref_unset(&u->slice); unit_ref_set(&u->slice, slice); return 1; } |