summaryrefslogtreecommitdiff
path: root/src/grp-system/libcore
diff options
context:
space:
mode:
Diffstat (limited to 'src/grp-system/libcore')
-rw-r--r--src/grp-system/libcore/Makefile2
-rw-r--r--src/grp-system/libcore/automount.c20
-rw-r--r--src/grp-system/libcore/busname.c7
-rw-r--r--src/grp-system/libcore/cgroup.c242
-rw-r--r--src/grp-system/libcore/cgroup.h5
-rw-r--r--src/grp-system/libcore/dbus-cgroup.c119
-rw-r--r--src/grp-system/libcore/dbus-execute.c95
-rw-r--r--src/grp-system/libcore/dbus-kill.c8
-rw-r--r--src/grp-system/libcore/dbus-manager.c1
-rw-r--r--src/grp-system/libcore/dbus-scope.c4
-rw-r--r--src/grp-system/libcore/dbus-service.c6
-rw-r--r--src/grp-system/libcore/dbus-timer.c12
-rw-r--r--src/grp-system/libcore/dbus-unit.c8
-rw-r--r--src/grp-system/libcore/execute.c348
-rw-r--r--src/grp-system/libcore/execute.h19
-rw-r--r--src/grp-system/libcore/killall.c7
-rw-r--r--src/grp-system/libcore/kmod-setup.c3
-rw-r--r--src/grp-system/libcore/load-fragment-gperf.gperf.m416
-rw-r--r--src/grp-system/libcore/load-fragment.c193
-rw-r--r--src/grp-system/libcore/machine-id-setup.c226
-rw-r--r--src/grp-system/libcore/machine-id-setup.h2
-rw-r--r--src/grp-system/libcore/manager.c74
-rw-r--r--src/grp-system/libcore/manager.h2
-rw-r--r--src/grp-system/libcore/mount-setup.c16
-rw-r--r--src/grp-system/libcore/mount.c1
-rw-r--r--src/grp-system/libcore/namespace.c51
-rw-r--r--src/grp-system/libcore/namespace.h6
-rw-r--r--src/grp-system/libcore/scope.c21
-rw-r--r--src/grp-system/libcore/scope.h2
-rw-r--r--src/grp-system/libcore/selinux-access.c2
-rw-r--r--src/grp-system/libcore/selinux-setup.c2
-rw-r--r--src/grp-system/libcore/service.c80
-rw-r--r--src/grp-system/libcore/service.h2
-rw-r--r--src/grp-system/libcore/socket.c16
-rw-r--r--src/grp-system/libcore/transaction.c7
-rw-r--r--src/grp-system/libcore/unit.c117
-rw-r--r--src/grp-system/libcore/unit.h4
37 files changed, 1127 insertions, 619 deletions
diff --git a/src/grp-system/libcore/Makefile b/src/grp-system/libcore/Makefile
index 1f19355412..b31d00fcdb 100644
--- a/src/grp-system/libcore/Makefile
+++ b/src/grp-system/libcore/Makefile
@@ -156,7 +156,7 @@ libcore_la_CFLAGS = \
$(SECCOMP_CFLAGS)
libcore_la_LIBADD = \
- libshared.la \
+ libsystemd-shared.la \
$(PAM_LIBS) \
$(AUDIT_LIBS) \
$(KMOD_LIBS) \
diff --git a/src/grp-system/libcore/automount.c b/src/grp-system/libcore/automount.c
index 726a044030..30c30469ca 100644
--- a/src/grp-system/libcore/automount.c
+++ b/src/grp-system/libcore/automount.c
@@ -100,9 +100,6 @@ static void unmount_autofs(Automount *a) {
if (a->pipe_fd < 0)
return;
- automount_send_ready(a, a->tokens, -EHOSTDOWN);
- automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
-
a->pipe_event_source = sd_event_source_unref(a->pipe_event_source);
a->pipe_fd = safe_close(a->pipe_fd);
@@ -111,6 +108,9 @@ static void unmount_autofs(Automount *a) {
if (a->where &&
(UNIT(a)->manager->exit_code != MANAGER_RELOAD &&
UNIT(a)->manager->exit_code != MANAGER_REEXECUTE)) {
+ automount_send_ready(a, a->tokens, -EHOSTDOWN);
+ automount_send_ready(a, a->expire_tokens, -EHOSTDOWN);
+
r = repeat_unmount(a->where, MNT_DETACH);
if (r < 0)
log_error_errno(r, "Failed to unmount: %m");
@@ -504,6 +504,20 @@ static void automount_trigger_notify(Unit *u, Unit *other) {
automount_set_state(a, AUTOMOUNT_RUNNING);
}
+ if (IN_SET(MOUNT(other)->state,
+ MOUNT_MOUNTING, MOUNT_MOUNTING_DONE,
+ MOUNT_MOUNTED, MOUNT_REMOUNTING,
+ MOUNT_MOUNTING_SIGTERM, MOUNT_MOUNTING_SIGKILL,
+ MOUNT_REMOUNTING_SIGTERM, MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM, MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED)) {
+
+ (void) automount_send_ready(a, a->expire_tokens, -ENODEV);
+ }
+
+ if (MOUNT(other)->state == MOUNT_DEAD)
+ (void) automount_send_ready(a, a->expire_tokens, 0);
+
/* The mount is in some unhappy state now, let's unfreeze any waiting clients */
if (IN_SET(MOUNT(other)->state,
MOUNT_DEAD, MOUNT_UNMOUNTING,
diff --git a/src/grp-system/libcore/busname.c b/src/grp-system/libcore/busname.c
index 5d85c20730..f21a2d23c1 100644
--- a/src/grp-system/libcore/busname.c
+++ b/src/grp-system/libcore/busname.c
@@ -999,12 +999,7 @@ static int busname_get_timeout(Unit *u, usec_t *timeout) {
}
static bool busname_supported(void) {
- static int supported = -1;
-
- if (supported < 0)
- supported = is_kdbus_available();
-
- return supported;
+ return false;
}
static int busname_control_pid(Unit *u) {
diff --git a/src/grp-system/libcore/cgroup.c b/src/grp-system/libcore/cgroup.c
index df850ec68f..0c1365f329 100644
--- a/src/grp-system/libcore/cgroup.c
+++ b/src/grp-system/libcore/cgroup.c
@@ -37,6 +37,21 @@
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
+static void cgroup_compat_warn(void) {
+ static bool cgroup_compat_warned = false;
+
+ if (cgroup_compat_warned)
+ return;
+
+ log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details.");
+ cgroup_compat_warned = true;
+}
+
+#define log_cgroup_compat(unit, fmt, ...) do { \
+ cgroup_compat_warn(); \
+ log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__); \
+ } while (false)
+
void cgroup_context_init(CGroupContext *c) {
assert(c);
@@ -47,7 +62,10 @@ void cgroup_context_init(CGroupContext *c) {
c->startup_cpu_shares = CGROUP_CPU_SHARES_INVALID;
c->cpu_quota_per_sec_usec = USEC_INFINITY;
- c->memory_limit = (uint64_t) -1;
+ c->memory_high = CGROUP_LIMIT_MAX;
+ c->memory_max = CGROUP_LIMIT_MAX;
+
+ c->memory_limit = CGROUP_LIMIT_MAX;
c->io_weight = CGROUP_WEIGHT_INVALID;
c->startup_io_weight = CGROUP_WEIGHT_INVALID;
@@ -148,6 +166,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sStartupIOWeight=%" PRIu64 "\n"
"%sBlockIOWeight=%" PRIu64 "\n"
"%sStartupBlockIOWeight=%" PRIu64 "\n"
+ "%sMemoryLow=%" PRIu64 "\n"
+ "%sMemoryHigh=%" PRIu64 "\n"
+ "%sMemoryMax=%" PRIu64 "\n"
"%sMemoryLimit=%" PRIu64 "\n"
"%sTasksMax=%" PRIu64 "\n"
"%sDevicePolicy=%s\n"
@@ -164,6 +185,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->startup_io_weight,
prefix, c->blockio_weight,
prefix, c->startup_blockio_weight,
+ prefix, c->memory_low,
+ prefix, c->memory_high,
+ prefix, c->memory_max,
prefix, c->memory_limit,
prefix, c->tasks_max,
prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -405,7 +429,7 @@ static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) {
CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX);
}
-static void cgroup_apply_io_device_weight(const char *path, const char *dev_path, uint64_t io_weight) {
+static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_t io_weight) {
char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
dev_t dev;
int r;
@@ -415,13 +439,13 @@ static void cgroup_apply_io_device_weight(const char *path, const char *dev_path
return;
xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight);
- r = cg_set_attribute("io", path, "io.weight", buf);
+ r = cg_set_attribute("io", u->cgroup_path, "io.weight", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set io.weight on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set io.weight: %m");
}
-static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_path, uint64_t blkio_weight) {
+static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) {
char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
dev_t dev;
int r;
@@ -431,13 +455,13 @@ static void cgroup_apply_blkio_device_weight(const char *path, const char *dev_p
return;
xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight);
- r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
+ r = cg_set_attribute("blkio", u->cgroup_path, "blkio.weight_device", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.weight_device on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set blkio.weight_device: %m");
}
-static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_path, uint64_t *limits) {
+static unsigned cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) {
char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)];
char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4];
CGroupIOLimitType type;
@@ -461,14 +485,14 @@ static unsigned cgroup_apply_io_device_limit(const char *path, const char *dev_p
xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev),
limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX],
limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]);
- r = cg_set_attribute("io", path, "io.max", buf);
+ r = cg_set_attribute("io", u->cgroup_path, "io.max", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set io.max on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set io.max: %m");
return n;
}
-static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *dev_path, uint64_t rbps, uint64_t wbps) {
+static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) {
char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
dev_t dev;
unsigned n = 0;
@@ -481,26 +505,50 @@ static unsigned cgroup_apply_blkio_device_limit(const char *path, const char *de
if (rbps != CGROUP_LIMIT_MAX)
n++;
sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps);
- r = cg_set_attribute("blkio", path, "blkio.throttle.read_bps_device", buf);
+ r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.read_bps_device", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.throttle.read_bps_device on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set blkio.throttle.read_bps_device: %m");
if (wbps != CGROUP_LIMIT_MAX)
n++;
sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps);
- r = cg_set_attribute("blkio", path, "blkio.throttle.write_bps_device", buf);
+ r = cg_set_attribute("blkio", u->cgroup_path, "blkio.throttle.write_bps_device", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.throttle.write_bps_device on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set blkio.throttle.write_bps_device: %m");
return n;
}
-void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) {
+static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
+ return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX;
+}
+
+static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max";
+ int r;
+
+ if (v != CGROUP_LIMIT_MAX)
+ xsprintf(buf, "%" PRIu64 "\n", v);
+
+ r = cg_set_attribute("memory", u->cgroup_path, file, buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set %s: %m", file);
+}
+
+static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
+ const char *path;
+ CGroupContext *c;
bool is_root;
int r;
+ assert(u);
+
+ c = unit_get_cgroup_context(u);
+ path = u->cgroup_path;
+
assert(c);
assert(path);
@@ -526,14 +574,14 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
r = cg_set_attribute("cpu", path, "cpu.shares", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.shares on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.shares: %m");
sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_period_us on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_period_us: %m");
if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
@@ -541,8 +589,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
} else
r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set cpu.cfs_quota_us on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_quota_us: %m");
}
if (mask & CGROUP_MASK_IO) {
@@ -555,29 +603,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
if (has_io)
weight = cgroup_context_io_weight(c, state);
- else if (has_blockio)
- weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state));
- else
+ else if (has_blockio) {
+ uint64_t blkio_weight = cgroup_context_blkio_weight(c, state);
+
+ weight = cgroup_weight_blkio_to_io(blkio_weight);
+
+ log_cgroup_compat(u, "Applying [Startup]BlockIOWeight %" PRIu64 " as [Startup]IOWeight %" PRIu64,
+ blkio_weight, weight);
+ } else
weight = CGROUP_WEIGHT_DEFAULT;
xsprintf(buf, "default %" PRIu64 "\n", weight);
r = cg_set_attribute("io", path, "io.weight", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set io.weight on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set io.weight: %m");
if (has_io) {
CGroupIODeviceWeight *w;
/* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->io_device_weights)
- cgroup_apply_io_device_weight(path, w->path, w->weight);
+ cgroup_apply_io_device_weight(u, w->path, w->weight);
} else if (has_blockio) {
CGroupBlockIODeviceWeight *w;
/* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->blockio_device_weights)
- cgroup_apply_io_device_weight(path, w->path, cgroup_weight_blkio_to_io(w->weight));
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+ weight = cgroup_weight_blkio_to_io(w->weight);
+
+ log_cgroup_compat(u, "Applying BlockIODeviceWeight %" PRIu64 " as IODeviceWeight %" PRIu64 " for %s",
+ w->weight, weight, w->path);
+
+ cgroup_apply_io_device_weight(u, w->path, weight);
+ }
}
}
@@ -586,7 +645,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
CGroupIODeviceLimit *l, *next;
LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
- if (!cgroup_apply_io_device_limit(path, l->path, l->limits))
+ if (!cgroup_apply_io_device_limit(u, l->path, l->limits))
cgroup_context_free_io_device_limit(c, l);
}
} else if (has_blockio) {
@@ -602,7 +661,10 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
limits[CGROUP_IO_RBPS_MAX] = b->rbps;
limits[CGROUP_IO_WBPS_MAX] = b->wbps;
- if (!cgroup_apply_io_device_limit(path, b->path, limits))
+ log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax for %s",
+ b->rbps, b->wbps, b->path);
+
+ if (!cgroup_apply_io_device_limit(u, b->path, limits))
cgroup_context_free_blockio_device_bandwidth(c, b);
}
}
@@ -618,29 +680,40 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
if (has_blockio)
weight = cgroup_context_blkio_weight(c, state);
- else if (has_io)
+ else if (has_io) {
+ uint64_t io_weight = cgroup_context_io_weight(c, state);
+
weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
- else
+
+ log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64,
+ io_weight, weight);
+ } else
weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
xsprintf(buf, "%" PRIu64 "\n", weight);
r = cg_set_attribute("blkio", path, "blkio.weight", buf);
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set blkio.weight on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set blkio.weight: %m");
if (has_blockio) {
CGroupBlockIODeviceWeight *w;
/* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->blockio_device_weights)
- cgroup_apply_blkio_device_weight(path, w->path, w->weight);
+ cgroup_apply_blkio_device_weight(u, w->path, w->weight);
} else if (has_io) {
CGroupIODeviceWeight *w;
/* FIXME: no way to reset this list */
- LIST_FOREACH(device_weights, w, c->io_device_weights)
- cgroup_apply_blkio_device_weight(path, w->path, cgroup_weight_io_to_blkio(w->weight));
+ LIST_FOREACH(device_weights, w, c->io_device_weights) {
+ weight = cgroup_weight_io_to_blkio(w->weight);
+
+ log_cgroup_compat(u, "Applying IODeviceWeight %" PRIu64 " as BlockIODeviceWeight %" PRIu64 " for %s",
+ w->weight, weight, w->path);
+
+ cgroup_apply_blkio_device_weight(u, w->path, weight);
+ }
}
}
@@ -649,40 +722,59 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
CGroupBlockIODeviceBandwidth *b, *next;
LIST_FOREACH_SAFE(device_bandwidths, b, next, c->blockio_device_bandwidths) {
- if (!cgroup_apply_blkio_device_limit(path, b->path, b->rbps, b->wbps))
+ if (!cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps))
cgroup_context_free_blockio_device_bandwidth(c, b);
}
} else if (has_io) {
CGroupIODeviceLimit *l, *next;
LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
- if (!cgroup_apply_blkio_device_limit(path, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
+ log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax for %s",
+ l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path);
+
+ if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
cgroup_context_free_io_device_limit(c, l);
}
}
}
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
- if (c->memory_limit != (uint64_t) -1) {
- char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+ if (cg_unified() > 0) {
+ uint64_t max = c->memory_max;
- sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
+ if (cgroup_context_has_unified_memory_config(c))
+ max = c->memory_max;
+ else {
+ max = c->memory_limit;
- if (cg_unified() <= 0)
- r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
- else
- r = cg_set_attribute("memory", path, "memory.max", buf);
+ if (max != CGROUP_LIMIT_MAX)
+ log_cgroup_compat(u, "Applying MemoryLimit %" PRIu64 " as MemoryMax", max);
+ }
+ cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
+ cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
+ cgroup_apply_unified_memory_limit(u, "memory.max", max);
} else {
- if (cg_unified() <= 0)
- r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+ uint64_t val = c->memory_limit;
+
+ if (val == CGROUP_LIMIT_MAX) {
+ val = c->memory_max;
+
+ if (val != CGROUP_LIMIT_MAX)
+ log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max);
+ }
+
+ if (val == CGROUP_LIMIT_MAX)
+ strncpy(buf, "-1\n", sizeof(buf));
else
- r = cg_set_attribute("memory", path, "memory.max", "max");
- }
+ xsprintf(buf, "%" PRIu64 "\n", val);
- if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path);
+ r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
+ if (r < 0)
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set memory.limit_in_bytes: %m");
+ }
}
if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
@@ -697,8 +789,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
else
r = cg_set_attribute("devices", path, "devices.allow", "a");
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to reset devices.list on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to reset devices.list: %m");
if (c->device_policy == CGROUP_CLOSED ||
(c->device_policy == CGROUP_AUTO && c->device_allow)) {
@@ -709,7 +801,10 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
"/dev/random\0" "rwm\0"
"/dev/urandom\0" "rwm\0"
"/dev/tty\0" "rwm\0"
- "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
+ "/dev/pts/ptmx\0" "rw\0" /* /dev/pts/ptmx may not be duplicated, but accessed */
+ /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+ "/run/systemd/inaccessible/chr\0" "rwm\0"
+ "/run/systemd/inaccessible/blk\0" "rwm\0";
const char *x, *y;
@@ -744,7 +839,7 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
else if (startswith(a->path, "char-"))
whitelist_major(path, a->path + 5, 'c', acc);
else
- log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
+ log_unit_debug(u, "Ignoring device %s while writing cgroup attribute.", a->path);
}
}
@@ -759,8 +854,8 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M
r = cg_set_attribute("pids", path, "pids.max", "max");
if (r < 0)
- log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
- "Failed to set pids.max on %s: %m", path);
+ log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set pids.max: %m");
}
}
@@ -779,7 +874,8 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
if (c->memory_accounting ||
- c->memory_limit != (uint64_t) -1)
+ c->memory_limit != CGROUP_LIMIT_MAX ||
+ cgroup_context_has_unified_memory_config(c))
mask |= CGROUP_MASK_MEMORY;
if (c->device_allow ||
@@ -1044,7 +1140,7 @@ int unit_watch_cgroup(Unit *u) {
/* Only applies to the unified hierarchy */
r = cg_unified();
if (r < 0)
- return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m");
+ return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
if (r == 0)
return 0;
@@ -1194,7 +1290,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
return r;
/* Finally, apply the necessary attributes. */
- cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state);
+ cgroup_context_apply(u, target_mask, state);
return 0;
}
@@ -1325,7 +1421,7 @@ void unit_prune_cgroup(Unit *u) {
r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
if (r < 0) {
- log_debug_errno(r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);
+ log_unit_debug_errno(u, r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);
return;
}
@@ -1566,7 +1662,7 @@ int manager_setup_cgroup(Manager *m) {
/* 3. Install agent */
if (unified) {
- /* In the unified hierarchy we can can get
+ /* In the unified hierarchy we can get
* cgroup empty notifications via inotify. */
m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
@@ -1613,7 +1709,7 @@ int manager_setup_cgroup(Manager *m) {
/* also, move all other userspace processes remaining
* in the root cgroup into that scope. */
- r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false);
+ r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
if (r < 0)
log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
@@ -1634,7 +1730,7 @@ int manager_setup_cgroup(Manager *m) {
return log_error_errno(r, "Failed to determine supported controllers: %m");
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
- log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & c));
+ log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
return 0;
}
diff --git a/src/grp-system/libcore/cgroup.h b/src/grp-system/libcore/cgroup.h
index 36699fb21b..bf40c3cc74 100644
--- a/src/grp-system/libcore/cgroup.h
+++ b/src/grp-system/libcore/cgroup.h
@@ -94,6 +94,10 @@ struct CGroupContext {
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
+ uint64_t memory_low;
+ uint64_t memory_high;
+ uint64_t memory_max;
+
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
@@ -120,7 +124,6 @@ struct CGroupContext {
void cgroup_context_init(CGroupContext *c);
void cgroup_context_done(CGroupContext *c);
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix);
-void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state);
CGroupMask cgroup_context_get_mask(CGroupContext *c);
diff --git a/src/grp-system/libcore/dbus-cgroup.c b/src/grp-system/libcore/dbus-cgroup.c
index d707ac51c3..da1c333043 100644
--- a/src/grp-system/libcore/dbus-cgroup.c
+++ b/src/grp-system/libcore/dbus-cgroup.c
@@ -229,6 +229,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("BlockIOReadBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("BlockIOWriteBandwidth", "a(st)", property_get_blockio_device_bandwidths, 0, 0),
SD_BUS_PROPERTY("MemoryAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, memory_accounting), 0),
+ SD_BUS_PROPERTY("MemoryLow", "t", NULL, offsetof(CGroupContext, memory_low), 0),
+ SD_BUS_PROPERTY("MemoryHigh", "t", NULL, offsetof(CGroupContext, memory_high), 0),
+ SD_BUS_PROPERTY("MemoryMax", "t", NULL, offsetof(CGroupContext, memory_max), 0),
SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
@@ -624,7 +627,7 @@ int bus_cgroup_set_property(
if (r < 0)
return r;
- if (CGROUP_BLKIO_WEIGHT_IS_OK(weight))
+ if (!CGROUP_BLKIO_WEIGHT_IS_OK(weight))
return sd_bus_error_set_errnof(error, EINVAL, "StartupBlockIOWeight value out of range");
if (mode != UNIT_CHECK) {
@@ -639,7 +642,7 @@ int bus_cgroup_set_property(
return 1;
- } else if (streq(name, "BlockIOReadBandwidth") || streq(name, "BlockIOWriteBandwidth")) {
+ } else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
const char *path;
bool read = true;
unsigned n = 0;
@@ -827,12 +830,74 @@ int bus_cgroup_set_property(
return 1;
+ } else if (STR_IN_SET(name, "MemoryLow", "MemoryHigh", "MemoryMax")) {
+ uint64_t v;
+
+ r = sd_bus_message_read(message, "t", &v);
+ if (r < 0)
+ return r;
+ if (v <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
+
+ if (mode != UNIT_CHECK) {
+ if (streq(name, "MemoryLow"))
+ c->memory_low = v;
+ else if (streq(name, "MemoryHigh"))
+ c->memory_high = v;
+ else
+ c->memory_max = v;
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
+
+ if (v == CGROUP_LIMIT_MAX)
+ unit_write_drop_in_private_format(u, mode, name, "%s=infinity", name);
+ else
+ unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu64, name, v);
+ }
+
+ return 1;
+
+ } else if (STR_IN_SET(name, "MemoryLowScale", "MemoryHighScale", "MemoryMaxScale")) {
+ uint32_t raw;
+ uint64_t v;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ v = physical_memory_scale(raw, UINT32_MAX);
+ if (v <= 0 || v == UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ const char *e;
+
+ /* Chop off suffix */
+ assert_se(e = endswith(name, "Scale"));
+ name = strndupa(name, e - name);
+
+ if (streq(name, "MemoryLow"))
+ c->memory_low = v;
+ else if (streq(name, "MemoryHigh"))
+ c->memory_high = v;
+ else
+ c->memory_max = v;
+
+ unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%" PRIu32 "%%", name,
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
+
} else if (streq(name, "MemoryLimit")) {
uint64_t limit;
r = sd_bus_message_read(message, "t", &limit);
if (r < 0)
return r;
+ if (limit <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
if (mode != UNIT_CHECK) {
c->memory_limit = limit;
@@ -846,6 +911,27 @@ int bus_cgroup_set_property(
return 1;
+ } else if (streq(name, "MemoryLimitScale")) {
+ uint64_t limit;
+ uint32_t raw;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ limit = physical_memory_scale(raw, UINT32_MAX);
+ if (limit <= 0 || limit == UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ c->memory_limit = limit;
+ unit_invalidate_cgroup(u, CGROUP_MASK_MEMORY);
+ unit_write_drop_in_private_format(u, mode, "MemoryLimit", "MemoryLimit=%" PRIu32 "%%",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
+
} else if (streq(name, "DevicePolicy")) {
const char *policy;
CGroupDevicePolicy p;
@@ -859,13 +945,9 @@ int bus_cgroup_set_property(
return -EINVAL;
if (mode != UNIT_CHECK) {
- char *buf;
-
c->device_policy = p;
unit_invalidate_cgroup(u, CGROUP_MASK_DEVICES);
-
- buf = strjoina("DevicePolicy=", policy);
- unit_write_drop_in_private(u, mode, name, buf);
+ unit_write_drop_in_private_format(u, mode, name, "DevicePolicy=%s", policy);
}
return 1;
@@ -881,6 +963,7 @@ int bus_cgroup_set_property(
while ((r = sd_bus_message_read(message, "(ss)", &path, &rwm)) > 0) {
if ((!startswith(path, "/dev/") &&
+ !startswith(path, "/run/systemd/inaccessible/") &&
!startswith(path, "block-") &&
!startswith(path, "char-")) ||
strpbrk(path, WHITESPACE))
@@ -980,6 +1063,8 @@ int bus_cgroup_set_property(
r = sd_bus_message_read(message, "t", &limit);
if (r < 0)
return r;
+ if (limit <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
if (mode != UNIT_CHECK) {
c->tasks_max = limit;
@@ -992,6 +1077,26 @@ int bus_cgroup_set_property(
}
return 1;
+ } else if (streq(name, "TasksMaxScale")) {
+ uint64_t limit;
+ uint32_t raw;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ limit = system_tasks_max_scale(raw, UINT32_MAX);
+ if (limit <= 0 || limit >= UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ c->tasks_max = limit;
+ unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+ unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
+ return 1;
}
if (u->transient && u->load_state == UNIT_STUB) {
diff --git a/src/grp-system/libcore/dbus-execute.c b/src/grp-system/libcore/dbus-execute.c
index fe1d9da440..f7c217efc1 100644
--- a/src/grp-system/libcore/dbus-execute.c
+++ b/src/grp-system/libcore/dbus-execute.c
@@ -696,9 +696,12 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SupplementaryGroups", "as", NULL, offsetof(ExecContext, supplementary_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PAMName", "s", NULL, offsetof(ExecContext, pam_name), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_dirs), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_dirs), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_dirs), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadWriteDirectories", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadOnlyDirectories", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("InaccessibleDirectories", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
+ SD_BUS_PROPERTY("ReadWritePaths", "as", NULL, offsetof(ExecContext, read_write_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("ReadOnlyPaths", "as", NULL, offsetof(ExecContext, read_only_paths), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("InaccessiblePaths", "as", NULL, offsetof(ExecContext, inaccessible_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -720,6 +723,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, runtime_directory_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectory", "as", NULL, offsetof(ExecContext, runtime_directory), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_VTABLE_END
};
@@ -843,7 +848,7 @@ int bus_exec_context_set_transient_property(
else if (free_and_strdup(&c->user, uu) < 0)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "User=%s\n", uu);
+ unit_write_drop_in_private_format(u, mode, name, "User=%s", uu);
}
return 1;
@@ -862,7 +867,7 @@ int bus_exec_context_set_transient_property(
else if (free_and_strdup(&c->group, gg) < 0)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "Group=%s\n", gg);
+ unit_write_drop_in_private_format(u, mode, name, "Group=%s", gg);
}
return 1;
@@ -880,7 +885,7 @@ int bus_exec_context_set_transient_property(
else if (free_and_strdup(&c->syslog_identifier, id) < 0)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "SyslogIdentifier=%s\n", id);
+ unit_write_drop_in_private_format(u, mode, name, "SyslogIdentifier=%s", id);
}
return 1;
@@ -896,7 +901,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->syslog_priority = (c->syslog_priority & LOG_FACMASK) | level;
- unit_write_drop_in_private_format(u, mode, name, "SyslogLevel=%i\n", level);
+ unit_write_drop_in_private_format(u, mode, name, "SyslogLevel=%i", level);
}
return 1;
@@ -912,7 +917,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->syslog_priority = (facility << 3) | LOG_PRI(c->syslog_priority);
- unit_write_drop_in_private_format(u, mode, name, "SyslogFacility=%i\n", facility);
+ unit_write_drop_in_private_format(u, mode, name, "SyslogFacility=%i", facility);
}
return 1;
@@ -928,7 +933,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->nice = n;
- unit_write_drop_in_private_format(u, mode, name, "Nice=%i\n", n);
+ unit_write_drop_in_private_format(u, mode, name, "Nice=%i", n);
}
return 1;
@@ -953,7 +958,7 @@ int bus_exec_context_set_transient_property(
if (r < 0)
return r;
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, s);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s);
}
return 1;
@@ -1008,7 +1013,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->std_input = p;
- unit_write_drop_in_private_format(u, mode, name, "StandardInput=%s\n", exec_input_to_string(p));
+ unit_write_drop_in_private_format(u, mode, name, "StandardInput=%s", exec_input_to_string(p));
}
return 1;
@@ -1029,7 +1034,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->std_output = p;
- unit_write_drop_in_private_format(u, mode, name, "StandardOutput=%s\n", exec_output_to_string(p));
+ unit_write_drop_in_private_format(u, mode, name, "StandardOutput=%s", exec_output_to_string(p));
}
return 1;
@@ -1049,7 +1054,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->std_error = p;
- unit_write_drop_in_private_format(u, mode, name, "StandardError=%s\n", exec_output_to_string(p));
+ unit_write_drop_in_private_format(u, mode, name, "StandardError=%s", exec_output_to_string(p));
}
return 1;
@@ -1057,7 +1062,7 @@ int bus_exec_context_set_transient_property(
} else if (STR_IN_SET(name,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset",
"PrivateTmp", "PrivateDevices", "PrivateNetwork",
- "NoNewPrivileges", "SyslogLevelPrefix")) {
+ "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@@ -1081,8 +1086,12 @@ int bus_exec_context_set_transient_property(
c->no_new_privileges = b;
else if (streq(name, "SyslogLevelPrefix"))
c->syslog_level_prefix = b;
+ else if (streq(name, "MemoryDenyWriteExecute"))
+ c->memory_deny_write_execute = b;
+ else if (streq(name, "RestrictRealtime"))
+ c->restrict_realtime = b;
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, yes_no(b));
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
}
return 1;
@@ -1100,7 +1109,7 @@ int bus_exec_context_set_transient_property(
else if (free_and_strdup(&c->utmp_id, id) < 0)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "UtmpIdentifier=%s\n", strempty(id));
+ unit_write_drop_in_private_format(u, mode, name, "UtmpIdentifier=%s", strempty(id));
}
return 1;
@@ -1120,7 +1129,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->utmp_mode = m;
- unit_write_drop_in_private_format(u, mode, name, "UtmpMode=%s\n", exec_utmp_mode_to_string(m));
+ unit_write_drop_in_private_format(u, mode, name, "UtmpMode=%s", exec_utmp_mode_to_string(m));
}
return 1;
@@ -1138,7 +1147,7 @@ int bus_exec_context_set_transient_property(
else if (free_and_strdup(&c->pam_name, n) < 0)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "PAMName=%s\n", strempty(n));
+ unit_write_drop_in_private_format(u, mode, name, "PAMName=%s", strempty(n));
}
return 1;
@@ -1160,7 +1169,7 @@ int bus_exec_context_set_transient_property(
if (strv_length(l) == 0) {
c->environment = strv_free(c->environment);
- unit_write_drop_in_private_format(u, mode, name, "Environment=\n");
+ unit_write_drop_in_private_format(u, mode, name, "Environment=");
} else {
e = strv_env_merge(2, c->environment, l);
if (!e)
@@ -1173,7 +1182,7 @@ int bus_exec_context_set_transient_property(
if (!joined)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "Environment=%s\n", joined);
+ unit_write_drop_in_private_format(u, mode, name, "Environment=%s", joined);
}
}
@@ -1189,7 +1198,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->timer_slack_nsec = n;
- unit_write_drop_in_private_format(u, mode, name, "TimerSlackNSec=" NSEC_FMT "\n", n);
+ unit_write_drop_in_private_format(u, mode, name, "TimerSlackNSec=" NSEC_FMT, n);
}
return 1;
@@ -1207,7 +1216,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->oom_score_adjust = oa;
c->oom_score_adjust_set = true;
- unit_write_drop_in_private_format(u, mode, name, "OOMScoreAdjust=%i\n", oa);
+ unit_write_drop_in_private_format(u, mode, name, "OOMScoreAdjust=%i", oa);
}
return 1;
@@ -1229,7 +1238,7 @@ int bus_exec_context_set_transient_property(
return -ENOMEM;
STRV_FOREACH(i, c->environment_files)
- fprintf(f, "EnvironmentFile=%s\n", *i);
+ fprintf(f, "EnvironmentFile=%s", *i);
while ((r = sd_bus_message_enter_container(message, 'r', "sb")) > 0) {
const char *path;
@@ -1253,7 +1262,7 @@ int bus_exec_context_set_transient_property(
if (!buf)
return -ENOMEM;
- fprintf(f, "EnvironmentFile=%s\n", buf);
+ fprintf(f, "EnvironmentFile=%s", buf);
r = strv_consume(&l, buf);
if (r < 0)
@@ -1274,7 +1283,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
if (strv_isempty(l)) {
c->environment_files = strv_free(c->environment_files);
- unit_write_drop_in_private(u, mode, name, "EnvironmentFile=\n");
+ unit_write_drop_in_private(u, mode, name, "EnvironmentFile=");
} else {
r = strv_extend_strv(&c->environment_files, l, true);
if (r < 0)
@@ -1300,7 +1309,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
if (strv_isempty(l)) {
c->pass_environment = strv_free(c->pass_environment);
- unit_write_drop_in_private_format(u, mode, name, "PassEnvironment=\n");
+ unit_write_drop_in_private_format(u, mode, name, "PassEnvironment=");
} else {
_cleanup_free_ char *joined = NULL;
@@ -1312,14 +1321,14 @@ int bus_exec_context_set_transient_property(
if (!joined)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "PassEnvironment=%s\n", joined);
+ unit_write_drop_in_private_format(u, mode, name, "PassEnvironment=%s", joined);
}
}
return 1;
- } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories")) {
-
+ } else if (STR_IN_SET(name, "ReadWriteDirectories", "ReadOnlyDirectories", "InaccessibleDirectories",
+ "ReadWritePaths", "ReadOnlyPaths", "InaccessiblePaths")) {
_cleanup_strv_free_ char **l = NULL;
char ***dirs;
char **p;
@@ -1341,16 +1350,16 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
_cleanup_free_ char *joined = NULL;
- if (streq(name, "ReadWriteDirectories"))
- dirs = &c->read_write_dirs;
- else if (streq(name, "ReadOnlyDirectories"))
- dirs = &c->read_only_dirs;
- else /* "InaccessibleDirectories" */
- dirs = &c->inaccessible_dirs;
+ if (STR_IN_SET(name, "ReadWriteDirectories", "ReadWritePaths"))
+ dirs = &c->read_write_paths;
+ else if (STR_IN_SET(name, "ReadOnlyDirectories", "ReadOnlyPaths"))
+ dirs = &c->read_only_paths;
+ else /* "InaccessiblePaths" */
+ dirs = &c->inaccessible_paths;
if (strv_length(l) == 0) {
*dirs = strv_free(*dirs);
- unit_write_drop_in_private_format(u, mode, name, "%s=\n", name);
+ unit_write_drop_in_private_format(u, mode, name, "%s=", name);
} else {
r = strv_extend_strv(dirs, l, true);
@@ -1361,7 +1370,7 @@ int bus_exec_context_set_transient_property(
if (!joined)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, joined);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, joined);
}
}
@@ -1389,7 +1398,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->protect_system = ps;
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, s);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s);
}
return 1;
@@ -1415,7 +1424,7 @@ int bus_exec_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->protect_home = ph;
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, s);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, s);
}
return 1;
@@ -1438,7 +1447,7 @@ int bus_exec_context_set_transient_property(
if (strv_isempty(l)) {
c->runtime_directory = strv_free(c->runtime_directory);
- unit_write_drop_in_private_format(u, mode, name, "%s=\n", name);
+ unit_write_drop_in_private_format(u, mode, name, "%s=", name);
} else {
r = strv_extend_strv(&c->runtime_directory, l, true);
@@ -1449,7 +1458,7 @@ int bus_exec_context_set_transient_property(
if (!joined)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, joined);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, joined);
}
}
@@ -1468,7 +1477,7 @@ int bus_exec_context_set_transient_property(
else if (free_and_strdup(&c->selinux_context, s) < 0)
return -ENOMEM;
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, strempty(s));
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, strempty(s));
}
return 1;
@@ -1536,7 +1545,7 @@ int bus_exec_context_set_transient_property(
return -ENOMEM;
}
- unit_write_drop_in_private_format(u, mode, name, "%s=%s\n", name, f);
+ unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, f);
}
return 1;
diff --git a/src/grp-system/libcore/dbus-kill.c b/src/grp-system/libcore/dbus-kill.c
index ba7a3f3b72..44bc2db7e2 100644
--- a/src/grp-system/libcore/dbus-kill.c
+++ b/src/grp-system/libcore/dbus-kill.c
@@ -64,7 +64,7 @@ int bus_kill_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->kill_mode = k;
- unit_write_drop_in_private_format(u, mode, name, "KillMode=%s\n", kill_mode_to_string(k));
+ unit_write_drop_in_private_format(u, mode, name, "KillMode=%s", kill_mode_to_string(k));
}
return 1;
@@ -82,7 +82,7 @@ int bus_kill_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->kill_signal = sig;
- unit_write_drop_in_private_format(u, mode, name, "KillSignal=%s\n", signal_to_string(sig));
+ unit_write_drop_in_private_format(u, mode, name, "KillSignal=%s", signal_to_string(sig));
}
return 1;
@@ -97,7 +97,7 @@ int bus_kill_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->send_sighup = b;
- unit_write_drop_in_private_format(u, mode, name, "SendSIGHUP=%s\n", yes_no(b));
+ unit_write_drop_in_private_format(u, mode, name, "SendSIGHUP=%s", yes_no(b));
}
return 1;
@@ -112,7 +112,7 @@ int bus_kill_context_set_transient_property(
if (mode != UNIT_CHECK) {
c->send_sigkill = b;
- unit_write_drop_in_private_format(u, mode, name, "SendSIGKILL=%s\n", yes_no(b));
+ unit_write_drop_in_private_format(u, mode, name, "SendSIGKILL=%s", yes_no(b));
}
return 1;
diff --git a/src/grp-system/libcore/dbus-manager.c b/src/grp-system/libcore/dbus-manager.c
index b2ca867987..c5684014d3 100644
--- a/src/grp-system/libcore/dbus-manager.c
+++ b/src/grp-system/libcore/dbus-manager.c
@@ -782,6 +782,7 @@ static int transient_unit_from_message(
return r;
/* Now load the missing bits of the unit we just created */
+ unit_add_to_load_queue(u);
manager_dispatch_load_queue(m);
*unit = u;
diff --git a/src/grp-system/libcore/dbus-scope.c b/src/grp-system/libcore/dbus-scope.c
index 9ecc01d275..d319597c10 100644
--- a/src/grp-system/libcore/dbus-scope.c
+++ b/src/grp-system/libcore/dbus-scope.c
@@ -148,7 +148,7 @@ static int bus_scope_set_transient_property(
if (r < 0)
return r;
- unit_write_drop_in_format(UNIT(s), mode, name, "[Scope]\nTimeoutStopSec="USEC_FMT"us\n", s->timeout_stop_usec);
+ unit_write_drop_in_private_format(UNIT(s), mode, name, "TimeoutStopSec="USEC_FMT"us", s->timeout_stop_usec);
} else {
r = sd_bus_message_skip(message, "t");
if (r < 0)
@@ -226,5 +226,5 @@ int bus_scope_send_request_stop(Scope *s) {
if (r < 0)
return r;
- return sd_bus_send_to(UNIT(s)->manager->api_bus, m, /* s->controller */ NULL, NULL);
+ return sd_bus_send_to(UNIT(s)->manager->api_bus, m, s->controller, NULL);
}
diff --git a/src/grp-system/libcore/dbus-service.c b/src/grp-system/libcore/dbus-service.c
index a666c9db95..a3524815bd 100644
--- a/src/grp-system/libcore/dbus-service.c
+++ b/src/grp-system/libcore/dbus-service.c
@@ -103,7 +103,7 @@ static int bus_service_set_transient_property(
if (mode != UNIT_CHECK) {
s->remain_after_exit = b;
- unit_write_drop_in_private_format(UNIT(s), mode, name, "RemainAfterExit=%s\n", yes_no(b));
+ unit_write_drop_in_private_format(UNIT(s), mode, name, "RemainAfterExit=%s", yes_no(b));
}
return 1;
@@ -122,7 +122,7 @@ static int bus_service_set_transient_property(
if (mode != UNIT_CHECK) {
s->type = k;
- unit_write_drop_in_private_format(UNIT(s), mode, name, "Type=%s\n", service_type_to_string(s->type));
+ unit_write_drop_in_private_format(UNIT(s), mode, name, "Type=%s", service_type_to_string(s->type));
}
return 1;
@@ -135,7 +135,7 @@ static int bus_service_set_transient_property(
if (mode != UNIT_CHECK) {
s->runtime_max_usec = u;
- unit_write_drop_in_private_format(UNIT(s), mode, name, "RuntimeMaxSec=" USEC_FMT "us\n", u);
+ unit_write_drop_in_private_format(UNIT(s), mode, name, "RuntimeMaxSec=" USEC_FMT "us", u);
}
return 1;
diff --git a/src/grp-system/libcore/dbus-timer.c b/src/grp-system/libcore/dbus-timer.c
index 02036de42f..ce454385f8 100644
--- a/src/grp-system/libcore/dbus-timer.c
+++ b/src/grp-system/libcore/dbus-timer.c
@@ -221,7 +221,7 @@ static int bus_timer_set_transient_property(
if (mode != UNIT_CHECK) {
char time[FORMAT_TIMESPAN_MAX];
- unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, format_timespan(time, sizeof(time), u, USEC_PER_MSEC));
+ unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, format_timespan(time, sizeof(time), u, USEC_PER_MSEC));
v = new0(TimerValue, 1);
if (!v)
@@ -250,7 +250,7 @@ static int bus_timer_set_transient_property(
if (r < 0)
return r;
- unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, str);
+ unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, str);
v = new0(TimerValue, 1);
if (!v) {
@@ -278,7 +278,7 @@ static int bus_timer_set_transient_property(
if (mode != UNIT_CHECK) {
t->accuracy_usec = u;
- unit_write_drop_in_private_format(UNIT(t), mode, name, "AccuracySec=" USEC_FMT "us\n", u);
+ unit_write_drop_in_private_format(UNIT(t), mode, name, "AccuracySec=" USEC_FMT "us", u);
}
return 1;
@@ -292,7 +292,7 @@ static int bus_timer_set_transient_property(
if (mode != UNIT_CHECK) {
t->random_usec = u;
- unit_write_drop_in_private_format(UNIT(t), mode, name, "RandomizedDelaySec=" USEC_FMT "us\n", u);
+ unit_write_drop_in_private_format(UNIT(t), mode, name, "RandomizedDelaySec=" USEC_FMT "us", u);
}
return 1;
@@ -306,7 +306,7 @@ static int bus_timer_set_transient_property(
if (mode != UNIT_CHECK) {
t->wake_system = b;
- unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, yes_no(b));
+ unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, yes_no(b));
}
return 1;
@@ -320,7 +320,7 @@ static int bus_timer_set_transient_property(
if (mode != UNIT_CHECK) {
t->remain_after_elapse = b;
- unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s\n", name, yes_no(b));
+ unit_write_drop_in_private_format(UNIT(t), mode, name, "%s=%s", name, yes_no(b));
}
return 1;
diff --git a/src/grp-system/libcore/dbus-unit.c b/src/grp-system/libcore/dbus-unit.c
index 31fbb0d54d..7e2a63b434 100644
--- a/src/grp-system/libcore/dbus-unit.c
+++ b/src/grp-system/libcore/dbus-unit.c
@@ -1206,7 +1206,7 @@ static int bus_unit_set_transient_property(
if (r < 0)
return r;
- unit_write_drop_in_format(u, mode, name, "[Unit]\nDescription=%s\n", d);
+ unit_write_drop_in_format(u, mode, name, "[Unit]\nDescription=%s", d);
}
return 1;
@@ -1220,7 +1220,7 @@ static int bus_unit_set_transient_property(
if (mode != UNIT_CHECK) {
u->default_dependencies = b;
- unit_write_drop_in_format(u, mode, name, "[Unit]\nDefaultDependencies=%s\n", yes_no(b));
+ unit_write_drop_in_format(u, mode, name, "[Unit]\nDefaultDependencies=%s", yes_no(b));
}
return 1;
@@ -1258,7 +1258,7 @@ static int bus_unit_set_transient_property(
if (r < 0)
return r;
- unit_write_drop_in_private_format(u, mode, name, "Slice=%s\n", s);
+ unit_write_drop_in_private_format(u, mode, name, "Slice=%s", s);
}
return 1;
@@ -1306,7 +1306,7 @@ static int bus_unit_set_transient_property(
if (!label)
return -ENOMEM;
- unit_write_drop_in_format(u, mode, label, "[Unit]\n%s=%s\n", name, other);
+ unit_write_drop_in_format(u, mode, label, "[Unit]\n%s=%s", name, other);
}
}
diff --git a/src/grp-system/libcore/execute.c b/src/grp-system/libcore/execute.c
index b19473c6a5..b73577817f 100644
--- a/src/grp-system/libcore/execute.c
+++ b/src/grp-system/libcore/execute.c
@@ -25,6 +25,7 @@
#include <signal.h>
#include <string.h>
#include <sys/capability.h>
+#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/socket.h>
@@ -290,7 +291,15 @@ static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
return r;
}
-static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
+static int connect_logger_as(
+ Unit *unit,
+ const ExecContext *context,
+ ExecOutput output,
+ const char *ident,
+ int nfd,
+ uid_t uid,
+ gid_t gid) {
+
int fd, r;
assert(context);
@@ -311,7 +320,7 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons
return -errno;
}
- fd_inc_sndbuf(fd, SNDBUF_SIZE);
+ (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
dprintf(fd,
"%s\n"
@@ -322,18 +331,18 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons
"%i\n"
"%i\n",
context->syslog_identifier ? context->syslog_identifier : ident,
- unit_id,
+ unit->id,
context->syslog_priority,
!!context->syslog_level_prefix,
output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
is_terminal_output(output));
- if (fd != nfd) {
- r = dup2(fd, nfd) < 0 ? -errno : nfd;
- safe_close(fd);
- } else
- r = nfd;
+ if (fd == nfd)
+ return nfd;
+
+ r = dup2(fd, nfd) < 0 ? -errno : nfd;
+ safe_close(fd);
return r;
}
@@ -447,7 +456,10 @@ static int setup_output(
int fileno,
int socket_fd,
const char *ident,
- uid_t uid, gid_t gid) {
+ uid_t uid,
+ gid_t gid,
+ dev_t *journal_stream_dev,
+ ino_t *journal_stream_ino) {
ExecOutput o;
ExecInput i;
@@ -457,6 +469,8 @@ static int setup_output(
assert(context);
assert(params);
assert(ident);
+ assert(journal_stream_dev);
+ assert(journal_stream_ino);
if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
@@ -532,10 +546,21 @@ static int setup_output(
case EXEC_OUTPUT_KMSG_AND_CONSOLE:
case EXEC_OUTPUT_JOURNAL:
case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
- r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
+ r = connect_logger_as(unit, context, o, ident, fileno, uid, gid);
if (r < 0) {
log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
r = open_null_as(O_WRONLY, fileno);
+ } else {
+ struct stat st;
+
+ /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
+ * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
+ * services to detect whether they are connected to the journal or not. */
+
+ if (fstat(fileno, &st) >= 0) {
+ *journal_stream_dev = st.st_dev;
+ *journal_stream_ino = st.st_ino;
+ }
}
return r;
@@ -553,6 +578,10 @@ static int chown_terminal(int fd, uid_t uid) {
assert(fd >= 0);
+ /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
+ if (isatty(fd) < 1)
+ return 0;
+
/* This might fail. What matters are the results. */
(void) fchown(fd, uid, -1);
(void) fchmod(fd, TTY_MODE);
@@ -796,7 +825,7 @@ static int setup_pam(
const char *user,
uid_t uid,
const char *tty,
- char ***pam_env,
+ char ***env,
int fds[], unsigned n_fds) {
static const struct pam_conv conv = {
@@ -808,14 +837,14 @@ static int setup_pam(
pam_handle_t *handle = NULL;
sigset_t old_ss;
int pam_code = PAM_SUCCESS, r;
- char **e = NULL;
+ char **nv, **e = NULL;
bool close_session = false;
pid_t pam_pid = 0, parent_pid;
int flags = 0;
assert(name);
assert(user);
- assert(pam_env);
+ assert(env);
/* We set up PAM in the parent process, then fork. The child
* will then stay around until killed via PR_GET_PDEATHSIG or
@@ -843,6 +872,12 @@ static int setup_pam(
goto fail;
}
+ STRV_FOREACH(nv, *env) {
+ pam_code = pam_putenv(handle, *nv);
+ if (pam_code != PAM_SUCCESS)
+ goto fail;
+ }
+
pam_code = pam_acct_mgmt(handle, flags);
if (pam_code != PAM_SUCCESS)
goto fail;
@@ -963,8 +998,8 @@ static int setup_pam(
if (!barrier_place_and_sync(&barrier))
log_error("PAM initialization failed");
- *pam_env = e;
- e = NULL;
+ strv_free(*env);
+ *env = e;
return 0;
@@ -1192,6 +1227,115 @@ finish:
return r;
}
+static int apply_memory_deny_write_execute(const ExecContext *c) {
+ scmp_filter_ctx *seccomp;
+ int r;
+
+ assert(c);
+
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
+
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mmap),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(mprotect),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
+static int apply_restrict_realtime(const ExecContext *c) {
+ static const int permitted_policies[] = {
+ SCHED_OTHER,
+ SCHED_BATCH,
+ SCHED_IDLE,
+ };
+
+ scmp_filter_ctx *seccomp;
+ unsigned i;
+ int r, p, max_policy = 0;
+
+ assert(c);
+
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
+
+ /* Determine the highest policy constant we want to allow */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] > max_policy)
+ max_policy = permitted_policies[i];
+
+ /* Go through all policies with lower values than that, and block them -- unless they appear in the
+ * whitelist. */
+ for (p = 0; p < max_policy; p++) {
+ bool good = false;
+
+ /* Check if this is in the whitelist. */
+ for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
+ if (permitted_policies[i] == p) {
+ good = true;
+ break;
+ }
+
+ if (good)
+ continue;
+
+ /* Deny this policy */
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_EQ, p));
+ if (r < 0)
+ goto finish;
+ }
+
+ /* Blacklist all other policies, i.e. the ones with higher values. Note that all comparisons are unsigned here,
+ * hence no need no check for < 0 values. */
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(sched_setscheduler),
+ 1,
+ SCMP_A1(SCMP_CMP_GT, max_policy));
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -1230,6 +1374,8 @@ static int build_environment(
const char *home,
const char *username,
const char *shell,
+ dev_t journal_stream_dev,
+ ino_t journal_stream_ino,
char ***ret) {
_cleanup_strv_free_ char **our_env = NULL;
@@ -1239,7 +1385,7 @@ static int build_environment(
assert(c);
assert(ret);
- our_env = new0(char*, 11);
+ our_env = new0(char*, 12);
if (!our_env)
return -ENOMEM;
@@ -1311,8 +1457,15 @@ static int build_environment(
our_env[n_env++] = x;
}
+ if (journal_stream_dev != 0 && journal_stream_ino != 0) {
+ if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+
our_env[n_env++] = NULL;
- assert(n_env <= 11);
+ assert(n_env <= 12);
*ret = our_env;
our_env = NULL;
@@ -1356,9 +1509,9 @@ static bool exec_needs_mount_namespace(
assert(context);
assert(params);
- if (!strv_isempty(context->read_write_dirs) ||
- !strv_isempty(context->read_only_dirs) ||
- !strv_isempty(context->inaccessible_dirs))
+ if (!strv_isempty(context->read_write_paths) ||
+ !strv_isempty(context->read_only_paths) ||
+ !strv_isempty(context->inaccessible_paths))
return true;
if (context->mount_flags != 0)
@@ -1422,13 +1575,15 @@ static int exec_child(
char **files_env,
int *exit_status) {
- _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
+ _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
_cleanup_free_ char *mac_selinux_context_net = NULL;
const char *username = NULL, *home = NULL, *shell = NULL, *wd;
+ dev_t journal_stream_dev = 0;
+ ino_t journal_stream_ino = 0;
+ bool needs_mount_namespace;
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
int i, r;
- bool needs_mount_namespace;
assert(unit);
assert(command);
@@ -1528,13 +1683,13 @@ static int exec_child(
return r;
}
- r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid);
+ r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDOUT;
return r;
}
- r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid);
+ r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDERR;
return r;
@@ -1673,9 +1828,43 @@ static int exec_child(
}
}
+ r = build_environment(
+ context,
+ params,
+ n_fds,
+ home,
+ username,
+ shell,
+ journal_stream_dev,
+ journal_stream_ino,
+ &our_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return r;
+ }
+
+ r = build_pass_environment(context, &pass_env);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return r;
+ }
+
+ accum_env = strv_env_merge(5,
+ params->environment,
+ our_env,
+ pass_env,
+ context->environment,
+ files_env,
+ NULL);
+ if (!accum_env) {
+ *exit_status = EXIT_MEMORY;
+ return -ENOMEM;
+ }
+ accum_env = strv_env_clean(accum_env);
+
umask(context->umask);
- if (params->apply_permissions) {
+ if (params->apply_permissions && !command->privileged) {
r = enforce_groups(context, username, gid);
if (r < 0) {
*exit_status = EXIT_GROUP;
@@ -1709,7 +1898,7 @@ static int exec_child(
#endif
#ifdef HAVE_PAM
if (context->pam_name && username) {
- r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
+ r = setup_pam(context->pam_name, username, uid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_PAM;
return r;
@@ -1746,9 +1935,9 @@ static int exec_child(
r = setup_namespace(
params->apply_chroot ? context->root_directory : NULL,
- context->read_write_dirs,
- context->read_only_dirs,
- context->inaccessible_dirs,
+ context->read_write_paths,
+ context->read_only_paths,
+ context->inaccessible_paths,
tmp,
var,
context->private_devices,
@@ -1800,7 +1989,7 @@ static int exec_child(
}
#ifdef HAVE_SELINUX
- if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
+ if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0 && !command->privileged) {
r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
if (r < 0) {
*exit_status = EXIT_SELINUX_CONTEXT;
@@ -1825,7 +2014,7 @@ static int exec_child(
return r;
}
- if (params->apply_permissions) {
+ if (params->apply_permissions && !command->privileged) {
bool use_address_families = context->address_families_whitelist ||
!set_isempty(context->address_families);
@@ -1835,10 +2024,20 @@ static int exec_child(
int secure_bits = context->secure_bits;
for (i = 0; i < _RLIMIT_MAX; i++) {
+
if (!context->rlimit[i])
continue;
- if (setrlimit_closest(i, context->rlimit[i]) < 0) {
+ r = setrlimit_closest(i, context->rlimit[i]);
+ if (r < 0) {
+ *exit_status = EXIT_LIMITS;
+ return r;
+ }
+ }
+
+ /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
+ if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
+ if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
*exit_status = EXIT_LIMITS;
return -errno;
}
@@ -1899,7 +2098,7 @@ static int exec_child(
}
if (context->no_new_privileges ||
- (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || use_syscall_filter)))
+ (!have_effective_cap(CAP_SYS_ADMIN) && (use_address_families || context->memory_deny_write_execute || context->restrict_realtime || use_syscall_filter)))
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
*exit_status = EXIT_NO_NEW_PRIVILEGES;
return -errno;
@@ -1914,6 +2113,22 @@ static int exec_child(
}
}
+ if (context->memory_deny_write_execute) {
+ r = apply_memory_deny_write_execute(context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
+ }
+ }
+
+ if (context->restrict_realtime) {
+ r = apply_restrict_realtime(context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return r;
+ }
+ }
+
if (use_syscall_filter) {
r = apply_seccomp(context);
if (r < 0) {
@@ -1948,39 +2163,12 @@ static int exec_child(
#endif
}
- r = build_environment(context, params, n_fds, home, username, shell, &our_env);
- if (r < 0) {
- *exit_status = EXIT_MEMORY;
- return r;
- }
-
- r = build_pass_environment(context, &pass_env);
- if (r < 0) {
- *exit_status = EXIT_MEMORY;
- return r;
- }
-
- final_env = strv_env_merge(6,
- params->environment,
- our_env,
- pass_env,
- context->environment,
- files_env,
- pam_env,
- NULL);
- if (!final_env) {
- *exit_status = EXIT_MEMORY;
- return -ENOMEM;
- }
-
- final_argv = replace_env_argv(argv, final_env);
+ final_argv = replace_env_argv(argv, accum_env);
if (!final_argv) {
*exit_status = EXIT_MEMORY;
return -ENOMEM;
}
- final_env = strv_env_clean(final_env);
-
if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
_cleanup_free_ char *line;
@@ -1996,7 +2184,7 @@ static int exec_child(
}
}
- execve(command->path, final_argv, final_env);
+ execve(command->path, final_argv, accum_env);
*exit_status = EXIT_EXEC;
return -errno;
}
@@ -2138,9 +2326,9 @@ void exec_context_done(ExecContext *c) {
c->pam_name = mfree(c->pam_name);
- c->read_only_dirs = strv_free(c->read_only_dirs);
- c->read_write_dirs = strv_free(c->read_write_dirs);
- c->inaccessible_dirs = strv_free(c->inaccessible_dirs);
+ c->read_only_paths = strv_free(c->read_only_paths);
+ c->read_write_paths = strv_free(c->read_write_paths);
+ c->inaccessible_paths = strv_free(c->inaccessible_paths);
if (c->cpuset)
CPU_FREE(c->cpuset);
@@ -2373,7 +2561,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sPrivateDevices: %s\n"
"%sProtectHome: %s\n"
"%sProtectSystem: %s\n"
- "%sIgnoreSIGPIPE: %s\n",
+ "%sIgnoreSIGPIPE: %s\n"
+ "%sMemoryDenyWriteExecute: %s\n"
+ "%sRestrictRealtime: %s\n",
prefix, c->umask,
prefix, c->working_directory ? c->working_directory : "/",
prefix, c->root_directory ? c->root_directory : "/",
@@ -2383,7 +2573,9 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->private_devices),
prefix, protect_home_to_string(c->protect_home),
prefix, protect_system_to_string(c->protect_system),
- prefix, yes_no(c->ignore_sigpipe));
+ prefix, yes_no(c->ignore_sigpipe),
+ prefix, yes_no(c->memory_deny_write_execute),
+ prefix, yes_no(c->restrict_realtime));
STRV_FOREACH(e, c->environment)
fprintf(f, "%sEnvironment: %s\n", prefix, *e);
@@ -2542,21 +2734,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
if (c->pam_name)
fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
- if (strv_length(c->read_write_dirs) > 0) {
- fprintf(f, "%sReadWriteDirs:", prefix);
- strv_fprintf(f, c->read_write_dirs);
+ if (strv_length(c->read_write_paths) > 0) {
+ fprintf(f, "%sReadWritePaths:", prefix);
+ strv_fprintf(f, c->read_write_paths);
fputs("\n", f);
}
- if (strv_length(c->read_only_dirs) > 0) {
- fprintf(f, "%sReadOnlyDirs:", prefix);
- strv_fprintf(f, c->read_only_dirs);
+ if (strv_length(c->read_only_paths) > 0) {
+ fprintf(f, "%sReadOnlyPaths:", prefix);
+ strv_fprintf(f, c->read_only_paths);
fputs("\n", f);
}
- if (strv_length(c->inaccessible_dirs) > 0) {
- fprintf(f, "%sInaccessibleDirs:", prefix);
- strv_fprintf(f, c->inaccessible_dirs);
+ if (strv_length(c->inaccessible_paths) > 0) {
+ fprintf(f, "%sInaccessiblePaths:", prefix);
+ strv_fprintf(f, c->inaccessible_paths);
fputs("\n", f);
}
@@ -2637,7 +2829,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
bool exec_context_maintains_privileges(ExecContext *c) {
assert(c);
- /* Returns true if the process forked off would run run under
+ /* Returns true if the process forked off would run under
* an unchanged UID or as root. */
if (!c->user)
@@ -2872,7 +3064,7 @@ int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
return r;
if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
- if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
return -errno;
}
diff --git a/src/grp-system/libcore/execute.h b/src/grp-system/libcore/execute.h
index 18d6f0e4a3..cf5e7e4617 100644
--- a/src/grp-system/libcore/execute.h
+++ b/src/grp-system/libcore/execute.h
@@ -24,9 +24,10 @@
#include <stdio.h>
#include <sys/capability.h>
-#include "basic/fdset.h"
+#include "basic/cgroup-util.h"
#include "basic/list.h"
#include "basic/missing.h"
+#include "shared/fdset.h"
typedef struct ExecCommand ExecCommand;
typedef struct ExecContext ExecContext;
@@ -82,7 +83,8 @@ struct ExecCommand {
char **argv;
ExecStatus exec_status;
LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
- bool ignore;
+ bool ignore:1;
+ bool privileged:1;
};
struct ExecRuntime {
@@ -130,7 +132,7 @@ struct ExecContext {
bool ignore_sigpipe;
- /* Since resolving these names might might involve socket
+ /* Since resolving these names might involve socket
* connections and we don't want to deadlock ourselves these
* names are resolved on execution only and in the child
* process. */
@@ -152,7 +154,7 @@ struct ExecContext {
bool smack_process_label_ignore;
char *smack_process_label;
- char **read_write_dirs, **read_only_dirs, **inaccessible_dirs;
+ char **read_write_paths, **read_only_paths, **inaccessible_paths;
unsigned long mount_flags;
uint64_t capability_bounding_set;
@@ -193,6 +195,9 @@ struct ExecContext {
char **runtime_directory;
mode_t runtime_directory_mode;
+ bool memory_deny_write_execute;
+ bool restrict_realtime;
+
bool oom_score_adjust_set:1;
bool nice_set:1;
bool ioprio_set:1;
@@ -200,10 +205,6 @@ struct ExecContext {
bool no_new_privileges_set:1;
};
-#include "basic/cgroup-util.h"
-
-#include "cgroup.h"
-
struct ExecParameters {
char **argv;
char **environment;
@@ -234,6 +235,8 @@ struct ExecParameters {
int stderr_fd;
};
+#include "unit.h"
+
int exec_spawn(Unit *unit,
ExecCommand *command,
const ExecContext *context,
diff --git a/src/grp-system/libcore/killall.c b/src/grp-system/libcore/killall.c
index bc4d67ae21..27946f1f84 100644
--- a/src/grp-system/libcore/killall.c
+++ b/src/grp-system/libcore/killall.c
@@ -23,6 +23,7 @@
#include <unistd.h>
#include "basic/alloc-util.h"
+#include "basic/def.h"
#include "basic/fd-util.h"
#include "basic/formats-util.h"
#include "basic/parse-util.h"
@@ -34,8 +35,6 @@
#include "killall.h"
-#define TIMEOUT_USEC (10 * USEC_PER_SEC)
-
static bool ignore_proc(pid_t pid, bool warn_rootfs) {
_cleanup_fclose_ FILE *f = NULL;
char c;
@@ -81,7 +80,7 @@ static bool ignore_proc(pid_t pid, bool warn_rootfs) {
get_process_comm(pid, &comm);
if (r)
- log_notice("Process " PID_FMT " (%s) has been been marked to be excluded from killing. It is "
+ log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is "
"running from the root file system, and thus likely to block re-mounting of the "
"root file system to read-only. Please consider moving it into an initrd file "
"system instead.", pid, strna(comm));
@@ -100,7 +99,7 @@ static void wait_for_children(Set *pids, sigset_t *mask) {
if (set_isempty(pids))
return;
- until = now(CLOCK_MONOTONIC) + TIMEOUT_USEC;
+ until = now(CLOCK_MONOTONIC) + DEFAULT_TIMEOUT_USEC;
for (;;) {
struct timespec ts;
int k;
diff --git a/src/grp-system/libcore/kmod-setup.c b/src/grp-system/libcore/kmod-setup.c
index a6fa6a32fb..c91c280e7d 100644
--- a/src/grp-system/libcore/kmod-setup.c
+++ b/src/grp-system/libcore/kmod-setup.c
@@ -65,9 +65,6 @@ int kmod_setup(void) {
/* this should never be a module */
{ "unix", "/proc/net/unix", true, true, NULL },
- /* IPC is needed before we bring up any other services */
- { "kdbus", "/sys/fs/kdbus", false, false, is_kdbus_wanted },
-
#ifdef HAVE_LIBIPTC
/* netfilter is needed by networkd, nspawn among others, and cannot be autoloaded */
{ "ip_tables", "/proc/net/ip_tables_names", false, false, NULL },
diff --git a/src/grp-system/libcore/load-fragment-gperf.gperf.m4 b/src/grp-system/libcore/load-fragment-gperf.gperf.m4
index c9f6e6acf7..ac4598f4c2 100644
--- a/src/grp-system/libcore/load-fragment-gperf.gperf.m4
+++ b/src/grp-system/libcore/load-fragment-gperf.gperf.m4
@@ -55,10 +55,14 @@ m4_ifdef(`HAVE_SECCOMP',
`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
+$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
+$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
@@ -76,9 +80,12 @@ $1.LimitMSGQUEUE, config_parse_limit, RLIMIT_MSGQ
$1.LimitNICE, config_parse_limit, RLIMIT_NICE, offsetof($1, exec_context.rlimit)
$1.LimitRTPRIO, config_parse_limit, RLIMIT_RTPRIO, offsetof($1, exec_context.rlimit)
$1.LimitRTTIME, config_parse_limit, RLIMIT_RTTIME, offsetof($1, exec_context.rlimit)
-$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_dirs)
-$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_dirs)
-$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_dirs)
+$1.ReadWriteDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
+$1.ReadWritePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_write_paths)
+$1.ReadOnlyPaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.read_only_paths)
+$1.InaccessiblePaths, config_parse_namespace_path_strv, 0, offsetof($1, exec_context.inaccessible_paths)
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
@@ -117,6 +124,9 @@ $1.CPUShares, config_parse_cpu_shares, 0,
$1.StartupCPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context.startup_cpu_shares)
$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context)
$1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting)
+$1.MemoryLow, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryHigh, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
+$1.MemoryMax, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context)
$1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context)
$1.DevicePolicy, config_parse_device_policy, 0, offsetof($1, cgroup_context.device_policy)
diff --git a/src/grp-system/libcore/load-fragment.c b/src/grp-system/libcore/load-fragment.c
index bb12b6cbfe..9a6ffc8bff 100644
--- a/src/grp-system/libcore/load-fragment.c
+++ b/src/grp-system/libcore/load-fragment.c
@@ -599,7 +599,7 @@ int config_parse_exec(
p = rvalue;
do {
_cleanup_free_ char *path = NULL, *firstword = NULL;
- bool separate_argv0 = false, ignore = false;
+ bool separate_argv0 = false, ignore = false, privileged = false;
_cleanup_free_ ExecCommand *nce = NULL;
_cleanup_strv_free_ char **n = NULL;
size_t nlen = 0, nbufsize = 0;
@@ -613,14 +613,18 @@ int config_parse_exec(
return 0;
f = firstword;
- for (i = 0; i < 2; i++) {
- /* We accept an absolute path as first argument, or
- * alternatively an absolute prefixed with @ to allow
- * overriding of argv[0]. */
+ for (i = 0; i < 3; i++) {
+ /* We accept an absolute path as first argument.
+ * If it's prefixed with - and the path doesn't exist,
+ * we ignore it instead of erroring out;
+ * if it's prefixed with @, we allow overriding of argv[0];
+ * and if it's prefixed with !, it will be run with full privileges */
if (*f == '-' && !ignore)
ignore = true;
else if (*f == '@' && !separate_argv0)
separate_argv0 = true;
+ else if (*f == '+' && !privileged)
+ privileged = true;
else
break;
f++;
@@ -718,6 +722,7 @@ int config_parse_exec(
nce->argv = n;
nce->path = path;
nce->ignore = ignore;
+ nce->privileged = privileged;
exec_command_append_list(e, nce);
@@ -2399,6 +2404,55 @@ int config_parse_documentation(const char *unit,
}
#ifdef HAVE_SECCOMP
+static int syscall_filter_parse_one(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ ExecContext *c,
+ bool invert,
+ const char *t,
+ bool warn) {
+ int r;
+
+ if (*t == '@') {
+ const SystemCallFilterSet *set;
+
+ for (set = syscall_filter_sets; set->set_name; set++)
+ if (streq(set->set_name, t)) {
+ const char *sys;
+
+ NULSTR_FOREACH(sys, set->value) {
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
+ if (r < 0)
+ return r;
+ }
+ break;
+ }
+ } else {
+ int id;
+
+ id = seccomp_syscall_resolve_name(t);
+ if (id == __NR_SCMP_ERROR) {
+ if (warn)
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
+ return 0;
+ }
+
+ /* If we previously wanted to forbid a syscall and now
+ * we want to allow it, then remove it from the list
+ */
+ if (!invert == c->syscall_whitelist) {
+ r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
+ if (r == 0)
+ return 0;
+ if (r < 0)
+ return log_oom();
+ } else
+ set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ }
+ return 0;
+}
+
int config_parse_syscall_filter(
const char *unit,
const char *filename,
@@ -2411,13 +2465,6 @@ int config_parse_syscall_filter(
void *data,
void *userdata) {
- static const char default_syscalls[] =
- "execve\0"
- "exit\0"
- "exit_group\0"
- "rt_sigreturn\0"
- "sigreturn\0";
-
ExecContext *c = data;
Unit *u = userdata;
bool invert = false;
@@ -2451,53 +2498,26 @@ int config_parse_syscall_filter(
/* Allow everything but the ones listed */
c->syscall_whitelist = false;
else {
- const char *i;
-
/* Allow nothing but the ones listed */
c->syscall_whitelist = true;
/* Accept default syscalls if we are on a whitelist */
- NULSTR_FOREACH(i, default_syscalls) {
- int id;
-
- id = seccomp_syscall_resolve_name(i);
- if (id < 0)
- continue;
-
- r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
- if (r == 0)
- continue;
- if (r < 0)
- return log_oom();
- }
+ r = syscall_filter_parse_one(unit, filename, line, c, false, "@default", false);
+ if (r < 0)
+ return r;
}
}
FOREACH_WORD_QUOTED(word, l, rvalue, state) {
_cleanup_free_ char *t = NULL;
- int id;
t = strndup(word, l);
if (!t)
return log_oom();
- id = seccomp_syscall_resolve_name(t);
- if (id < 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
- continue;
- }
-
- /* If we previously wanted to forbid a syscall and now
- * we want to allow it, then remove it from the list
- */
- if (!invert == c->syscall_whitelist) {
- r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
- if (r == 0)
- continue;
- if (r < 0)
- return log_oom();
- } else
- set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
+ r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
+ if (r < 0)
+ return r;
}
if (!isempty(state))
log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
@@ -2757,7 +2777,7 @@ int config_parse_cpu_quota(
void *userdata) {
CGroupContext *c = data;
- double percent;
+ int r;
assert(filename);
assert(lvalue);
@@ -2768,18 +2788,13 @@ int config_parse_cpu_quota(
return 0;
}
- if (!endswith(rvalue, "%")) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "CPU quota '%s' not ending in '%%'. Ignoring.", rvalue);
+ r = parse_percent(rvalue);
+ if (r <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "CPU quota '%s' invalid. Ignoring.", rvalue);
return 0;
}
- if (sscanf(rvalue, "%lf%%", &percent) != 1 || percent <= 0) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "CPU quota '%s' invalid. Ignoring.", rvalue);
- return 0;
- }
-
- c->cpu_quota_per_sec_usec = (usec_t) (percent * USEC_PER_SEC / 100);
-
+ c->cpu_quota_per_sec_usec = ((usec_t) r * USEC_PER_SEC) / 100U;
return 0;
}
@@ -2796,21 +2811,36 @@ int config_parse_memory_limit(
void *userdata) {
CGroupContext *c = data;
- uint64_t bytes;
+ uint64_t bytes = CGROUP_LIMIT_MAX;
int r;
- if (isempty(rvalue) || streq(rvalue, "infinity")) {
- c->memory_limit = (uint64_t) -1;
- return 0;
- }
+ if (!isempty(rvalue) && !streq(rvalue, "infinity")) {
- r = parse_size(rvalue, 1024, &bytes);
- if (r < 0 || bytes < 1) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
- return 0;
+ r = parse_percent(rvalue);
+ if (r < 0) {
+ r = parse_size(rvalue, 1024, &bytes);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Memory limit '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+ } else
+ bytes = physical_memory_scale(r, 100U);
+
+ if (bytes <= 0 || bytes >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Memory limit '%s' out of range. Ignoring.", rvalue);
+ return 0;
+ }
}
- c->memory_limit = bytes;
+ if (streq(lvalue, "MemoryLow"))
+ c->memory_low = bytes;
+ else if (streq(lvalue, "MemoryHigh"))
+ c->memory_high = bytes;
+ else if (streq(lvalue, "MemoryMax"))
+ c->memory_max = bytes;
+ else
+ c->memory_limit = bytes;
+
return 0;
}
@@ -2834,9 +2864,18 @@ int config_parse_tasks_max(
return 0;
}
- r = safe_atou64(rvalue, &u);
- if (r < 0 || u < 1) {
- log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ r = parse_percent(rvalue);
+ if (r < 0) {
+ r = safe_atou64(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+ } else
+ u = system_tasks_max_scale(r, 100U);
+
+ if (u <= 0 || u >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
return 0;
}
@@ -3063,7 +3102,7 @@ int config_parse_io_limit(
return 0;
}
- if (streq("max", limit)) {
+ if (streq("infinity", limit)) {
num = CGROUP_LIMIT_MAX;
} else {
r = parse_size(limit, 1000, &num);
@@ -3567,7 +3606,7 @@ int config_parse_protect_home(
assert(data);
/* Our enum shall be a superset of booleans, hence first try
- * to parse as as boolean, and then as enum */
+ * to parse as boolean, and then as enum */
k = parse_boolean(rvalue);
if (k > 0)
@@ -3610,7 +3649,7 @@ int config_parse_protect_system(
assert(data);
/* Our enum shall be a superset of booleans, hence first try
- * to parse as as boolean, and then as enum */
+ * to parse as boolean, and then as enum */
k = parse_boolean(rvalue);
if (k > 0)
@@ -3726,7 +3765,7 @@ static int merge_by_names(Unit **u, Set *names, const char *id) {
/* If the symlink name we are looking at is unit template, then
we must search for instance of this template */
- if (unit_name_is_valid(k, UNIT_NAME_TEMPLATE)) {
+ if (unit_name_is_valid(k, UNIT_NAME_TEMPLATE) && (*u)->instance) {
_cleanup_free_ char *instance = NULL;
r = unit_name_replace_instance(k, (*u)->instance, &instance);
@@ -3808,7 +3847,15 @@ static int load_from_path(Unit *u, const char *path) {
if (r >= 0)
break;
filename = mfree(filename);
- if (r != -ENOENT)
+
+ /* ENOENT means that the file is missing or is a dangling symlink.
+ * ENOTDIR means that one of paths we expect to be is a directory
+ * is not a directory, we should just ignore that.
+ * EACCES means that the directory or file permissions are wrong.
+ */
+ if (r == -EACCES)
+ log_debug_errno(r, "Cannot access \"%s\": %m", filename);
+ else if (!IN_SET(r, -ENOENT, -ENOTDIR))
return r;
/* Empty the symlink names for the next run */
diff --git a/src/grp-system/libcore/machine-id-setup.c b/src/grp-system/libcore/machine-id-setup.c
index 28722b46f0..0b65583686 100644
--- a/src/grp-system/libcore/machine-id-setup.c
+++ b/src/grp-system/libcore/machine-id-setup.c
@@ -17,11 +17,8 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-#include <errno.h>
#include <fcntl.h>
#include <sched.h>
-#include <stdio.h>
-#include <string.h>
#include <sys/mount.h>
#include <unistd.h>
@@ -29,10 +26,7 @@
#include "basic/alloc-util.h"
#include "basic/fd-util.h"
-#include "basic/fileio.h"
#include "basic/fs-util.h"
-#include "basic/hexdecoct.h"
-#include "basic/io-util.h"
#include "basic/log.h"
#include "basic/macro.h"
#include "basic/mkdir.h"
@@ -44,104 +38,27 @@
#include "basic/umask-util.h"
#include "basic/util.h"
#include "basic/virt.h"
+#include "sd-id128/id128-util.h"
#include "machine-id-setup.h"
-static int shorten_uuid(char destination[34], const char source[36]) {
- unsigned i, j;
-
- assert(destination);
- assert(source);
-
- /* Converts a UUID into a machine ID, by lowercasing it and
- * removing dashes. Validates everything. */
-
- for (i = 0, j = 0; i < 36 && j < 32; i++) {
- int t;
-
- t = unhexchar(source[i]);
- if (t < 0)
- continue;
-
- destination[j++] = hexchar(t);
- }
-
- if (i != 36 || j != 32)
- return -EINVAL;
-
- destination[32] = '\n';
- destination[33] = 0;
- return 0;
-}
-
-static int read_machine_id(int fd, char id[34]) {
- char id_to_validate[34];
- int r;
-
- assert(fd >= 0);
- assert(id);
-
- /* Reads a machine ID from a file, validates it, and returns
- * it. The returned ID ends in a newline. */
-
- r = loop_read_exact(fd, id_to_validate, 33, false);
- if (r < 0)
- return r;
-
- if (id_to_validate[32] != '\n')
- return -EINVAL;
-
- id_to_validate[32] = 0;
-
- if (!id128_is_valid(id_to_validate))
- return -EINVAL;
-
- memcpy(id, id_to_validate, 32);
- id[32] = '\n';
- id[33] = 0;
- return 0;
-}
-
-static int write_machine_id(int fd, const char id[34]) {
- int r;
-
- assert(fd >= 0);
- assert(id);
-
- if (lseek(fd, 0, SEEK_SET) < 0)
- return -errno;
-
- r = loop_write(fd, id, 33, false);
- if (r < 0)
- return r;
-
- if (fsync(fd) < 0)
- return -errno;
-
- return 0;
-}
-
-static int generate_machine_id(char id[34], const char *root) {
- int fd, r;
- unsigned char *p;
- sd_id128_t buf;
- char *q;
+static int generate_machine_id(const char *root, sd_id128_t *ret) {
const char *dbus_machine_id;
+ _cleanup_close_ int fd = -1;
+ int r;
- assert(id);
-
- dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id");
+ assert(ret);
/* First, try reading the D-Bus machine id, unless it is a symlink */
+ dbus_machine_id = prefix_roota(root, "/var/lib/dbus/machine-id");
fd = open(dbus_machine_id, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
if (fd >= 0) {
- r = read_machine_id(fd, id);
- safe_close(fd);
-
- if (r >= 0) {
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0) {
log_info("Initializing machine ID from D-Bus machine ID.");
return 0;
}
+
+ fd = safe_close(fd);
}
if (isempty(root)) {
@@ -152,13 +69,10 @@ static int generate_machine_id(char id[34], const char *root) {
if (detect_container() > 0) {
_cleanup_free_ char *e = NULL;
- r = getenv_for_pid(1, "container_uuid", &e);
- if (r > 0) {
- r = shorten_uuid(id, e);
- if (r >= 0) {
- log_info("Initializing machine ID from container UUID.");
- return 0;
- }
+ if (getenv_for_pid(1, "container_uuid", &e) > 0 &&
+ sd_id128_from_string(e, ret) >= 0) {
+ log_info("Initializing machine ID from container UUID.");
+ return 0;
}
} else if (detect_vm() == VIRTUALIZATION_KVM) {
@@ -167,51 +81,29 @@ static int generate_machine_id(char id[34], const char *root) {
* running in qemu/kvm and a machine ID was passed in
* via -uuid on the qemu/kvm command line */
- char uuid[36];
-
- fd = open("/sys/class/dmi/id/product_uuid", O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
- if (fd >= 0) {
- r = loop_read_exact(fd, uuid, 36, false);
- safe_close(fd);
-
- if (r >= 0) {
- r = shorten_uuid(id, uuid);
- if (r >= 0) {
- log_info("Initializing machine ID from KVM UUID.");
- return 0;
- }
- }
+ if (id128_read("/sys/class/dmi/id/product_uuid", ID128_UUID, ret) >= 0) {
+ log_info("Initializing machine ID from KVM UUID.");
+ return 0;
}
}
}
/* If that didn't work, generate a random machine id */
- r = sd_id128_randomize(&buf);
+ r = sd_id128_randomize(ret);
if (r < 0)
- return log_error_errno(r, "Failed to open /dev/urandom: %m");
-
- for (p = buf.bytes, q = id; p < buf.bytes + sizeof(buf); p++, q += 2) {
- q[0] = hexchar(*p >> 4);
- q[1] = hexchar(*p & 15);
- }
-
- id[32] = '\n';
- id[33] = 0;
+ return log_error_errno(r, "Failed to generate randomized : %m");
log_info("Initializing machine ID from random generator.");
-
return 0;
}
-int machine_id_setup(const char *root, sd_id128_t machine_id) {
+int machine_id_setup(const char *root, sd_id128_t machine_id, sd_id128_t *ret) {
const char *etc_machine_id, *run_machine_id;
_cleanup_close_ int fd = -1;
- bool writable = true;
- char id[34]; /* 32 + \n + \0 */
+ bool writable;
int r;
etc_machine_id = prefix_roota(root, "/etc/machine-id");
- run_machine_id = prefix_roota(root, "/run/machine-id");
RUN_WITH_UMASK(0000) {
/* We create this 0444, to indicate that this isn't really
@@ -219,7 +111,7 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
* will be owned by root it doesn't matter much, but maybe
* people look. */
- mkdir_parents(etc_machine_id, 0755);
+ (void) mkdir_parents(etc_machine_id, 0755);
fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444);
if (fd < 0) {
int old_errno = errno;
@@ -240,41 +132,41 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
}
writable = false;
- }
+ } else
+ writable = true;
}
- /* A machine id argument overrides all other machined-ids */
- if (!sd_id128_is_null(machine_id)) {
- sd_id128_to_string(machine_id, id);
- id[32] = '\n';
- id[33] = 0;
- } else {
- if (read_machine_id(fd, id) >= 0)
- return 0;
+ /* A we got a valid machine ID argument, that's what counts */
+ if (sd_id128_is_null(machine_id)) {
- /* Hmm, so, the id currently stored is not useful, then let's
- * generate one */
+ /* Try to read any existing machine ID */
+ if (id128_read_fd(fd, ID128_PLAIN, ret) >= 0)
+ return 0;
- r = generate_machine_id(id, root);
+ /* Hmm, so, the id currently stored is not useful, then let's generate one */
+ r = generate_machine_id(root, &machine_id);
if (r < 0)
return r;
+
+ if (lseek(fd, 0, SEEK_SET) == (off_t) -1)
+ return log_error_errno(errno, "Failed to seek: %m");
}
if (writable)
- if (write_machine_id(fd, id) >= 0)
- return 0;
+ if (id128_write_fd(fd, ID128_PLAIN, machine_id, true) >= 0)
+ goto finish;
fd = safe_close(fd);
- /* Hmm, we couldn't write it? So let's write it to
- * /run/machine-id as a replacement */
+ /* Hmm, we couldn't write it? So let's write it to /run/machine-id as a replacement */
- RUN_WITH_UMASK(0022) {
- r = write_string_file(run_machine_id, id, WRITE_STRING_FILE_CREATE);
- if (r < 0) {
- (void) unlink(run_machine_id);
- return log_error_errno(r, "Cannot write %s: %m", run_machine_id);
- }
+ run_machine_id = prefix_roota(root, "/run/machine-id");
+
+ RUN_WITH_UMASK(0022)
+ r = id128_write(run_machine_id, ID128_PLAIN, machine_id, false);
+ if (r < 0) {
+ (void) unlink(run_machine_id);
+ return log_error_errno(r, "Cannot write %s: %m", run_machine_id);
}
/* And now, let's mount it over */
@@ -287,7 +179,11 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
/* Mark the mount read-only */
if (mount(NULL, etc_machine_id, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL) < 0)
- log_warning_errno(errno, "Failed to make transient %s read-only: %m", etc_machine_id);
+ log_warning_errno(errno, "Failed to make transient %s read-only, ignoring: %m", etc_machine_id);
+
+finish:
+ if (ret)
+ *ret = machine_id;
return 0;
}
@@ -295,16 +191,20 @@ int machine_id_setup(const char *root, sd_id128_t machine_id) {
int machine_id_commit(const char *root) {
_cleanup_close_ int fd = -1, initial_mntns_fd = -1;
const char *etc_machine_id;
- char id[34]; /* 32 + \n + \0 */
+ sd_id128_t id;
int r;
+ /* Replaces a tmpfs bind mount of /etc/machine-id by a proper file, atomically. For this, the umount is removed
+ * in a mount namespace, a new file is created at the right place. Afterwards the mount is also removed in the
+ * original mount namespace, thus revealing the file that was just created. */
+
etc_machine_id = prefix_roota(root, "/etc/machine-id");
r = path_is_mount_point(etc_machine_id, 0);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is a mount point: %m", etc_machine_id);
if (r == 0) {
- log_debug("%s is is not a mount point. Nothing to do.", etc_machine_id);
+ log_debug("%s is not a mount point. Nothing to do.", etc_machine_id);
return 0;
}
@@ -313,10 +213,6 @@ int machine_id_commit(const char *root) {
if (fd < 0)
return log_error_errno(errno, "Cannot open %s: %m", etc_machine_id);
- r = read_machine_id(fd, id);
- if (r < 0)
- return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id);
-
r = fd_is_temporary_fs(fd);
if (r < 0)
return log_error_errno(r, "Failed to determine whether %s is on a temporary file system: %m", etc_machine_id);
@@ -325,6 +221,10 @@ int machine_id_commit(const char *root) {
return -EROFS;
}
+ r = id128_read_fd(fd, ID128_PLAIN, &id);
+ if (r < 0)
+ return log_error_errno(r, "We didn't find a valid machine ID in %s.", etc_machine_id);
+
fd = safe_close(fd);
/* Store current mount namespace */
@@ -343,15 +243,9 @@ int machine_id_commit(const char *root) {
return log_error_errno(errno, "Failed to unmount transient %s file in our private namespace: %m", etc_machine_id);
/* Update a persistent version of etc_machine_id */
- fd = open(etc_machine_id, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY, 0444);
- if (fd < 0)
- return log_error_errno(errno, "Cannot open for writing %s. This is mandatory to get a persistent machine-id: %m", etc_machine_id);
-
- r = write_machine_id(fd, id);
+ r = id128_write(etc_machine_id, ID128_PLAIN, id, true);
if (r < 0)
- return log_error_errno(r, "Cannot write %s: %m", etc_machine_id);
-
- fd = safe_close(fd);
+ return log_error_errno(r, "Cannot write %s. This is mandatory to get a persistent machine ID: %m", etc_machine_id);
/* Return to initial namespace and proceed a lazy tmpfs unmount */
r = namespace_enter(-1, initial_mntns_fd, -1, -1, -1);
diff --git a/src/grp-system/libcore/machine-id-setup.h b/src/grp-system/libcore/machine-id-setup.h
index c5dd8d7790..88830ecc42 100644
--- a/src/grp-system/libcore/machine-id-setup.h
+++ b/src/grp-system/libcore/machine-id-setup.h
@@ -22,4 +22,4 @@
#include <systemd/sd-id128.h>
int machine_id_commit(const char *root);
-int machine_id_setup(const char *root, sd_id128_t machine_id);
+int machine_id_setup(const char *root, sd_id128_t requested, sd_id128_t *ret);
diff --git a/src/grp-system/libcore/manager.c b/src/grp-system/libcore/manager.c
index 29dbf092d7..26395bc2b3 100644
--- a/src/grp-system/libcore/manager.c
+++ b/src/grp-system/libcore/manager.c
@@ -137,23 +137,28 @@ static void draw_cylon(char buffer[], size_t buflen, unsigned width, unsigned po
if (pos > 1) {
if (pos > 2)
p = mempset(p, ' ', pos-2);
- p = stpcpy(p, ANSI_RED);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
*p++ = '*';
}
if (pos > 0 && pos <= width) {
- p = stpcpy(p, ANSI_HIGHLIGHT_RED);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_HIGHLIGHT_RED);
*p++ = '*';
}
- p = stpcpy(p, ANSI_NORMAL);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_NORMAL);
if (pos < width) {
- p = stpcpy(p, ANSI_RED);
+ if (log_get_show_color())
+ p = stpcpy(p, ANSI_RED);
*p++ = '*';
if (pos < width-1)
p = mempset(p, ' ', width-1-pos);
- strcpy(p, ANSI_NORMAL);
+ if (log_get_show_color())
+ strcpy(p, ANSI_NORMAL);
}
}
@@ -566,7 +571,7 @@ int manager_new(UnitFileScope scope, bool test_run, Manager **_m) {
m->exit_code = _MANAGER_EXIT_CODE_INVALID;
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
m->default_tasks_accounting = true;
- m->default_tasks_max = UINT64_C(512);
+ m->default_tasks_max = UINT64_MAX;
#ifdef ENABLE_EFI
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
@@ -810,28 +815,6 @@ static int manager_setup_cgroups_agent(Manager *m) {
return 0;
}
-static int manager_setup_kdbus(Manager *m) {
- _cleanup_free_ char *p = NULL;
-
- assert(m);
-
- if (m->test_run || m->kdbus_fd >= 0)
- return 0;
- if (!is_kdbus_available())
- return -ESOCKTNOSUPPORT;
-
- m->kdbus_fd = bus_kernel_create_bus(
- MANAGER_IS_SYSTEM(m) ? "system" : "user",
- MANAGER_IS_SYSTEM(m), &p);
-
- if (m->kdbus_fd < 0)
- return log_debug_errno(m->kdbus_fd, "Failed to set up kdbus: %m");
-
- log_debug("Successfully set up kdbus on %s", p);
-
- return 0;
-}
-
static int manager_connect_bus(Manager *m, bool reexecuting) {
bool try_bus_connect;
@@ -873,6 +856,19 @@ enum {
_GC_OFFSET_MAX
};
+static void unit_gc_mark_good(Unit *u, unsigned gc_marker)
+{
+ Iterator i;
+ Unit *other;
+
+ u->gc_marker = gc_marker + GC_OFFSET_GOOD;
+
+ /* Recursively mark referenced units as GOOD as well */
+ SET_FOREACH(other, u->dependencies[UNIT_REFERENCES], i)
+ if (other->gc_marker == gc_marker + GC_OFFSET_UNSURE)
+ unit_gc_mark_good(other, gc_marker);
+}
+
static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
Iterator i;
Unit *other;
@@ -882,6 +878,7 @@ static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
if (u->gc_marker == gc_marker + GC_OFFSET_GOOD ||
u->gc_marker == gc_marker + GC_OFFSET_BAD ||
+ u->gc_marker == gc_marker + GC_OFFSET_UNSURE ||
u->gc_marker == gc_marker + GC_OFFSET_IN_PATH)
return;
@@ -922,7 +919,7 @@ bad:
return;
good:
- u->gc_marker = gc_marker + GC_OFFSET_GOOD;
+ unit_gc_mark_good(u, gc_marker);
}
static unsigned manager_dispatch_gc_queue(Manager *m) {
@@ -1226,7 +1223,6 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
/* We might have deserialized the kdbus control fd, but if we
* didn't, then let's create the bus now. */
- manager_setup_kdbus(m);
manager_connect_bus(m, !!serialization);
bus_track_coldplug(m, &m->subscribed, &m->deserialized_subscribed);
@@ -1611,9 +1607,9 @@ static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const
}
static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+
_cleanup_fdset_free_ FDSet *fds = NULL;
Manager *m = userdata;
-
char buf[NOTIFY_BUFFER_MAX+1];
struct iovec iovec = {
.iov_base = buf,
@@ -1721,16 +1717,28 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
}
static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) {
+ uint64_t iteration;
+
assert(m);
assert(u);
assert(si);
+ sd_event_get_iteration(m->event, &iteration);
+
log_unit_debug(u, "Child "PID_FMT" belongs to %s", si->si_pid, u->id);
unit_unwatch_pid(u, si->si_pid);
- if (UNIT_VTABLE(u)->sigchld_event)
- UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
+ if (UNIT_VTABLE(u)->sigchld_event) {
+ if (set_size(u->pids) <= 1 ||
+ iteration != u->sigchldgen ||
+ unit_main_pid(u) == si->si_pid ||
+ unit_control_pid(u) == si->si_pid) {
+ UNIT_VTABLE(u)->sigchld_event(u, si->si_pid, si->si_code, si->si_status);
+ u->sigchldgen = iteration;
+ } else
+ log_debug("%s already issued a sigchld this iteration %" PRIu64 ", skipping. Pids still being watched %d", u->id, iteration, set_size(u->pids));
+ }
}
static int manager_dispatch_sigchld(Manager *m) {
diff --git a/src/grp-system/libcore/manager.h b/src/grp-system/libcore/manager.h
index fedf39ddcd..252919c27f 100644
--- a/src/grp-system/libcore/manager.h
+++ b/src/grp-system/libcore/manager.h
@@ -27,10 +27,10 @@
#include <systemd/sd-event.h>
#include "basic/cgroup-util.h"
-#include "basic/fdset.h"
#include "basic/hashmap.h"
#include "basic/list.h"
#include "basic/ratelimit.h"
+#include "shared/fdset.h"
/* Enforce upper limit how many names we allow */
#define MANAGER_MAX_NAMES 131072 /* 128K */
diff --git a/src/grp-system/libcore/mount-setup.c b/src/grp-system/libcore/mount-setup.c
index e90f0e918a..0de1c63b3e 100644
--- a/src/grp-system/libcore/mount-setup.c
+++ b/src/grp-system/libcore/mount-setup.c
@@ -25,6 +25,7 @@
#include "basic/alloc-util.h"
#include "basic/cgroup-util.h"
+#include "basic/fs-util.h"
#include "basic/label.h"
#include "basic/log.h"
#include "basic/macro.h"
@@ -109,8 +110,6 @@ static const MountPoint mount_table[] = {
{ "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
is_efi_boot, MNT_NONE },
#endif
- { "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
- is_kdbus_wanted, MNT_IN_CONTAINER },
};
/* These are API file systems that might be mounted by other software,
@@ -406,9 +405,16 @@ int mount_setup(bool loaded_policy) {
* really needs to stay for good, otherwise software that
* copied sd-daemon.c into their sources will misdetect
* systemd. */
- mkdir_label("/run/systemd", 0755);
- mkdir_label("/run/systemd/system", 0755);
- mkdir_label("/run/systemd/inaccessible", 0000);
+ (void) mkdir_label("/run/systemd", 0755);
+ (void) mkdir_label("/run/systemd/system", 0755);
+ (void) mkdir_label("/run/systemd/inaccessible", 0000);
+ /* Set up inaccessible items */
+ (void) mknod("/run/systemd/inaccessible/reg", S_IFREG | 0000, 0);
+ (void) mkdir_label("/run/systemd/inaccessible/dir", 0000);
+ (void) mknod("/run/systemd/inaccessible/chr", S_IFCHR | 0000, makedev(0, 0));
+ (void) mknod("/run/systemd/inaccessible/blk", S_IFBLK | 0000, makedev(0, 0));
+ (void) mkfifo("/run/systemd/inaccessible/fifo", 0000);
+ (void) mknod("/run/systemd/inaccessible/sock", S_IFSOCK | 0000, 0);
return 0;
}
diff --git a/src/grp-system/libcore/mount.c b/src/grp-system/libcore/mount.c
index d4af6c65f6..bc5f29692d 100644
--- a/src/grp-system/libcore/mount.c
+++ b/src/grp-system/libcore/mount.c
@@ -1707,6 +1707,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
/* This has just been unmounted by
* somebody else, follow the state
* change. */
+ mount->result = MOUNT_SUCCESS; /* make sure we forget any earlier umount failures */
mount_enter_dead(mount, MOUNT_SUCCESS);
break;
diff --git a/src/grp-system/libcore/namespace.c b/src/grp-system/libcore/namespace.c
index db60336a60..f76a0a7fbf 100644
--- a/src/grp-system/libcore/namespace.c
+++ b/src/grp-system/libcore/namespace.c
@@ -280,6 +280,7 @@ static int apply_mount(
const char *what;
int r;
+ struct stat target;
assert(m);
@@ -289,12 +290,21 @@ static int apply_mount(
/* First, get rid of everything that is below if there
* is anything... Then, overmount it with an
- * inaccessible directory. */
+ * inaccessible path. */
umount_recursive(m->path, 0);
- what = "/run/systemd/inaccessible";
- break;
+ if (lstat(m->path, &target) < 0) {
+ if (m->ignore && errno == ENOENT)
+ return 0;
+ return -errno;
+ }
+ what = mode_to_inaccessible_node(target.st_mode);
+ if (!what) {
+ log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed");
+ return -ELOOP;
+ }
+ break;
case READONLY:
case READWRITE:
/* Nothing to mount here, we just later toggle the
@@ -319,12 +329,14 @@ static int apply_mount(
assert(what);
r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
- if (r >= 0)
+ if (r >= 0) {
log_debug("Successfully mounted %s to %s", what, m->path);
- else if (m->ignore && errno == ENOENT)
- return 0;
-
- return r;
+ return r;
+ } else {
+ if (m->ignore && errno == ENOENT)
+ return 0;
+ return log_debug_errno(errno, "Failed to mount %s to %s: %m", what, m->path);
+ }
}
static int make_read_only(BindMount *m) {
@@ -337,7 +349,8 @@ static int make_read_only(BindMount *m) {
else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV)) {
r = bind_remount_recursive(m->path, false);
if (r == 0 && m->mode == PRIVATE_DEV) /* can be readonly but the submounts can't*/
- r = mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL);
+ if (mount(NULL, m->path, NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
+ r = -errno;
} else
r = 0;
@@ -349,9 +362,9 @@ static int make_read_only(BindMount *m) {
int setup_namespace(
const char* root_directory,
- char** read_write_dirs,
- char** read_only_dirs,
- char** inaccessible_dirs,
+ char** read_write_paths,
+ char** read_only_paths,
+ char** inaccessible_paths,
const char* tmp_dir,
const char* var_tmp_dir,
bool private_dev,
@@ -370,9 +383,9 @@ int setup_namespace(
return -errno;
n = !!tmp_dir + !!var_tmp_dir +
- strv_length(read_write_dirs) +
- strv_length(read_only_dirs) +
- strv_length(inaccessible_dirs) +
+ strv_length(read_write_paths) +
+ strv_length(read_only_paths) +
+ strv_length(inaccessible_paths) +
private_dev +
(protect_home != PROTECT_HOME_NO ? 3 : 0) +
(protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
@@ -380,15 +393,15 @@ int setup_namespace(
if (n > 0) {
m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
- r = append_mounts(&m, read_write_dirs, READWRITE);
+ r = append_mounts(&m, read_write_paths, READWRITE);
if (r < 0)
return r;
- r = append_mounts(&m, read_only_dirs, READONLY);
+ r = append_mounts(&m, read_only_paths, READONLY);
if (r < 0)
return r;
- r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
+ r = append_mounts(&m, inaccessible_paths, INACCESSIBLE);
if (r < 0)
return r;
@@ -631,7 +644,7 @@ int setup_netns(int netns_storage_socket[2]) {
}
fail:
- lockf(netns_storage_socket[0], F_ULOCK, 0);
+ (void) lockf(netns_storage_socket[0], F_ULOCK, 0);
return r;
}
diff --git a/src/grp-system/libcore/namespace.h b/src/grp-system/libcore/namespace.h
index 03097327dd..1ae206efd1 100644
--- a/src/grp-system/libcore/namespace.h
+++ b/src/grp-system/libcore/namespace.h
@@ -40,9 +40,9 @@ typedef enum ProtectSystem {
} ProtectSystem;
int setup_namespace(const char *chroot,
- char **read_write_dirs,
- char **read_only_dirs,
- char **inaccessible_dirs,
+ char **read_write_paths,
+ char **read_only_paths,
+ char **inaccessible_paths,
const char *tmp_dir,
const char *var_tmp_dir,
bool private_dev,
diff --git a/src/grp-system/libcore/scope.c b/src/grp-system/libcore/scope.c
index c290529e14..8998f751f5 100644
--- a/src/grp-system/libcore/scope.c
+++ b/src/grp-system/libcore/scope.c
@@ -241,7 +241,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
/* If we have a controller set let's ask the controller nicely
* to terminate the scope, instead of us going directly into
- * SIGTERM beserk mode */
+ * SIGTERM berserk mode */
if (state == SCOPE_STOP_SIGTERM)
skip_signal = bus_scope_send_request_stop(s) > 0;
@@ -249,7 +249,9 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
r = unit_kill_context(
UNIT(s),
&s->kill_context,
- state != SCOPE_STOP_SIGTERM ? KILL_KILL : KILL_TERMINATE,
+ state != SCOPE_STOP_SIGTERM ? KILL_KILL :
+ s->was_abandoned ? KILL_TERMINATE_AND_LOG :
+ KILL_TERMINATE,
-1, -1, false);
if (r < 0)
goto fail;
@@ -370,6 +372,7 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) {
assert(fds);
unit_serialize_item(u, f, "state", scope_state_to_string(s->state));
+ unit_serialize_item(u, f, "was-abandoned", yes_no(s->was_abandoned));
return 0;
}
@@ -390,6 +393,14 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F
else
s->deserialized_state = state;
+ } else if (streq(key, "was-abandoned")) {
+ int k;
+
+ k = parse_boolean(value);
+ if (k < 0)
+ log_unit_debug(u, "Failed to parse boolean value: %s", value);
+ else
+ s->was_abandoned = k;
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
@@ -429,8 +440,9 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
unit_tidy_watch_pids(u, 0, 0);
unit_watch_all_pids(u);
- /* If the PID set is empty now, then let's finish this off */
- if (set_isempty(u->pids))
+ /* If the PID set is empty now, then let's finish this off
+ (On unified we use proper notifications) */
+ if (cg_unified() <= 0 && set_isempty(u->pids))
scope_notify_cgroup_empty_event(u);
}
@@ -474,6 +486,7 @@ int scope_abandon(Scope *s) {
if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED))
return -ESTALE;
+ s->was_abandoned = true;
s->controller = mfree(s->controller);
/* The client is no longer watching the remaining processes,
diff --git a/src/grp-system/libcore/scope.h b/src/grp-system/libcore/scope.h
index f0cb3bd3e2..eaf8e8b447 100644
--- a/src/grp-system/libcore/scope.h
+++ b/src/grp-system/libcore/scope.h
@@ -21,6 +21,7 @@
typedef struct Scope Scope;
+#include "cgroup.h"
#include "kill.h"
#include "unit.h"
@@ -44,6 +45,7 @@ struct Scope {
usec_t timeout_stop_usec;
char *controller;
+ bool was_abandoned;
sd_event_source *timer_event_source;
};
diff --git a/src/grp-system/libcore/selinux-access.c b/src/grp-system/libcore/selinux-access.c
index ce2ef2db1c..f6dbfa64b7 100644
--- a/src/grp-system/libcore/selinux-access.c
+++ b/src/grp-system/libcore/selinux-access.c
@@ -192,7 +192,7 @@ int mac_selinux_generic_access_check(
const char *tclass = NULL, *scon = NULL;
struct audit_info audit_info = {};
_cleanup_free_ char *cl = NULL;
- security_context_t fcon = NULL;
+ char *fcon = NULL;
char **cmdline = NULL;
int r = 0;
diff --git a/src/grp-system/libcore/selinux-setup.c b/src/grp-system/libcore/selinux-setup.c
index 314d8edbaa..08c61af146 100644
--- a/src/grp-system/libcore/selinux-setup.c
+++ b/src/grp-system/libcore/selinux-setup.c
@@ -45,7 +45,7 @@ int mac_selinux_setup(bool *loaded_policy) {
#ifdef HAVE_SELINUX
int enforce = 0;
usec_t before_load, after_load;
- security_context_t con;
+ char *con;
int r;
union selinux_callback cb;
bool initialized = false;
diff --git a/src/grp-system/libcore/service.c b/src/grp-system/libcore/service.c
index 9b7f7bd68c..43d195bbba 100644
--- a/src/grp-system/libcore/service.c
+++ b/src/grp-system/libcore/service.c
@@ -201,16 +201,27 @@ static void service_stop_watchdog(Service *s) {
s->watchdog_timestamp = DUAL_TIMESTAMP_NULL;
}
+static usec_t service_get_watchdog_usec(Service *s) {
+ assert(s);
+
+ if (s->watchdog_override_enable)
+ return s->watchdog_override_usec;
+ else
+ return s->watchdog_usec;
+}
+
static void service_start_watchdog(Service *s) {
int r;
+ usec_t watchdog_usec;
assert(s);
- if (s->watchdog_usec <= 0)
+ watchdog_usec = service_get_watchdog_usec(s);
+ if (watchdog_usec == 0 || watchdog_usec == USEC_INFINITY)
return;
if (s->watchdog_event_source) {
- r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, s->watchdog_usec));
+ r = sd_event_source_set_time(s->watchdog_event_source, usec_add(s->watchdog_timestamp.monotonic, watchdog_usec));
if (r < 0) {
log_unit_warning_errno(UNIT(s), r, "Failed to reset watchdog timer: %m");
return;
@@ -222,7 +233,7 @@ static void service_start_watchdog(Service *s) {
UNIT(s)->manager->event,
&s->watchdog_event_source,
CLOCK_MONOTONIC,
- usec_add(s->watchdog_timestamp.monotonic, s->watchdog_usec), 0,
+ usec_add(s->watchdog_timestamp.monotonic, watchdog_usec), 0,
service_dispatch_watchdog, s);
if (r < 0) {
log_unit_warning_errno(UNIT(s), r, "Failed to add watchdog timer: %m");
@@ -247,6 +258,17 @@ static void service_reset_watchdog(Service *s) {
service_start_watchdog(s);
}
+static void service_reset_watchdog_timeout(Service *s, usec_t watchdog_override_usec) {
+ assert(s);
+
+ s->watchdog_override_enable = true;
+ s->watchdog_override_usec = watchdog_override_usec;
+ service_reset_watchdog(s);
+
+ log_unit_debug(UNIT(s), "watchdog_usec="USEC_FMT, s->watchdog_usec);
+ log_unit_debug(UNIT(s), "watchdog_override_usec="USEC_FMT, s->watchdog_override_usec);
+}
+
static void service_fd_store_unlink(ServiceFDStore *fs) {
if (!fs)
@@ -575,20 +597,9 @@ static int service_setup_bus_name(Service *s) {
if (!s->bus_name)
return 0;
- if (is_kdbus_available()) {
- const char *n;
-
- n = strjoina(s->bus_name, ".busname");
- r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, n, NULL, true);
- if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Failed to add dependency to .busname unit: %m");
-
- } else {
- /* If kdbus is not available, we know the dbus socket is required, hence pull it in, and require it */
- r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true);
- if (r < 0)
- return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
- }
+ r = unit_add_dependency_by_name(UNIT(s), UNIT_REQUIRES, SPECIAL_DBUS_SOCKET, NULL, true);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to add dependency on " SPECIAL_DBUS_SOCKET ": %m");
/* Regardless if kdbus is used or not, we always want to be ordered against dbus.socket if both are in the transaction. */
r = unit_add_dependency_by_name(UNIT(s), UNIT_AFTER, SPECIAL_DBUS_SOCKET, NULL, true);
@@ -1664,7 +1675,7 @@ static void service_kill_control_processes(Service *s) {
return;
p = strjoina(UNIT(s)->cgroup_path, "/control");
- cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, true, true, true, NULL);
+ cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, CGROUP_SIGCONT|CGROUP_IGNORE_SELF|CGROUP_REMOVE, NULL, NULL, NULL);
}
static void service_enter_start(Service *s) {
@@ -2004,6 +2015,9 @@ static int service_start(Unit *u) {
s->notify_state = NOTIFY_UNKNOWN;
+ s->watchdog_override_enable = false;
+ s->watchdog_override_usec = 0;
+
service_enter_start_pre(s);
return 1;
}
@@ -2135,6 +2149,9 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
unit_serialize_item(u, f, "forbid-restart", yes_no(s->forbid_restart));
+ if (s->watchdog_override_enable)
+ unit_serialize_item_format(u, f, "watchdog-override-usec", USEC_FMT, s->watchdog_override_usec);
+
return 0;
}
@@ -2329,6 +2346,14 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
s->stderr_fd = fdset_remove(fds, fd);
s->exec_context.stdio_as_fds = true;
}
+ } else if (streq(key, "watchdog-override-usec")) {
+ usec_t watchdog_override_usec;
+ if (timestamp_deserialize(value, &watchdog_override_usec) < 0)
+ log_unit_debug(u, "Failed to parse watchdog_override_usec value: %s", value);
+ else {
+ s->watchdog_override_enable = true;
+ s->watchdog_override_usec = watchdog_override_usec;
+ }
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
@@ -2801,8 +2826,9 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
unit_tidy_watch_pids(u, s->main_pid, s->control_pid);
unit_watch_all_pids(u);
- /* If the PID set is empty now, then let's finish this off */
- if (set_isempty(u->pids))
+ /* If the PID set is empty now, then let's finish this off
+ (On unified we use proper notifications) */
+ if (cg_unified() <= 0 && set_isempty(u->pids))
service_notify_cgroup_empty_event(u);
}
@@ -2906,12 +2932,15 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata) {
Service *s = SERVICE(userdata);
char t[FORMAT_TIMESPAN_MAX];
+ usec_t watchdog_usec;
assert(s);
assert(source == s->watchdog_event_source);
+ watchdog_usec = service_get_watchdog_usec(s);
+
log_unit_error(UNIT(s), "Watchdog timeout (limit %s)!",
- format_timespan(t, sizeof(t), s->watchdog_usec, 1));
+ format_timespan(t, sizeof(t), watchdog_usec, 1));
service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG);
@@ -3048,6 +3077,15 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags, FDSet *fds)
service_add_fd_store_set(s, fds, name);
}
+ e = strv_find_startswith(tags, "WATCHDOG_USEC=");
+ if (e) {
+ usec_t watchdog_override_usec;
+ if (safe_atou64(e, &watchdog_override_usec) < 0)
+ log_unit_warning(u, "Failed to parse WATCHDOG_USEC=%s", e);
+ else
+ service_reset_watchdog_timeout(s, watchdog_override_usec);
+ }
+
/* Notify clients about changed status or main pid */
if (notify_dbus)
unit_add_to_dbus_queue(u);
diff --git a/src/grp-system/libcore/service.h b/src/grp-system/libcore/service.h
index aa7d1de8b6..4dcf5ecf78 100644
--- a/src/grp-system/libcore/service.h
+++ b/src/grp-system/libcore/service.h
@@ -122,6 +122,8 @@ struct Service {
dual_timestamp watchdog_timestamp;
usec_t watchdog_usec;
+ usec_t watchdog_override_usec;
+ bool watchdog_override_enable;
sd_event_source *watchdog_event_source;
ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
diff --git a/src/grp-system/libcore/socket.c b/src/grp-system/libcore/socket.c
index 051cbdab8b..3e0b3e2e49 100644
--- a/src/grp-system/libcore/socket.c
+++ b/src/grp-system/libcore/socket.c
@@ -732,16 +732,16 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
case AF_INET: {
uint32_t
- a = ntohl(local.in.sin_addr.s_addr),
- b = ntohl(remote.in.sin_addr.s_addr);
+ a = be32toh(local.in.sin_addr.s_addr),
+ b = be32toh(remote.in.sin_addr.s_addr);
if (asprintf(&r,
"%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
nr,
a >> 24, (a >> 16) & 0xFF, (a >> 8) & 0xFF, a & 0xFF,
- ntohs(local.in.sin_port),
+ be16toh(local.in.sin_port),
b >> 24, (b >> 16) & 0xFF, (b >> 8) & 0xFF, b & 0xFF,
- ntohs(remote.in.sin_port)) < 0)
+ be16toh(remote.in.sin_port)) < 0)
return -ENOMEM;
break;
@@ -762,9 +762,9 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
"%u-%u.%u.%u.%u:%u-%u.%u.%u.%u:%u",
nr,
a[0], a[1], a[2], a[3],
- ntohs(local.in6.sin6_port),
+ be16toh(local.in6.sin6_port),
b[0], b[1], b[2], b[3],
- ntohs(remote.in6.sin6_port)) < 0)
+ be16toh(remote.in6.sin6_port)) < 0)
return -ENOMEM;
} else {
char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
@@ -773,9 +773,9 @@ static int instance_from_socket(int fd, unsigned nr, char **instance) {
"%u-%s:%u-%s:%u",
nr,
inet_ntop(AF_INET6, &local.in6.sin6_addr, a, sizeof(a)),
- ntohs(local.in6.sin6_port),
+ be16toh(local.in6.sin6_port),
inet_ntop(AF_INET6, &remote.in6.sin6_addr, b, sizeof(b)),
- ntohs(remote.in6.sin6_port)) < 0)
+ be16toh(remote.in6.sin6_port)) < 0)
return -ENOMEM;
}
diff --git a/src/grp-system/libcore/transaction.c b/src/grp-system/libcore/transaction.c
index d19e19f978..aa57eee556 100644
--- a/src/grp-system/libcore/transaction.c
+++ b/src/grp-system/libcore/transaction.c
@@ -374,7 +374,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi
delete = NULL;
for (k = from; k; k = ((k->generation == generation && k->marker != k) ? k->marker : NULL)) {
- /* logging for j not k here here to provide consistent narrative */
+ /* logging for j not k here to provide consistent narrative */
log_unit_warning(j->unit,
"Found dependency on %s/%s",
k->unit->id, job_type_to_string(k->type));
@@ -393,7 +393,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi
if (delete) {
const char *status;
- /* logging for j not k here here to provide consistent narrative */
+ /* logging for j not k here to provide consistent narrative */
log_unit_warning(j->unit,
"Breaking ordering cycle by deleting job %s/%s",
delete->unit->id, job_type_to_string(delete->type));
@@ -592,6 +592,9 @@ static int transaction_apply(Transaction *tr, Manager *m, JobMode mode) {
HASHMAP_FOREACH(j, m->jobs, i) {
assert(j->installed);
+ if (j->unit->ignore_on_isolate)
+ continue;
+
if (hashmap_get(tr->jobs, j->unit))
continue;
diff --git a/src/grp-system/libcore/unit.c b/src/grp-system/libcore/unit.c
index f539c3971d..aff4dbd2ca 100644
--- a/src/grp-system/libcore/unit.c
+++ b/src/grp-system/libcore/unit.c
@@ -101,6 +101,7 @@ Unit *unit_new(Manager *m, size_t size) {
u->on_failure_job_mode = JOB_REPLACE;
u->cgroup_inotify_wd = -1;
u->job_timeout = USEC_INFINITY;
+ u->sigchldgen = 0;
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@@ -1683,7 +1684,7 @@ static void unit_check_unneeded(Unit *u) {
if (unit_active_or_pending(other))
return;
- /* If stopping a unit fails continously we might enter a stop
+ /* If stopping a unit fails continuously we might enter a stop
* loop here, hence stop acting on the service being
* unnecessary after a while. */
if (!ratelimit_test(&u->auto_stop_ratelimit)) {
@@ -1728,7 +1729,7 @@ static void unit_check_binds_to(Unit *u) {
if (!stop)
return;
- /* If stopping a unit fails continously we might enter a stop
+ /* If stopping a unit fails continuously we might enter a stop
* loop here, hence stop acting on the service being
* unnecessary after a while. */
if (!ratelimit_test(&u->auto_stop_ratelimit)) {
@@ -3144,7 +3145,7 @@ int unit_kill_common(
if (!pid_set)
return -ENOMEM;
- q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, false, false, false, pid_set);
+ q = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, signo, 0, pid_set, NULL, NULL);
if (q < 0 && q != -EAGAIN && q != -ESRCH && q != -ENOENT)
r = q;
else
@@ -3356,7 +3357,7 @@ static const char* unit_drop_in_dir(Unit *u, UnitSetPropertiesMode mode) {
int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, const char *data) {
_cleanup_free_ char *p = NULL, *q = NULL;
- const char *dir, *prefixed;
+ const char *dir, *wrapped;
int r;
assert(u);
@@ -3365,6 +3366,7 @@ int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, co
/* When this is a transient unit file in creation, then let's not create a new drop-in but instead
* write to the transient unit file. */
fputs(data, u->transient_file);
+ fputc('\n', u->transient_file);
return 0;
}
@@ -3375,15 +3377,17 @@ int unit_write_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name, co
if (!dir)
return -EINVAL;
- prefixed = strjoina("# This is a drop-in unit file extension, created via \"systemctl set-property\" or an equivalent operation. Do not edit.\n",
- data);
+ wrapped = strjoina("# This is a drop-in unit file extension, created via \"systemctl set-property\"\n"
+ "# or an equivalent operation. Do not edit.\n",
+ data,
+ "\n");
r = drop_in_file(dir, u->id, 50, name, &p, &q);
if (r < 0)
return r;
(void) mkdir_p(p, 0755);
- r = write_string_file_atomic_label(q, prefixed);
+ r = write_string_file_atomic_label(q, wrapped);
if (r < 0)
return r;
@@ -3502,7 +3506,6 @@ int unit_make_transient(Unit *u) {
unit_add_to_dbus_queue(u);
unit_add_to_gc_queue(u);
- unit_add_to_load_queue(u);
fputs("# This is a transient unit file, created programmatically via the systemd API. Do not edit.\n",
u->transient_file);
@@ -3510,6 +3513,43 @@ int unit_make_transient(Unit *u) {
return 0;
}
+static void log_kill(pid_t pid, int sig, void *userdata) {
+ _cleanup_free_ char *comm = NULL;
+
+ (void) get_process_comm(pid, &comm);
+
+ /* Don't log about processes marked with brackets, under the assumption that these are temporary processes
+ only, like for example systemd's own PAM stub process. */
+ if (comm && comm[0] == '(')
+ return;
+
+ log_unit_notice(userdata,
+ "Killing process " PID_FMT " (%s) with signal SIG%s.",
+ pid,
+ strna(comm),
+ signal_to_string(sig));
+}
+
+static int operation_to_signal(KillContext *c, KillOperation k) {
+ assert(c);
+
+ switch (k) {
+
+ case KILL_TERMINATE:
+ case KILL_TERMINATE_AND_LOG:
+ return c->kill_signal;
+
+ case KILL_KILL:
+ return SIGKILL;
+
+ case KILL_ABORT:
+ return SIGABRT;
+
+ default:
+ assert_not_reached("KillOperation unknown");
+ }
+}
+
int unit_kill_context(
Unit *u,
KillContext *c,
@@ -3518,58 +3558,63 @@ int unit_kill_context(
pid_t control_pid,
bool main_pid_alien) {
- bool wait_for_exit = false;
+ bool wait_for_exit = false, send_sighup;
+ cg_kill_log_func_t log_func;
int sig, r;
assert(u);
assert(c);
+ /* Kill the processes belonging to this unit, in preparation for shutting the unit down. Returns > 0 if we
+ * killed something worth waiting for, 0 otherwise. */
+
if (c->kill_mode == KILL_NONE)
return 0;
- switch (k) {
- case KILL_KILL:
- sig = SIGKILL;
- break;
- case KILL_ABORT:
- sig = SIGABRT;
- break;
- case KILL_TERMINATE:
- sig = c->kill_signal;
- break;
- default:
- assert_not_reached("KillOperation unknown");
- }
+ sig = operation_to_signal(c, k);
+
+ send_sighup =
+ c->send_sighup &&
+ IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) &&
+ sig != SIGHUP;
+
+ log_func =
+ k != KILL_TERMINATE ||
+ IN_SET(sig, SIGKILL, SIGABRT) ? log_kill : NULL;
if (main_pid > 0) {
- r = kill_and_sigcont(main_pid, sig);
+ if (log_func)
+ log_func(main_pid, sig, u);
+ r = kill_and_sigcont(main_pid, sig);
if (r < 0 && r != -ESRCH) {
_cleanup_free_ char *comm = NULL;
- get_process_comm(main_pid, &comm);
+ (void) get_process_comm(main_pid, &comm);
log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid, strna(comm));
} else {
if (!main_pid_alien)
wait_for_exit = true;
- if (c->send_sighup && k == KILL_TERMINATE)
+ if (r != -ESRCH && send_sighup)
(void) kill(main_pid, SIGHUP);
}
}
if (control_pid > 0) {
- r = kill_and_sigcont(control_pid, sig);
+ if (log_func)
+ log_func(control_pid, sig, u);
+ r = kill_and_sigcont(control_pid, sig);
if (r < 0 && r != -ESRCH) {
_cleanup_free_ char *comm = NULL;
- get_process_comm(control_pid, &comm);
+ (void) get_process_comm(control_pid, &comm);
log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid, strna(comm));
} else {
wait_for_exit = true;
- if (c->send_sighup && k == KILL_TERMINATE)
+ if (r != -ESRCH && send_sighup)
(void) kill(control_pid, SIGHUP);
}
}
@@ -3583,7 +3628,11 @@ int unit_kill_context(
if (!pid_set)
return -ENOMEM;
- r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, sig, true, k != KILL_TERMINATE, false, pid_set);
+ r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ sig,
+ CGROUP_SIGCONT|CGROUP_IGNORE_SELF,
+ pid_set,
+ log_func, u);
if (r < 0) {
if (r != -EAGAIN && r != -ESRCH && r != -ENOENT)
log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", u->cgroup_path);
@@ -3608,14 +3657,18 @@ int unit_kill_context(
(detect_container() == 0 && !unit_cgroup_delegate(u)))
wait_for_exit = true;
- if (c->send_sighup && k != KILL_KILL) {
+ if (send_sighup) {
set_free(pid_set);
pid_set = unit_pid_set(main_pid, control_pid);
if (!pid_set)
return -ENOMEM;
- cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, false, true, false, pid_set);
+ cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ SIGHUP,
+ CGROUP_IGNORE_SELF,
+ pid_set,
+ NULL, NULL);
}
}
}
@@ -3788,7 +3841,7 @@ bool unit_is_pristine(Unit *u) {
/* Check if the unit already exists or is already around,
* in a number of different ways. Note that to cater for unit
* types such as slice, we are generally fine with units that
- * are marked UNIT_LOADED even even though nothing was
+ * are marked UNIT_LOADED even though nothing was
* actually loaded, as those unit types don't require a file
* on disk to validly load. */
diff --git a/src/grp-system/libcore/unit.h b/src/grp-system/libcore/unit.h
index 72ceed1fef..3e25bfd32a 100644
--- a/src/grp-system/libcore/unit.h
+++ b/src/grp-system/libcore/unit.h
@@ -37,6 +37,7 @@ typedef struct UnitVTable UnitVTable;
typedef enum KillOperation {
KILL_TERMINATE,
+ KILL_TERMINATE_AND_LOG,
KILL_KILL,
KILL_ABORT,
_KILL_OPERATION_MAX,
@@ -163,6 +164,9 @@ struct Unit {
* process SIGCHLD for */
Set *pids;
+ /* Used in sigchld event invocation to avoid repeat events being invoked */
+ uint64_t sigchldgen;
+
/* Used during GC sweeps */
unsigned gc_marker;