summaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2016-08-25 15:57:21 +0200
committerDjalal Harouni <tixxdz@opendz.org>2016-09-25 10:42:18 +0200
commit3f815163ff8fdcdbd329680580df36f94e15325d (patch)
tree1436ba9f8a74ad8c6f4311764b7591a8283a5c00 /src/core
parent160cfdbed3eb23b6bc3c17613685b756f23be4a1 (diff)
core: introduce ProtectSystem=strict
Let's tighten our sandbox a bit more: with this change ProtectSystem= gains a new setting "strict". If set, the entire directory tree of the system is mounted read-only, but the API file systems /proc, /dev, /sys are excluded (they may be managed with PrivateDevices= and ProtectKernelTunables=). Also, /home and /root are excluded as those are left for ProtectHome= to manage. In this mode, all "real" file systems (i.e. non-API file systems) are mounted read-only, and specific directories may only be excluded via ReadWriteDirectories=, thus implementing an effective whitelist instead of blacklist of writable directories. While we are at it, also add /efi to the list of paths always affected by ProtectSystem=. This is a follow-up for b52a109ad38cd37b660ccd5394ff5c171a5e5355 which added /efi as alternative for /boot. Our namespacing logic should respect that too.
Diffstat (limited to 'src/core')
-rw-r--r--src/core/namespace.c56
-rw-r--r--src/core/namespace.h1
2 files changed, 48 insertions, 9 deletions
diff --git a/src/core/namespace.c b/src/core/namespace.c
index e08d7459c5..498cd139bf 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -472,9 +472,11 @@ int setup_namespace(
private_dev +
(protect_sysctl ? 3 : 0) +
(protect_cgroups != protect_sysctl) +
- (protect_home != PROTECT_HOME_NO ? 3 : 0) +
- (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
- (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
+ (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT ? 3 : 0) +
+ (protect_system == PROTECT_SYSTEM_STRICT ?
+ (2 + !private_dev + !protect_sysctl) :
+ ((protect_system != PROTECT_SYSTEM_NO ? 3 : 0) +
+ (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0)));
if (n > 0) {
m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
@@ -529,9 +531,13 @@ int setup_namespace(
m++;
}
- if (protect_home != PROTECT_HOME_NO) {
+ if (protect_home != PROTECT_HOME_NO || protect_system == PROTECT_SYSTEM_STRICT) {
const char *home_dir, *run_user_dir, *root_dir;
+ /* If protection of $HOME and $XDG_RUNTIME_DIR is requested, then go for it. If we are in
+ * strict system protection mode, then also add entries for these directories, but mark them
+ * writable. This is because we want ProtectHome= and ProtectSystem= to be fully orthogonal. */
+
home_dir = prefix_roota(root_directory, "/home");
home_dir = strjoina("-", home_dir);
run_user_dir = prefix_roota(root_directory, "/run/user");
@@ -540,22 +546,53 @@ int setup_namespace(
root_dir = strjoina("-", root_dir);
r = append_mounts(&m, STRV_MAKE(home_dir, run_user_dir, root_dir),
- protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ protect_home == PROTECT_HOME_READ_ONLY ? READONLY :
+ protect_home == PROTECT_HOME_YES ? INACCESSIBLE : READWRITE);
if (r < 0)
return r;
}
- if (protect_system != PROTECT_SYSTEM_NO) {
- const char *usr_dir, *boot_dir, *etc_dir;
+ if (protect_system == PROTECT_SYSTEM_STRICT) {
+ /* In strict mode, we mount everything read-only, except for /proc, /dev, /sys which are the
+ * kernel API VFS, which are left writable, but PrivateDevices= + ProtectKernelTunables=
+ * protect those, and these options should be fully orthogonal. (And of course /home and
+ * friends are also left writable, as ProtectHome= shall manage those, orthogonally, see
+ * above). */
+
+ m->path = prefix_roota(root_directory, "/");
+ m->mode = READONLY;
+ m++;
+
+ m->path = prefix_roota(root_directory, "/proc");
+ m->mode = READWRITE;
+ m++;
+
+ if (!private_dev) {
+ m->path = prefix_roota(root_directory, "/dev");
+ m->mode = READWRITE;
+ m++;
+ }
+ if (!protect_sysctl) {
+ m->path = prefix_roota(root_directory, "/sys");
+ m->mode = READWRITE;
+ m++;
+ }
+
+ } else if (protect_system != PROTECT_SYSTEM_NO) {
+ const char *usr_dir, *boot_dir, *efi_dir, *etc_dir;
+
+ /* In any other mode we simply mark the relevant three directories read-only. */
usr_dir = prefix_roota(root_directory, "/usr");
boot_dir = prefix_roota(root_directory, "/boot");
boot_dir = strjoina("-", boot_dir);
+ efi_dir = prefix_roota(root_directory, "/efi");
+ efi_dir = strjoina("-", efi_dir);
etc_dir = prefix_roota(root_directory, "/etc");
r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL
- ? STRV_MAKE(usr_dir, boot_dir, etc_dir)
- : STRV_MAKE(usr_dir, boot_dir), READONLY);
+ ? STRV_MAKE(usr_dir, boot_dir, efi_dir, etc_dir)
+ : STRV_MAKE(usr_dir, boot_dir, efi_dir), READONLY);
if (r < 0)
return r;
}
@@ -780,6 +817,7 @@ static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
[PROTECT_SYSTEM_NO] = "no",
[PROTECT_SYSTEM_YES] = "yes",
[PROTECT_SYSTEM_FULL] = "full",
+ [PROTECT_SYSTEM_STRICT] = "strict",
};
DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 3845336287..6505bcc499 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -35,6 +35,7 @@ typedef enum ProtectSystem {
PROTECT_SYSTEM_NO,
PROTECT_SYSTEM_YES,
PROTECT_SYSTEM_FULL,
+ PROTECT_SYSTEM_STRICT,
_PROTECT_SYSTEM_MAX,
_PROTECT_SYSTEM_INVALID = -1
} ProtectSystem;