summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/auditsc.c332
-rw-r--r--kernel/cgroup.c18
-rw-r--r--kernel/module.c13
3 files changed, 181 insertions, 182 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2672d105c..b3341284f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -72,6 +72,7 @@
#include <linux/compat.h>
#include <linux/ctype.h>
#include <linux/string.h>
+#include <linux/uaccess.h>
#include <uapi/linux/limits.h>
#include "audit.h"
@@ -81,7 +82,8 @@
#define AUDITSC_SUCCESS 1
#define AUDITSC_FAILURE 2
-/* no execve audit message should be longer than this (userspace limits) */
+/* no execve audit message should be longer than this (userspace limits),
+ * see the note near the top of audit_log_execve_info() about this value */
#define MAX_EXECVE_AUDIT_LEN 7500
/* max length to print of cmdline/proctitle value during audit */
@@ -987,184 +989,178 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid,
return rc;
}
-/*
- * to_send and len_sent accounting are very loose estimates. We aren't
- * really worried about a hard cap to MAX_EXECVE_AUDIT_LEN so much as being
- * within about 500 bytes (next page boundary)
- *
- * why snprintf? an int is up to 12 digits long. if we just assumed when
- * logging that a[%d]= was going to be 16 characters long we would be wasting
- * space in every audit message. In one 7500 byte message we can log up to
- * about 1000 min size arguments. That comes down to about 50% waste of space
- * if we didn't do the snprintf to find out how long arg_num_len was.
- */
-static int audit_log_single_execve_arg(struct audit_context *context,
- struct audit_buffer **ab,
- int arg_num,
- size_t *len_sent,
- const char __user *p,
- char *buf)
+static void audit_log_execve_info(struct audit_context *context,
+ struct audit_buffer **ab)
{
- char arg_num_len_buf[12];
- const char __user *tmp_p = p;
- /* how many digits are in arg_num? 5 is the length of ' a=""' */
- size_t arg_num_len = snprintf(arg_num_len_buf, 12, "%d", arg_num) + 5;
- size_t len, len_left, to_send;
- size_t max_execve_audit_len = MAX_EXECVE_AUDIT_LEN;
- unsigned int i, has_cntl = 0, too_long = 0;
- int ret;
-
- /* strnlen_user includes the null we don't want to send */
- len_left = len = strnlen_user(p, MAX_ARG_STRLEN) - 1;
-
- /*
- * We just created this mm, if we can't find the strings
- * we just copied into it something is _very_ wrong. Similar
- * for strings that are too long, we should not have created
- * any.
- */
- if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {
- send_sig(SIGKILL, current, 0);
- return -1;
+ long len_max;
+ long len_rem;
+ long len_full;
+ long len_buf;
+ long len_abuf;
+ long len_tmp;
+ bool require_data;
+ bool encode;
+ unsigned int iter;
+ unsigned int arg;
+ char *buf_head;
+ char *buf;
+ const char __user *p = (const char __user *)current->mm->arg_start;
+
+ /* NOTE: this buffer needs to be large enough to hold all the non-arg
+ * data we put in the audit record for this argument (see the
+ * code below) ... at this point in time 96 is plenty */
+ char abuf[96];
+
+ /* NOTE: we set MAX_EXECVE_AUDIT_LEN to a rather arbitrary limit, the
+ * current value of 7500 is not as important as the fact that it
+ * is less than 8k, a setting of 7500 gives us plenty of wiggle
+ * room if we go over a little bit in the logging below */
+ WARN_ON_ONCE(MAX_EXECVE_AUDIT_LEN > 7500);
+ len_max = MAX_EXECVE_AUDIT_LEN;
+
+ /* scratch buffer to hold the userspace args */
+ buf_head = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL);
+ if (!buf_head) {
+ audit_panic("out of memory for argv string");
+ return;
}
+ buf = buf_head;
- /* walk the whole argument looking for non-ascii chars */
+ audit_log_format(*ab, "argc=%d", context->execve.argc);
+
+ len_rem = len_max;
+ len_buf = 0;
+ len_full = 0;
+ require_data = true;
+ encode = false;
+ iter = 0;
+ arg = 0;
do {
- if (len_left > MAX_EXECVE_AUDIT_LEN)
- to_send = MAX_EXECVE_AUDIT_LEN;
- else
- to_send = len_left;
- ret = copy_from_user(buf, tmp_p, to_send);
- /*
- * There is no reason for this copy to be short. We just
- * copied them here, and the mm hasn't been exposed to user-
- * space yet.
- */
- if (ret) {
- WARN_ON(1);
- send_sig(SIGKILL, current, 0);
- return -1;
- }
- buf[to_send] = '\0';
- has_cntl = audit_string_contains_control(buf, to_send);
- if (has_cntl) {
- /*
- * hex messages get logged as 2 bytes, so we can only
- * send half as much in each message
- */
- max_execve_audit_len = MAX_EXECVE_AUDIT_LEN / 2;
- break;
- }
- len_left -= to_send;
- tmp_p += to_send;
- } while (len_left > 0);
-
- len_left = len;
-
- if (len > max_execve_audit_len)
- too_long = 1;
-
- /* rewalk the argument actually logging the message */
- for (i = 0; len_left > 0; i++) {
- int room_left;
-
- if (len_left > max_execve_audit_len)
- to_send = max_execve_audit_len;
- else
- to_send = len_left;
-
- /* do we have space left to send this argument in this ab? */
- room_left = MAX_EXECVE_AUDIT_LEN - arg_num_len - *len_sent;
- if (has_cntl)
- room_left -= (to_send * 2);
- else
- room_left -= to_send;
- if (room_left < 0) {
- *len_sent = 0;
- audit_log_end(*ab);
- *ab = audit_log_start(context, GFP_KERNEL, AUDIT_EXECVE);
- if (!*ab)
- return 0;
- }
+ /* NOTE: we don't ever want to trust this value for anything
+ * serious, but the audit record format insists we
+ * provide an argument length for really long arguments,
+ * e.g. > MAX_EXECVE_AUDIT_LEN, so we have no choice but
+ * to use strncpy_from_user() to obtain this value for
+ * recording in the log, although we don't use it
+ * anywhere here to avoid a double-fetch problem */
+ if (len_full == 0)
+ len_full = strnlen_user(p, MAX_ARG_STRLEN) - 1;
+
+ /* read more data from userspace */
+ if (require_data) {
+ /* can we make more room in the buffer? */
+ if (buf != buf_head) {
+ memmove(buf_head, buf, len_buf);
+ buf = buf_head;
+ }
+
+ /* fetch as much as we can of the argument */
+ len_tmp = strncpy_from_user(&buf_head[len_buf], p,
+ len_max - len_buf);
+ if (len_tmp == -EFAULT) {
+ /* unable to copy from userspace */
+ send_sig(SIGKILL, current, 0);
+ goto out;
+ } else if (len_tmp == (len_max - len_buf)) {
+ /* buffer is not large enough */
+ require_data = true;
+ /* NOTE: if we are going to span multiple
+ * buffers force the encoding so we stand
+ * a chance at a sane len_full value and
+ * consistent record encoding */
+ encode = true;
+ len_full = len_full * 2;
+ p += len_tmp;
+ } else {
+ require_data = false;
+ if (!encode)
+ encode = audit_string_contains_control(
+ buf, len_tmp);
+ /* try to use a trusted value for len_full */
+ if (len_full < len_max)
+ len_full = (encode ?
+ len_tmp * 2 : len_tmp);
+ p += len_tmp + 1;
+ }
+ len_buf += len_tmp;
+ buf_head[len_buf] = '\0';
- /*
- * first record needs to say how long the original string was
- * so we can be sure nothing was lost.
- */
- if ((i == 0) && (too_long))
- audit_log_format(*ab, " a%d_len=%zu", arg_num,
- has_cntl ? 2*len : len);
-
- /*
- * normally arguments are small enough to fit and we already
- * filled buf above when we checked for control characters
- * so don't bother with another copy_from_user
- */
- if (len >= max_execve_audit_len)
- ret = copy_from_user(buf, p, to_send);
- else
- ret = 0;
- if (ret) {
- WARN_ON(1);
- send_sig(SIGKILL, current, 0);
- return -1;
+ /* length of the buffer in the audit record? */
+ len_abuf = (encode ? len_buf * 2 : len_buf + 2);
}
- buf[to_send] = '\0';
-
- /* actually log it */
- audit_log_format(*ab, " a%d", arg_num);
- if (too_long)
- audit_log_format(*ab, "[%d]", i);
- audit_log_format(*ab, "=");
- if (has_cntl)
- audit_log_n_hex(*ab, buf, to_send);
- else
- audit_log_string(*ab, buf);
-
- p += to_send;
- len_left -= to_send;
- *len_sent += arg_num_len;
- if (has_cntl)
- *len_sent += to_send * 2;
- else
- *len_sent += to_send;
- }
- /* include the null we didn't log */
- return len + 1;
-}
-static void audit_log_execve_info(struct audit_context *context,
- struct audit_buffer **ab)
-{
- int i, len;
- size_t len_sent = 0;
- const char __user *p;
- char *buf;
+ /* write as much as we can to the audit log */
+ if (len_buf > 0) {
+ /* NOTE: some magic numbers here - basically if we
+ * can't fit a reasonable amount of data into the
+ * existing audit buffer, flush it and start with
+ * a new buffer */
+ if ((sizeof(abuf) + 8) > len_rem) {
+ len_rem = len_max;
+ audit_log_end(*ab);
+ *ab = audit_log_start(context,
+ GFP_KERNEL, AUDIT_EXECVE);
+ if (!*ab)
+ goto out;
+ }
- p = (const char __user *)current->mm->arg_start;
+ /* create the non-arg portion of the arg record */
+ len_tmp = 0;
+ if (require_data || (iter > 0) ||
+ ((len_abuf + sizeof(abuf)) > len_rem)) {
+ if (iter == 0) {
+ len_tmp += snprintf(&abuf[len_tmp],
+ sizeof(abuf) - len_tmp,
+ " a%d_len=%lu",
+ arg, len_full);
+ }
+ len_tmp += snprintf(&abuf[len_tmp],
+ sizeof(abuf) - len_tmp,
+ " a%d[%d]=", arg, iter++);
+ } else
+ len_tmp += snprintf(&abuf[len_tmp],
+ sizeof(abuf) - len_tmp,
+ " a%d=", arg);
+ WARN_ON(len_tmp >= sizeof(abuf));
+ abuf[sizeof(abuf) - 1] = '\0';
+
+ /* log the arg in the audit record */
+ audit_log_format(*ab, "%s", abuf);
+ len_rem -= len_tmp;
+ len_tmp = len_buf;
+ if (encode) {
+ if (len_abuf > len_rem)
+ len_tmp = len_rem / 2; /* encoding */
+ audit_log_n_hex(*ab, buf, len_tmp);
+ len_rem -= len_tmp * 2;
+ len_abuf -= len_tmp * 2;
+ } else {
+ if (len_abuf > len_rem)
+ len_tmp = len_rem - 2; /* quotes */
+ audit_log_n_string(*ab, buf, len_tmp);
+ len_rem -= len_tmp + 2;
+ /* don't subtract the "2" because we still need
+ * to add quotes to the remaining string */
+ len_abuf -= len_tmp;
+ }
+ len_buf -= len_tmp;
+ buf += len_tmp;
+ }
- audit_log_format(*ab, "argc=%d", context->execve.argc);
+ /* ready to move to the next argument? */
+ if ((len_buf == 0) && !require_data) {
+ arg++;
+ iter = 0;
+ len_full = 0;
+ require_data = true;
+ encode = false;
+ }
+ } while (arg < context->execve.argc);
- /*
- * we need some kernel buffer to hold the userspace args. Just
- * allocate one big one rather than allocating one of the right size
- * for every single argument inside audit_log_single_execve_arg()
- * should be <8k allocation so should be pretty safe.
- */
- buf = kmalloc(MAX_EXECVE_AUDIT_LEN + 1, GFP_KERNEL);
- if (!buf) {
- audit_panic("out of memory for argv string");
- return;
- }
+ /* NOTE: the caller handles the final audit_log_end() call */
- for (i = 0; i < context->execve.argc; i++) {
- len = audit_log_single_execve_arg(context, ab, i,
- &len_sent, p, buf);
- if (len <= 0)
- break;
- p += len;
- }
- kfree(buf);
+out:
+ kfree(buf_head);
}
static void show_special(struct audit_context *context, int *call_panic)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 75c0ff00a..e0be49fc3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2215,12 +2215,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
goto out_unlock;
}
- /*
- * We know this subsystem has not yet been bound. Users in a non-init
- * user namespace may only mount hierarchies with no bound subsystems,
- * i.e. 'none,name=user1'
- */
- if (!opts.none && !capable(CAP_SYS_ADMIN)) {
+ /* Hierarchies may only be created in the initial cgroup namespace. */
+ if (ns != &init_cgroup_ns) {
ret = -EPERM;
goto out_unlock;
}
@@ -2962,6 +2958,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
int retval = 0;
mutex_lock(&cgroup_mutex);
+ percpu_down_write(&cgroup_threadgroup_rwsem);
for_each_root(root) {
struct cgroup *from_cgrp;
@@ -2976,6 +2973,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
if (retval)
break;
}
+ percpu_up_write(&cgroup_threadgroup_rwsem);
mutex_unlock(&cgroup_mutex);
return retval;
@@ -4343,6 +4341,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
mutex_lock(&cgroup_mutex);
+ percpu_down_write(&cgroup_threadgroup_rwsem);
+
/* all tasks in @from are being moved, all csets are source */
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &from->cset_links, cset_link)
@@ -4371,6 +4371,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
} while (task && !ret);
out_err:
cgroup_migrate_finish(&preloaded_csets);
+ percpu_up_write(&cgroup_threadgroup_rwsem);
mutex_unlock(&cgroup_mutex);
return ret;
}
@@ -6309,14 +6310,11 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- mutex_lock(&cgroup_mutex);
+ /* It is not safe to take cgroup_mutex here */
spin_lock_irq(&css_set_lock);
-
cset = task_css_set(current);
get_css_set(cset);
-
spin_unlock_irq(&css_set_lock);
- mutex_unlock(&cgroup_mutex);
new_ns = alloc_cgroup_ns();
if (IS_ERR(new_ns)) {
diff --git a/kernel/module.c b/kernel/module.c
index 5f71aa63e..6458a2f17 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2687,13 +2687,18 @@ static inline void kmemleak_load_module(const struct module *mod,
#endif
#ifdef CONFIG_MODULE_SIG
-static int module_sig_check(struct load_info *info)
+static int module_sig_check(struct load_info *info, int flags)
{
int err = -ENOKEY;
const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
const void *mod = info->hdr;
- if (info->len > markerlen &&
+ /*
+ * Require flags == 0, as a module with version information
+ * removed is no longer the module that was signed
+ */
+ if (flags == 0 &&
+ info->len > markerlen &&
memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
/* We truncate the module to discard the signature */
info->len -= markerlen;
@@ -2712,7 +2717,7 @@ static int module_sig_check(struct load_info *info)
return err;
}
#else /* !CONFIG_MODULE_SIG */
-static int module_sig_check(struct load_info *info)
+static int module_sig_check(struct load_info *info, int flags)
{
return 0;
}
@@ -3498,7 +3503,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
long err;
char *after_dashes;
- err = module_sig_check(info);
+ err = module_sig_check(info, flags);
if (err)
goto free_copy;