Diffstat (limited to 'ipc')
-rw-r--r--  ipc/kdbus/bus.c         |   6
-rw-r--r--  ipc/kdbus/connection.c  |  53
-rw-r--r--  ipc/kdbus/connection.h  |   9
-rw-r--r--  ipc/kdbus/domain.c      |   4
-rw-r--r--  ipc/kdbus/endpoint.c    |   4
-rw-r--r--  ipc/kdbus/fs.c          |   4
-rw-r--r--  ipc/kdbus/handle.c      |   4
-rw-r--r--  ipc/kdbus/limits.h      |   2
-rw-r--r--  ipc/kdbus/main.c        |  13
-rw-r--r--  ipc/kdbus/metadata.c    |  21
-rw-r--r--  ipc/kdbus/names.c       | 762
-rw-r--r--  ipc/kdbus/names.h       |  55
-rw-r--r--  ipc/kdbus/node.c        | 309
-rw-r--r--  ipc/kdbus/node.h        |   7
-rw-r--r--  ipc/mqueue.c            |  54
-rw-r--r--  ipc/msg.c               |  50
-rw-r--r--  ipc/sem.c               |  51
-rw-r--r--  ipc/shm.c               |  14
-rw-r--r--  ipc/util.c              |  28
-rw-r--r--  ipc/util.h              |   2
20 files changed, 842 insertions(+), 610 deletions(-)
diff --git a/ipc/kdbus/bus.c b/ipc/kdbus/bus.c
index a67f825bd..e636d3478 100644
--- a/ipc/kdbus/bus.c
+++ b/ipc/kdbus/bus.c
@@ -167,7 +167,7 @@ static struct kdbus_bus *kdbus_bus_new(struct kdbus_domain *domain,
return b;
exit_unref:
- kdbus_node_deactivate(&b->node);
+ kdbus_node_drain(&b->node);
kdbus_node_unref(&b->node);
return ERR_PTR(ret);
}
@@ -416,11 +416,11 @@ exit:
ret = kdbus_args_clear(&args, ret);
if (ret < 0) {
if (ep) {
- kdbus_node_deactivate(&ep->node);
+ kdbus_node_drain(&ep->node);
kdbus_ep_unref(ep);
}
if (bus) {
- kdbus_node_deactivate(&bus->node);
+ kdbus_node_drain(&bus->node);
kdbus_bus_unref(bus);
}
return ERR_PTR(ret);
diff --git a/ipc/kdbus/connection.c b/ipc/kdbus/connection.c
index aa3296ea4..ef63d6533 100644
--- a/ipc/kdbus/connection.c
+++ b/ipc/kdbus/connection.c
@@ -129,9 +129,7 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep,
#endif
mutex_init(&conn->lock);
INIT_LIST_HEAD(&conn->names_list);
- INIT_LIST_HEAD(&conn->names_queue_list);
INIT_LIST_HEAD(&conn->reply_list);
- atomic_set(&conn->name_count, 0);
atomic_set(&conn->request_count, 0);
atomic_set(&conn->lost_count, 0);
INIT_DELAYED_WORK(&conn->work, kdbus_reply_list_scan_work);
@@ -284,7 +282,6 @@ static void __kdbus_conn_free(struct kref *kref)
WARN_ON(delayed_work_pending(&conn->work));
WARN_ON(!list_empty(&conn->queue.msg_list));
WARN_ON(!list_empty(&conn->names_list));
- WARN_ON(!list_empty(&conn->names_queue_list));
WARN_ON(!list_empty(&conn->reply_list));
if (conn->user) {
@@ -618,12 +615,13 @@ int kdbus_conn_disconnect(struct kdbus_conn *conn, bool ensure_queue_empty)
*/
bool kdbus_conn_has_name(struct kdbus_conn *conn, const char *name)
{
- struct kdbus_name_entry *e;
+ struct kdbus_name_owner *owner;
lockdep_assert_held(&conn->ep->bus->name_registry->rwlock);
- list_for_each_entry(e, &conn->names_list, conn_entry)
- if (strcmp(e->name, name) == 0)
+ list_for_each_entry(owner, &conn->names_list, conn_entry)
+ if (!(owner->flags & KDBUS_NAME_IN_QUEUE) &&
+ !strcmp(name, owner->name->name))
return true;
return false;
@@ -1052,6 +1050,7 @@ static int kdbus_pin_dst(struct kdbus_bus *bus,
struct kdbus_conn **out_dst)
{
const struct kdbus_msg *msg = staging->msg;
+ struct kdbus_name_owner *owner = NULL;
struct kdbus_name_entry *name = NULL;
struct kdbus_conn *dst = NULL;
int ret;
@@ -1070,7 +1069,9 @@ static int kdbus_pin_dst(struct kdbus_bus *bus,
} else {
name = kdbus_name_lookup_unlocked(bus->name_registry,
staging->dst_name);
- if (!name)
+ if (name)
+ owner = kdbus_name_get_owner(name);
+ if (!owner)
return -ESRCH;
/*
@@ -1082,19 +1083,14 @@ static int kdbus_pin_dst(struct kdbus_bus *bus,
* owns the given name.
*/
if (msg->dst_id != KDBUS_DST_ID_NAME &&
- msg->dst_id != name->conn->id)
+ msg->dst_id != owner->conn->id)
return -EREMCHG;
- if (!name->conn && name->activator)
- dst = kdbus_conn_ref(name->activator);
- else
- dst = kdbus_conn_ref(name->conn);
-
if ((msg->flags & KDBUS_MSG_NO_AUTO_START) &&
- kdbus_conn_is_activator(dst)) {
- ret = -EADDRNOTAVAIL;
- goto error;
- }
+ kdbus_conn_is_activator(owner->conn))
+ return -EADDRNOTAVAIL;
+
+ dst = kdbus_conn_ref(owner->conn);
}
*out_name = name;
@@ -1383,7 +1379,7 @@ static bool kdbus_conn_policy_query_all(struct kdbus_conn *conn,
struct kdbus_conn *whom,
unsigned int access)
{
- struct kdbus_name_entry *ne;
+ struct kdbus_name_owner *owner;
bool pass = false;
int res;
@@ -1392,10 +1388,14 @@ static bool kdbus_conn_policy_query_all(struct kdbus_conn *conn,
down_read(&db->entries_rwlock);
mutex_lock(&whom->lock);
- list_for_each_entry(ne, &whom->names_list, conn_entry) {
- res = kdbus_policy_query_unlocked(db, conn_creds ? : conn->cred,
- ne->name,
- kdbus_strhash(ne->name));
+ list_for_each_entry(owner, &whom->names_list, conn_entry) {
+ if (owner->flags & KDBUS_NAME_IN_QUEUE)
+ continue;
+
+ res = kdbus_policy_query_unlocked(db,
+ conn_creds ? : conn->cred,
+ owner->name->name,
+ kdbus_strhash(owner->name->name));
if (res >= (int)access) {
pass = true;
break;
@@ -1713,6 +1713,7 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
struct kdbus_meta_conn *conn_meta = NULL;
struct kdbus_pool_slice *slice = NULL;
struct kdbus_name_entry *entry = NULL;
+ struct kdbus_name_owner *owner = NULL;
struct kdbus_conn *owner_conn = NULL;
struct kdbus_item *meta_items = NULL;
struct kdbus_info info = {};
@@ -1749,15 +1750,17 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp)
if (name) {
entry = kdbus_name_lookup_unlocked(bus->name_registry, name);
- if (!entry || !entry->conn ||
+ if (entry)
+ owner = kdbus_name_get_owner(entry);
+ if (!owner ||
!kdbus_conn_policy_see_name(conn, current_cred(), name) ||
- (cmd->id != 0 && entry->conn->id != cmd->id)) {
+ (cmd->id != 0 && owner->conn->id != cmd->id)) {
/* pretend a name doesn't exist if you cannot see it */
ret = -ESRCH;
goto exit;
}
- owner_conn = kdbus_conn_ref(entry->conn);
+ owner_conn = kdbus_conn_ref(owner->conn);
} else if (cmd->id > 0) {
owner_conn = kdbus_bus_find_conn_by_id(bus, cmd->id);
if (!owner_conn || !kdbus_conn_policy_see(conn, current_cred(),
diff --git a/ipc/kdbus/connection.h b/ipc/kdbus/connection.h
index 8e0180ace..1ad082014 100644
--- a/ipc/kdbus/connection.h
+++ b/ipc/kdbus/connection.h
@@ -30,6 +30,7 @@
KDBUS_HELLO_POLICY_HOLDER | \
KDBUS_HELLO_MONITOR)
+struct kdbus_name_entry;
struct kdbus_quota;
struct kdbus_staging;
@@ -61,7 +62,6 @@ struct kdbus_staging;
* @cred: The credentials of the connection at creation time
* @pid: Pid at creation time
* @root_path: Root path at creation time
- * @name_count: Number of owned well-known names
* @request_count: Number of pending requests issued by this
* connection that are waiting for replies from
* other peers
@@ -70,9 +70,8 @@ struct kdbus_staging;
* @queue: The message queue associated with this connection
* @quota: Array of per-user quota indexed by user->id
* @n_quota: Number of elements in quota array
- * @activator_of: Well-known name entry this connection acts as an
* @names_list: List of well-known names
- * @names_queue_list: Well-known names this connection waits for
+ * @name_count: Number of owned well-known names
* @privileged: Whether this connection is privileged on the domain
* @owner: Owned by the same user as the bus owner
*/
@@ -102,7 +101,6 @@ struct kdbus_conn {
const struct cred *cred;
struct pid *pid;
struct path root_path;
- atomic_t name_count;
atomic_t request_count;
atomic_t lost_count;
wait_queue_head_t wait;
@@ -112,9 +110,8 @@ struct kdbus_conn {
unsigned int n_quota;
/* protected by registry->rwlock */
- struct kdbus_name_entry *activator_of;
struct list_head names_list;
- struct list_head names_queue_list;
+ unsigned int name_count;
bool privileged:1;
bool owner:1;
diff --git a/ipc/kdbus/domain.c b/ipc/kdbus/domain.c
index ac9f760c1..5d52d009d 100644
--- a/ipc/kdbus/domain.c
+++ b/ipc/kdbus/domain.c
@@ -60,7 +60,7 @@ static struct kdbus_node *kdbus_domain_control_new(struct kdbus_domain *domain,
return node;
exit_free:
- kdbus_node_deactivate(node);
+ kdbus_node_drain(node);
kdbus_node_unref(node);
return ERR_PTR(ret);
}
@@ -114,7 +114,7 @@ struct kdbus_domain *kdbus_domain_new(unsigned int access)
return d;
exit_unref:
- kdbus_node_deactivate(&d->node);
+ kdbus_node_drain(&d->node);
kdbus_node_unref(&d->node);
return ERR_PTR(ret);
}
diff --git a/ipc/kdbus/endpoint.c b/ipc/kdbus/endpoint.c
index 44e7a20de..5694ff6dc 100644
--- a/ipc/kdbus/endpoint.c
+++ b/ipc/kdbus/endpoint.c
@@ -146,7 +146,7 @@ struct kdbus_ep *kdbus_ep_new(struct kdbus_bus *bus, const char *name,
return e;
exit_unref:
- kdbus_node_deactivate(&e->node);
+ kdbus_node_drain(&e->node);
kdbus_node_unref(&e->node);
return ERR_PTR(ret);
}
@@ -262,7 +262,7 @@ exit:
ret = kdbus_args_clear(&args, ret);
if (ret < 0) {
if (ep) {
- kdbus_node_deactivate(&ep->node);
+ kdbus_node_drain(&ep->node);
kdbus_ep_unref(ep);
}
return ERR_PTR(ret);
diff --git a/ipc/kdbus/fs.c b/ipc/kdbus/fs.c
index 09c480924..6330c61e5 100644
--- a/ipc/kdbus/fs.c
+++ b/ipc/kdbus/fs.c
@@ -320,7 +320,7 @@ static void fs_super_kill(struct super_block *sb)
struct kdbus_domain *domain = sb->s_fs_info;
if (domain) {
- kdbus_node_deactivate(&domain->node);
+ kdbus_node_drain(&domain->node);
domain->dentry = NULL;
}
@@ -353,7 +353,7 @@ static struct dentry *fs_super_mount(struct file_system_type *fs_type,
sb = sget(fs_type, NULL, fs_super_set, flags, domain);
if (IS_ERR(sb)) {
- kdbus_node_deactivate(&domain->node);
+ kdbus_node_drain(&domain->node);
kdbus_domain_unref(domain);
return ERR_CAST(sb);
}
diff --git a/ipc/kdbus/handle.c b/ipc/kdbus/handle.c
index fc60932d6..2f82c2a32 100644
--- a/ipc/kdbus/handle.c
+++ b/ipc/kdbus/handle.c
@@ -310,13 +310,13 @@ static int kdbus_handle_release(struct inode *inode, struct file *file)
switch (handle->type) {
case KDBUS_HANDLE_BUS_OWNER:
if (handle->bus_owner) {
- kdbus_node_deactivate(&handle->bus_owner->node);
+ kdbus_node_drain(&handle->bus_owner->node);
kdbus_bus_unref(handle->bus_owner);
}
break;
case KDBUS_HANDLE_EP_OWNER:
if (handle->ep_owner) {
- kdbus_node_deactivate(&handle->ep_owner->node);
+ kdbus_node_drain(&handle->ep_owner->node);
kdbus_ep_unref(handle->ep_owner);
}
break;
diff --git a/ipc/kdbus/limits.h b/ipc/kdbus/limits.h
index c54925a25..bd47119cd 100644
--- a/ipc/kdbus/limits.h
+++ b/ipc/kdbus/limits.h
@@ -41,7 +41,7 @@
#define KDBUS_SYSNAME_MAX_LEN 63
/* maximum number of matches per connection */
-#define KDBUS_MATCH_MAX 256
+#define KDBUS_MATCH_MAX 4096
/* maximum number of queued messages from the same individual user */
#define KDBUS_CONN_MAX_MSGS 256
diff --git a/ipc/kdbus/main.c b/ipc/kdbus/main.c
index 1ad4dc8da..c2117ea53 100644
--- a/ipc/kdbus/main.c
+++ b/ipc/kdbus/main.c
@@ -75,16 +75,13 @@
 * '» struct kdbus_ep *ep (owned)
*/
-/* kdbus mount-point /sys/fs/kdbus */
-static struct kobject *kdbus_dir;
-
static int __init kdbus_init(void)
{
int ret;
- kdbus_dir = kobject_create_and_add(KBUILD_MODNAME, fs_kobj);
- if (!kdbus_dir)
- return -ENOMEM;
+ ret = sysfs_create_mount_point(fs_kobj, KBUILD_MODNAME);
+ if (ret)
+ return ret;
ret = kdbus_fs_init();
if (ret < 0) {
@@ -96,14 +93,14 @@ static int __init kdbus_init(void)
return 0;
exit_dir:
- kobject_put(kdbus_dir);
+ sysfs_remove_mount_point(fs_kobj, KBUILD_MODNAME);
return ret;
}
static void __exit kdbus_exit(void)
{
kdbus_fs_exit();
- kobject_put(kdbus_dir);
+ sysfs_remove_mount_point(fs_kobj, KBUILD_MODNAME);
ida_destroy(&kdbus_node_ida);
}
diff --git a/ipc/kdbus/metadata.c b/ipc/kdbus/metadata.c
index d4973a90a..71ca475a8 100644
--- a/ipc/kdbus/metadata.c
+++ b/ipc/kdbus/metadata.c
@@ -603,7 +603,7 @@ static void kdbus_meta_conn_collect_timestamp(struct kdbus_meta_conn *mc,
static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc,
struct kdbus_conn *conn)
{
- const struct kdbus_name_entry *e;
+ const struct kdbus_name_owner *owner;
struct kdbus_item *item;
size_t slen, size;
@@ -611,9 +611,11 @@ static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc,
size = 0;
/* open-code length calculation to avoid final padding */
- list_for_each_entry(e, &conn->names_list, conn_entry)
- size = KDBUS_ALIGN8(size) + KDBUS_ITEM_HEADER_SIZE +
- sizeof(struct kdbus_name) + strlen(e->name) + 1;
+ list_for_each_entry(owner, &conn->names_list, conn_entry)
+ if (!(owner->flags & KDBUS_NAME_IN_QUEUE))
+ size = KDBUS_ALIGN8(size) + KDBUS_ITEM_HEADER_SIZE +
+ sizeof(struct kdbus_name) +
+ strlen(owner->name->name) + 1;
if (!size)
return 0;
@@ -626,12 +628,15 @@ static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc,
mc->owned_names_items = item;
mc->owned_names_size = size;
- list_for_each_entry(e, &conn->names_list, conn_entry) {
- slen = strlen(e->name) + 1;
+ list_for_each_entry(owner, &conn->names_list, conn_entry) {
+ if (owner->flags & KDBUS_NAME_IN_QUEUE)
+ continue;
+
+ slen = strlen(owner->name->name) + 1;
kdbus_item_set(item, KDBUS_ITEM_OWNED_NAME, NULL,
sizeof(struct kdbus_name) + slen);
- item->name.flags = e->flags;
- memcpy(item->name.name, e->name, slen);
+ item->name.flags = owner->flags;
+ memcpy(item->name.name, owner->name->name, slen);
item = KDBUS_ITEM_NEXT(item);
}
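
The collect_names() change above keeps a two-pass serialization pattern: the first pass computes the total byte size (each item starts 8-byte aligned, with no padding after the final item), then the buffer is allocated once and filled in a second pass. Below is a stand-alone sketch of that pattern; ALIGN8 and struct item are simplified stand-ins for the KDBUS_ALIGN8 macro and struct kdbus_item, which are not shown in this patch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALIGN8(x) (((x) + 7) & ~(size_t)7)

/* Simplified stand-in for struct kdbus_item with an inline string. */
struct item {
	size_t size;
	char name[];
};

int main(void)
{
	const char *names[] = { "org.example.Foo", "org.example.LongerName" };
	size_t n = sizeof(names) / sizeof(names[0]);
	size_t size = 0, pos = 0, i;
	char *buf;

	/* pass 1: open-coded size calculation, no padding after last item */
	for (i = 0; i < n; i++)
		size = ALIGN8(size) + sizeof(struct item) + strlen(names[i]) + 1;

	buf = calloc(1, size);
	if (!buf)
		return 1;

	/* pass 2: fill items at 8-byte aligned offsets */
	for (i = 0; i < n; i++) {
		struct item *it;

		pos = ALIGN8(pos);
		it = (struct item *)(buf + pos);
		it->size = sizeof(struct item) + strlen(names[i]) + 1;
		memcpy(it->name, names[i], strlen(names[i]) + 1);
		pos += it->size;
	}

	printf("serialized %zu names into %zu bytes\n", n, size);
	free(buf);
	return 0;
}
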
diff --git a/ipc/kdbus/names.c b/ipc/kdbus/names.c
index 057f8061c..bf44ca3f1 100644
--- a/ipc/kdbus/names.c
+++ b/ipc/kdbus/names.c
@@ -34,167 +34,128 @@
#include "notify.h"
#include "policy.h"
-struct kdbus_name_pending {
- u64 flags;
- struct kdbus_conn *conn;
- struct kdbus_name_entry *name;
- struct list_head conn_entry;
- struct list_head name_entry;
-};
+#define KDBUS_NAME_SAVED_MASK (KDBUS_NAME_ALLOW_REPLACEMENT | \
+ KDBUS_NAME_QUEUE)
-static int kdbus_name_pending_new(struct kdbus_name_entry *e,
- struct kdbus_conn *conn, u64 flags)
+static bool kdbus_name_owner_is_used(struct kdbus_name_owner *owner)
{
- struct kdbus_name_pending *p;
-
- kdbus_conn_assert_active(conn);
-
- p = kmalloc(sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- p->flags = flags;
- p->conn = conn;
- p->name = e;
- list_add_tail(&p->conn_entry, &conn->names_queue_list);
- list_add_tail(&p->name_entry, &e->queue);
-
- return 0;
+ return !list_empty(&owner->name_entry) ||
+ owner == owner->name->activator;
}
-static void kdbus_name_pending_free(struct kdbus_name_pending *p)
+static struct kdbus_name_owner *
+kdbus_name_owner_new(struct kdbus_conn *conn, struct kdbus_name_entry *name,
+ u64 flags)
{
- if (!p)
- return;
+ struct kdbus_name_owner *owner;
- list_del(&p->name_entry);
- list_del(&p->conn_entry);
- kfree(p);
-}
-
-static struct kdbus_name_entry *
-kdbus_name_entry_new(struct kdbus_name_registry *r, u32 hash, const char *name)
-{
- struct kdbus_name_entry *e;
- size_t namelen;
+ kdbus_conn_assert_active(conn);
- namelen = strlen(name);
+ if (conn->name_count >= KDBUS_CONN_MAX_NAMES)
+ return ERR_PTR(-E2BIG);
- e = kmalloc(sizeof(*e) + namelen + 1, GFP_KERNEL);
- if (!e)
+ owner = kmalloc(sizeof(*owner), GFP_KERNEL);
+ if (!owner)
return ERR_PTR(-ENOMEM);
- e->name_id = ++r->name_seq_last;
- e->flags = 0;
- e->conn = NULL;
- e->activator = NULL;
- INIT_LIST_HEAD(&e->queue);
- INIT_LIST_HEAD(&e->conn_entry);
- hash_add(r->entries_hash, &e->hentry, hash);
- memcpy(e->name, name, namelen + 1);
+ owner->flags = flags & KDBUS_NAME_SAVED_MASK;
+ owner->conn = conn;
+ owner->name = name;
+ list_add_tail(&owner->conn_entry, &conn->names_list);
+ INIT_LIST_HEAD(&owner->name_entry);
- return e;
+ ++conn->name_count;
+ return owner;
}
-static void kdbus_name_entry_free(struct kdbus_name_entry *e)
+static void kdbus_name_owner_free(struct kdbus_name_owner *owner)
{
- if (!e)
+ if (!owner)
return;
- WARN_ON(!list_empty(&e->conn_entry));
- WARN_ON(!list_empty(&e->queue));
- WARN_ON(e->activator);
- WARN_ON(e->conn);
-
- hash_del(&e->hentry);
- kfree(e);
+ WARN_ON(kdbus_name_owner_is_used(owner));
+ --owner->conn->name_count;
+ list_del(&owner->conn_entry);
+ kfree(owner);
}
-static void kdbus_name_entry_set_owner(struct kdbus_name_entry *e,
- struct kdbus_conn *conn, u64 flags)
+static struct kdbus_name_owner *
+kdbus_name_owner_find(struct kdbus_name_entry *name, struct kdbus_conn *conn)
{
- WARN_ON(e->conn);
+ struct kdbus_name_owner *owner;
- e->conn = kdbus_conn_ref(conn);
- e->flags = flags;
- atomic_inc(&conn->name_count);
- list_add_tail(&e->conn_entry, &e->conn->names_list);
+ /*
+ * Use conn->names_list over name->queue to make sure boundaries of
+ * this linear search are controlled by the connection itself.
+ * Furthermore, this will find normal owners as well as activators
+ * without any additional code.
+ */
+ list_for_each_entry(owner, &conn->names_list, conn_entry)
+ if (owner->name == name)
+ return owner;
+
+ return NULL;
}
-static void kdbus_name_entry_remove_owner(struct kdbus_name_entry *e)
+static bool kdbus_name_entry_is_used(struct kdbus_name_entry *name)
{
- WARN_ON(!e->conn);
-
- list_del_init(&e->conn_entry);
- atomic_dec(&e->conn->name_count);
- e->flags = 0;
- e->conn = kdbus_conn_unref(e->conn);
+ return !list_empty(&name->queue) || name->activator;
}
-static void kdbus_name_entry_replace_owner(struct kdbus_name_entry *e,
- struct kdbus_conn *conn, u64 flags)
+static struct kdbus_name_owner *
+kdbus_name_entry_first(struct kdbus_name_entry *name)
{
- if (WARN_ON(!e->conn) || WARN_ON(conn == e->conn))
- return;
-
- kdbus_notify_name_change(conn->ep->bus, KDBUS_ITEM_NAME_CHANGE,
- e->conn->id, conn->id,
- e->flags, flags, e->name);
- kdbus_name_entry_remove_owner(e);
- kdbus_name_entry_set_owner(e, conn, flags);
+ return list_first_entry_or_null(&name->queue, struct kdbus_name_owner,
+ name_entry);
}
-/**
- * kdbus_name_is_valid() - check if a name is valid
- * @p: The name to check
- * @allow_wildcard: Whether or not to allow a wildcard name
- *
- * A name is valid if all of the following criterias are met:
- *
- * - The name has two or more elements separated by a period ('.') character.
- * - All elements must contain at least one character.
- * - Each element must only contain the ASCII characters "[A-Z][a-z][0-9]_-"
- * and must not begin with a digit.
- * - The name must not exceed KDBUS_NAME_MAX_LEN.
- * - If @allow_wildcard is true, the name may end on '.*'
- */
-bool kdbus_name_is_valid(const char *p, bool allow_wildcard)
+static struct kdbus_name_entry *
+kdbus_name_entry_new(struct kdbus_name_registry *r, u32 hash,
+ const char *name_str)
{
- bool dot, found_dot = false;
- const char *q;
+ struct kdbus_name_entry *name;
+ size_t namelen;
- for (dot = true, q = p; *q; q++) {
- if (*q == '.') {
- if (dot)
- return false;
+ lockdep_assert_held(&r->rwlock);
- found_dot = true;
- dot = true;
- } else {
- bool good;
+ namelen = strlen(name_str);
- good = isalpha(*q) || (!dot && isdigit(*q)) ||
- *q == '_' || *q == '-' ||
- (allow_wildcard && dot &&
- *q == '*' && *(q + 1) == '\0');
+ name = kmalloc(sizeof(*name) + namelen + 1, GFP_KERNEL);
+ if (!name)
+ return ERR_PTR(-ENOMEM);
- if (!good)
- return false;
+ name->name_id = ++r->name_seq_last;
+ name->activator = NULL;
+ INIT_LIST_HEAD(&name->queue);
+ hash_add(r->entries_hash, &name->hentry, hash);
+ memcpy(name->name, name_str, namelen + 1);
- dot = false;
- }
- }
+ return name;
+}
- if (q - p > KDBUS_NAME_MAX_LEN)
- return false;
+static void kdbus_name_entry_free(struct kdbus_name_entry *name)
+{
+ if (!name)
+ return;
- if (dot)
- return false;
+ WARN_ON(kdbus_name_entry_is_used(name));
+ hash_del(&name->hentry);
+ kfree(name);
+}
- if (!found_dot)
- return false;
+static struct kdbus_name_entry *
+kdbus_name_entry_find(struct kdbus_name_registry *r, u32 hash,
+ const char *name_str)
+{
+ struct kdbus_name_entry *name;
- return true;
+ lockdep_assert_held(&r->rwlock);
+
+ hash_for_each_possible(r->entries_hash, name, hentry, hash)
+ if (!strcmp(name->name, name_str))
+ return name;
+
+ return NULL;
}
/**
@@ -218,32 +179,19 @@ struct kdbus_name_registry *kdbus_name_registry_new(void)
}
/**
- * kdbus_name_registry_free() - drop a name reg's reference
- * @reg: The name registry, may be %NULL
+ * kdbus_name_registry_free() - free name registry
+ * @r: name registry to free, or NULL
*
- * Cleanup the name registry's internal structures.
+ * Free a name registry and cleanup all internal objects. This is a no-op if
+ * you pass NULL as registry.
*/
-void kdbus_name_registry_free(struct kdbus_name_registry *reg)
+void kdbus_name_registry_free(struct kdbus_name_registry *r)
{
- if (!reg)
+ if (!r)
return;
- WARN_ON(!hash_empty(reg->entries_hash));
- kfree(reg);
-}
-
-static struct kdbus_name_entry *
-kdbus_name_find(struct kdbus_name_registry *reg, u32 hash, const char *name)
-{
- struct kdbus_name_entry *e;
-
- lockdep_assert_held(&reg->rwlock);
-
- hash_for_each_possible(reg->entries_hash, e, hentry, hash)
- if (strcmp(e->name, name) == 0)
- return e;
-
- return NULL;
+ WARN_ON(!hash_empty(r->entries_hash));
+ kfree(r);
}
/**
@@ -260,169 +208,286 @@ kdbus_name_find(struct kdbus_name_registry *reg, u32 hash, const char *name)
struct kdbus_name_entry *
kdbus_name_lookup_unlocked(struct kdbus_name_registry *reg, const char *name)
{
- return kdbus_name_find(reg, kdbus_strhash(name), name);
+ return kdbus_name_entry_find(reg, kdbus_strhash(name), name);
}
-/**
- * kdbus_name_acquire() - acquire a name
- * @reg: The name registry
- * @conn: The connection to pin this entry to
- * @name: The name to acquire
- * @flags: Acquisition flags (KDBUS_NAME_*)
- * @return_flags: Pointer to return flags for the acquired name
- * (KDBUS_NAME_*), may be %NULL
- *
- * Callers must ensure that @conn is either a privileged bus user or has
- * sufficient privileges in the policy-db to own the well-known name @name.
- *
- * Return: 0 success, negative error number on failure.
- */
-int kdbus_name_acquire(struct kdbus_name_registry *reg,
- struct kdbus_conn *conn, const char *name,
- u64 flags, u64 *return_flags)
+static int kdbus_name_become_activator(struct kdbus_name_owner *owner,
+ u64 *return_flags)
{
- struct kdbus_name_entry *e;
- u64 rflags = 0;
- int ret = 0;
- u32 hash;
+ if (kdbus_name_owner_is_used(owner))
+ return -EALREADY;
+ if (owner->name->activator)
+ return -EEXIST;
- kdbus_conn_assert_active(conn);
+ owner->name->activator = owner;
+ owner->flags |= KDBUS_NAME_ACTIVATOR;
- down_write(&reg->rwlock);
-
- if (!kdbus_conn_policy_own_name(conn, current_cred(), name)) {
- ret = -EPERM;
- goto exit_unlock;
+ if (kdbus_name_entry_first(owner->name)) {
+ owner->flags |= KDBUS_NAME_IN_QUEUE;
+ } else {
+ owner->flags |= KDBUS_NAME_PRIMARY;
+ kdbus_notify_name_change(owner->conn->ep->bus,
+ KDBUS_ITEM_NAME_ADD,
+ 0, owner->conn->id,
+ 0, owner->flags,
+ owner->name->name);
}
- hash = kdbus_strhash(name);
- e = kdbus_name_find(reg, hash, name);
- if (!e) {
- /* claim new name */
+ if (return_flags)
+ *return_flags = owner->flags | KDBUS_NAME_ACQUIRED;
- if (conn->activator_of) {
- ret = -EINVAL;
- goto exit_unlock;
- }
+ return 0;
+}
- e = kdbus_name_entry_new(reg, hash, name);
- if (IS_ERR(e)) {
- ret = PTR_ERR(e);
- goto exit_unlock;
- }
+static int kdbus_name_update(struct kdbus_name_owner *owner, u64 flags,
+ u64 *return_flags)
+{
+ struct kdbus_name_owner *primary, *activator;
+ struct kdbus_name_entry *name;
+ struct kdbus_bus *bus;
+ u64 nflags = 0;
+ int ret = 0;
- if (kdbus_conn_is_activator(conn)) {
- e->activator = kdbus_conn_ref(conn);
- conn->activator_of = e;
+ name = owner->name;
+ bus = owner->conn->ep->bus;
+ primary = kdbus_name_entry_first(name);
+ activator = name->activator;
+
+ /* cannot be activator and acquire a name */
+ if (owner == activator)
+ return -EUCLEAN;
+
+ /* update saved flags */
+ owner->flags = flags & KDBUS_NAME_SAVED_MASK;
+
+ if (!primary) {
+ /*
+ * No primary owner (but maybe an activator). Take over the
+ * name.
+ */
+
+ list_add(&owner->name_entry, &name->queue);
+ owner->flags |= KDBUS_NAME_PRIMARY;
+ nflags |= KDBUS_NAME_ACQUIRED;
+
+ /* move messages to new owner on activation */
+ if (activator) {
+ kdbus_conn_move_messages(owner->conn, activator->conn,
+ name->name_id);
+ kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE,
+ activator->conn->id, owner->conn->id,
+ activator->flags, owner->flags,
+ name->name);
+ activator->flags &= ~KDBUS_NAME_PRIMARY;
+ activator->flags |= KDBUS_NAME_IN_QUEUE;
+ } else {
+ kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_ADD,
+ 0, owner->conn->id,
+ 0, owner->flags,
+ name->name);
}
- kdbus_name_entry_set_owner(e, conn, flags);
- kdbus_notify_name_change(e->conn->ep->bus, KDBUS_ITEM_NAME_ADD,
- 0, e->conn->id, 0, e->flags, e->name);
- } else if (e->conn == conn || e == conn->activator_of) {
- /* connection already owns that name */
- ret = -EALREADY;
- } else if (kdbus_conn_is_activator(conn)) {
- /* activator claims existing name */
-
- if (conn->activator_of) {
- ret = -EINVAL; /* multiple names not allowed */
- } else if (e->activator) {
- ret = -EEXIST; /* only one activator per name */
+ } else if (owner == primary) {
+ /*
+ * Already the primary owner of the name, flags were already
+ * updated. Nothing to do.
+ */
+
+ owner->flags |= KDBUS_NAME_PRIMARY;
+
+ } else if ((primary->flags & KDBUS_NAME_ALLOW_REPLACEMENT) &&
+ (flags & KDBUS_NAME_REPLACE_EXISTING)) {
+ /*
+ * We're not the primary owner but can replace it. Move us
+ * ahead of the primary owner and acquire the name (possibly
+ * skipping queued owners ahead of us).
+ */
+
+ list_del_init(&owner->name_entry);
+ list_add(&owner->name_entry, &name->queue);
+ owner->flags |= KDBUS_NAME_PRIMARY;
+ nflags |= KDBUS_NAME_ACQUIRED;
+
+ kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE,
+ primary->conn->id, owner->conn->id,
+ primary->flags, owner->flags,
+ name->name);
+
+ /* requeue old primary, or drop if queueing not wanted */
+ if (primary->flags & KDBUS_NAME_QUEUE) {
+ primary->flags &= ~KDBUS_NAME_PRIMARY;
+ primary->flags |= KDBUS_NAME_IN_QUEUE;
} else {
- e->activator = kdbus_conn_ref(conn);
- conn->activator_of = e;
- }
- } else if (e->flags & KDBUS_NAME_ACTIVATOR) {
- /* claim name of an activator */
-
- kdbus_conn_move_messages(conn, e->activator, 0);
- kdbus_name_entry_replace_owner(e, conn, flags);
- } else if ((flags & KDBUS_NAME_REPLACE_EXISTING) &&
- (e->flags & KDBUS_NAME_ALLOW_REPLACEMENT)) {
- /* claim name of a previous owner */
-
- if (e->flags & KDBUS_NAME_QUEUE) {
- /* move owner back to queue if they asked for it */
- ret = kdbus_name_pending_new(e, e->conn, e->flags);
- if (ret < 0)
- goto exit_unlock;
+ list_del_init(&primary->name_entry);
+ kdbus_name_owner_free(primary);
}
- kdbus_name_entry_replace_owner(e, conn, flags);
} else if (flags & KDBUS_NAME_QUEUE) {
- /* add to waiting-queue of the name */
-
- ret = kdbus_name_pending_new(e, conn, flags);
- if (ret >= 0)
- /* tell the caller that we queued it */
- rflags |= KDBUS_NAME_IN_QUEUE;
+ /*
+ * Name is already occupied and we cannot take it over, but
+ * queuing is allowed. Put us silently on the queue, if not
+ * already there.
+ */
+
+ owner->flags |= KDBUS_NAME_IN_QUEUE;
+ if (!kdbus_name_owner_is_used(owner)) {
+ list_add_tail(&owner->name_entry, &name->queue);
+ nflags |= KDBUS_NAME_ACQUIRED;
+ }
+ } else if (kdbus_name_owner_is_used(owner)) {
+ /*
+ * Already queued on name, but re-queueing was not requested.
+ * Make sure to unlink it from the name, the caller is
+ * responsible for releasing it.
+ */
+
+ list_del_init(&owner->name_entry);
} else {
- /* the name is busy, return a failure */
+ /*
+ * Name is already claimed and queueing is not requested.
+ * Return error to the caller.
+ */
+
ret = -EEXIST;
}
- if (ret == 0 && return_flags)
- *return_flags = rflags;
+ if (return_flags)
+ *return_flags = owner->flags | nflags;
-exit_unlock:
+ return ret;
+}
+
+int kdbus_name_acquire(struct kdbus_name_registry *reg,
+ struct kdbus_conn *conn, const char *name_str,
+ u64 flags, u64 *return_flags)
+{
+ struct kdbus_name_entry *name = NULL;
+ struct kdbus_name_owner *owner = NULL;
+ u32 hash;
+ int ret;
+
+ kdbus_conn_assert_active(conn);
+
+ down_write(&reg->rwlock);
+
+ /*
+ * Verify the connection has access to the name. Do this before testing
+ * for double-acquisitions and other errors to make sure we do not leak
+ * information about this name through possible custom endpoints.
+ */
+ if (!kdbus_conn_policy_own_name(conn, current_cred(), name_str)) {
+ ret = -EPERM;
+ goto exit;
+ }
+
+ /*
+ * Lookup the name entry. If it already exists, search for an owner
+ * entry as we might already own that name. If either does not exist,
+ * we will allocate a fresh one.
+ */
+ hash = kdbus_strhash(name_str);
+ name = kdbus_name_entry_find(reg, hash, name_str);
+ if (name) {
+ owner = kdbus_name_owner_find(name, conn);
+ } else {
+ name = kdbus_name_entry_new(reg, hash, name_str);
+ if (IS_ERR(name)) {
+ ret = PTR_ERR(name);
+ name = NULL;
+ goto exit;
+ }
+ }
+
+ /* create name owner object if not already queued */
+ if (!owner) {
+ owner = kdbus_name_owner_new(conn, name, flags);
+ if (IS_ERR(owner)) {
+ ret = PTR_ERR(owner);
+ owner = NULL;
+ goto exit;
+ }
+ }
+
+ if (flags & KDBUS_NAME_ACTIVATOR)
+ ret = kdbus_name_become_activator(owner, return_flags);
+ else
+ ret = kdbus_name_update(owner, flags, return_flags);
+ if (ret < 0)
+ goto exit;
+
+exit:
+ if (owner && !kdbus_name_owner_is_used(owner))
+ kdbus_name_owner_free(owner);
+ if (name && !kdbus_name_entry_is_used(name))
+ kdbus_name_entry_free(name);
up_write(&reg->rwlock);
kdbus_notify_flush(conn->ep->bus);
return ret;
}
-static void kdbus_name_release_unlocked(struct kdbus_name_registry *reg,
- struct kdbus_name_entry *e)
+static void kdbus_name_release_unlocked(struct kdbus_name_owner *owner)
{
- struct kdbus_name_pending *p;
-
- lockdep_assert_held(&reg->rwlock);
-
- p = list_first_entry_or_null(&e->queue, struct kdbus_name_pending,
- name_entry);
-
- if (p) {
- /* give it to first active waiter in the queue */
- kdbus_name_entry_replace_owner(e, p->conn, p->flags);
- kdbus_name_pending_free(p);
- } else if (e->activator && e->activator != e->conn) {
- /* hand it back to an active activator connection */
- kdbus_conn_move_messages(e->activator, e->conn, e->name_id);
- kdbus_name_entry_replace_owner(e, e->activator,
- KDBUS_NAME_ACTIVATOR);
- } else {
- /* release the name */
- kdbus_notify_name_change(e->conn->ep->bus,
- KDBUS_ITEM_NAME_REMOVE,
- e->conn->id, 0, e->flags, 0, e->name);
- kdbus_name_entry_remove_owner(e);
- kdbus_name_entry_free(e);
+ struct kdbus_name_owner *primary, *next;
+ struct kdbus_name_entry *name;
+
+ name = owner->name;
+ primary = kdbus_name_entry_first(name);
+
+ list_del_init(&owner->name_entry);
+ if (owner == name->activator)
+ name->activator = NULL;
+
+ if (!primary || owner == primary) {
+ next = kdbus_name_entry_first(name);
+ if (!next)
+ next = name->activator;
+
+ if (next) {
+ /* hand to next in queue */
+ next->flags &= ~KDBUS_NAME_IN_QUEUE;
+ next->flags |= KDBUS_NAME_PRIMARY;
+ if (next == name->activator)
+ kdbus_conn_move_messages(next->conn,
+ owner->conn,
+ name->name_id);
+
+ kdbus_notify_name_change(owner->conn->ep->bus,
+ KDBUS_ITEM_NAME_CHANGE,
+ owner->conn->id, next->conn->id,
+ owner->flags, next->flags,
+ name->name);
+ } else {
+ kdbus_notify_name_change(owner->conn->ep->bus,
+ KDBUS_ITEM_NAME_REMOVE,
+ owner->conn->id, 0,
+ owner->flags, 0,
+ name->name);
+ }
}
+
+ kdbus_name_owner_free(owner);
+ if (!kdbus_name_entry_is_used(name))
+ kdbus_name_entry_free(name);
}
static int kdbus_name_release(struct kdbus_name_registry *reg,
struct kdbus_conn *conn,
- const char *name)
+ const char *name_str)
{
- struct kdbus_name_pending *p;
- struct kdbus_name_entry *e;
+ struct kdbus_name_owner *owner;
+ struct kdbus_name_entry *name;
int ret = 0;
down_write(&reg->rwlock);
- e = kdbus_name_find(reg, kdbus_strhash(name), name);
- if (!e) {
- ret = -ESRCH;
- } else if (e->conn == conn) {
- kdbus_name_release_unlocked(reg, e);
+ name = kdbus_name_entry_find(reg, kdbus_strhash(name_str), name_str);
+ if (name) {
+ owner = kdbus_name_owner_find(name, conn);
+ if (owner)
+ kdbus_name_release_unlocked(owner);
+ else
+ ret = -EADDRINUSE;
} else {
- ret = -EADDRINUSE;
- list_for_each_entry(p, &e->queue, name_entry) {
- if (p->conn == conn) {
- kdbus_name_pending_free(p);
- ret = 0;
- break;
- }
- }
+ ret = -ESRCH;
}
up_write(&reg->rwlock);
@@ -438,33 +503,74 @@ static int kdbus_name_release(struct kdbus_name_registry *reg,
void kdbus_name_release_all(struct kdbus_name_registry *reg,
struct kdbus_conn *conn)
{
- struct kdbus_name_pending *p;
- struct kdbus_conn *activator = NULL;
- struct kdbus_name_entry *e;
+ struct kdbus_name_owner *owner;
down_write(&reg->rwlock);
- if (conn->activator_of) {
- activator = conn->activator_of->activator;
- conn->activator_of->activator = NULL;
- }
-
- while ((p = list_first_entry_or_null(&conn->names_queue_list,
- struct kdbus_name_pending,
- conn_entry)))
- kdbus_name_pending_free(p);
- while ((e = list_first_entry_or_null(&conn->names_list,
- struct kdbus_name_entry,
- conn_entry)))
- kdbus_name_release_unlocked(reg, e);
+ while ((owner = list_first_entry_or_null(&conn->names_list,
+ struct kdbus_name_owner,
+ conn_entry)))
+ kdbus_name_release_unlocked(owner);
up_write(&reg->rwlock);
- kdbus_conn_unref(activator);
kdbus_notify_flush(conn->ep->bus);
}
/**
+ * kdbus_name_is_valid() - check if a name is valid
+ * @p: The name to check
+ * @allow_wildcard: Whether or not to allow a wildcard name
+ *
+ * A name is valid if all of the following criteria are met:
+ *
+ * - The name has two or more elements separated by a period ('.') character.
+ * - All elements must contain at least one character.
+ * - Each element must only contain the ASCII characters "[A-Z][a-z][0-9]_-"
+ * and must not begin with a digit.
+ * - The name must not exceed KDBUS_NAME_MAX_LEN.
+ * - If @allow_wildcard is true, the name may end on '.*'
+ */
+bool kdbus_name_is_valid(const char *p, bool allow_wildcard)
+{
+ bool dot, found_dot = false;
+ const char *q;
+
+ for (dot = true, q = p; *q; q++) {
+ if (*q == '.') {
+ if (dot)
+ return false;
+
+ found_dot = true;
+ dot = true;
+ } else {
+ bool good;
+
+ good = isalpha(*q) || (!dot && isdigit(*q)) ||
+ *q == '_' || *q == '-' ||
+ (allow_wildcard && dot &&
+ *q == '*' && *(q + 1) == '\0');
+
+ if (!good)
+ return false;
+
+ dot = false;
+ }
+ }
+
+ if (q - p > KDBUS_NAME_MAX_LEN)
+ return false;
+
+ if (dot)
+ return false;
+
+ if (!found_dot)
+ return false;
+
+ return true;
+}
+
+/**
* kdbus_cmd_name_acquire() - handle KDBUS_CMD_NAME_ACQUIRE
* @conn: connection to operate on
* @argp: command payload
@@ -503,20 +609,9 @@ int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp)
goto exit;
}
- /*
- * Do atomic_inc_return here to reserve our slot, then decrement
- * it before returning.
- */
- if (atomic_inc_return(&conn->name_count) > KDBUS_CONN_MAX_NAMES) {
- ret = -E2BIG;
- goto exit_dec;
- }
-
ret = kdbus_name_acquire(conn->ep->bus->name_registry, conn, item_name,
cmd->flags, &cmd->return_flags);
-exit_dec:
- atomic_dec(&conn->name_count);
exit:
return kdbus_args_clear(&args, ret);
}
@@ -559,7 +654,7 @@ static int kdbus_list_write(struct kdbus_conn *conn,
struct kdbus_conn *c,
struct kdbus_pool_slice *slice,
size_t *pos,
- struct kdbus_name_entry *e,
+ struct kdbus_name_owner *o,
bool write)
{
struct kvec kvec[4];
@@ -580,22 +675,22 @@ static int kdbus_list_write(struct kdbus_conn *conn,
u64 flags;
} h = {};
- if (e && !kdbus_conn_policy_see_name_unlocked(conn, current_cred(),
- e->name))
+ if (o && !kdbus_conn_policy_see_name_unlocked(conn, current_cred(),
+ o->name->name))
return 0;
kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &info.size);
/* append name */
- if (e) {
- size_t slen = strlen(e->name) + 1;
+ if (o) {
+ size_t slen = strlen(o->name->name) + 1;
h.size = offsetof(struct kdbus_item, name.name) + slen;
h.type = KDBUS_ITEM_OWNED_NAME;
- h.flags = e->flags;
+ h.flags = o->flags;
kdbus_kvec_set(&kvec[cnt++], &h, sizeof(h), &info.size);
- kdbus_kvec_set(&kvec[cnt++], e->name, slen, &info.size);
+ kdbus_kvec_set(&kvec[cnt++], o->name->name, slen, &info.size);
cnt += !!kdbus_kvec_pad(&kvec[cnt], &info.size);
}
@@ -625,63 +720,52 @@ static int kdbus_list_all(struct kdbus_conn *conn, u64 flags,
if (kdbus_conn_is_monitor(c))
continue;
- /* skip activators */
- if (!(flags & KDBUS_LIST_ACTIVATORS) &&
- kdbus_conn_is_activator(c))
- continue;
-
/* all names the connection owns */
- if (flags & (KDBUS_LIST_NAMES | KDBUS_LIST_ACTIVATORS)) {
- struct kdbus_name_entry *e;
+ if (flags & (KDBUS_LIST_NAMES |
+ KDBUS_LIST_ACTIVATORS |
+ KDBUS_LIST_QUEUED)) {
+ struct kdbus_name_owner *o;
- list_for_each_entry(e, &c->names_list, conn_entry) {
- struct kdbus_conn *a = e->activator;
+ list_for_each_entry(o, &c->names_list, conn_entry) {
+ if (o->flags & KDBUS_NAME_ACTIVATOR) {
+ if (!(flags & KDBUS_LIST_ACTIVATORS))
+ continue;
- if ((flags & KDBUS_LIST_ACTIVATORS) &&
- a && a != c) {
- ret = kdbus_list_write(conn, a, slice,
- &p, e, write);
+ ret = kdbus_list_write(conn, c, slice,
+ &p, o, write);
if (ret < 0) {
mutex_unlock(&c->lock);
return ret;
}
added = true;
- }
+ } else if (o->flags & KDBUS_NAME_IN_QUEUE) {
+ if (!(flags & KDBUS_LIST_QUEUED))
+ continue;
- if (flags & KDBUS_LIST_NAMES ||
- kdbus_conn_is_activator(c)) {
ret = kdbus_list_write(conn, c, slice,
- &p, e, write);
+ &p, o, write);
if (ret < 0) {
mutex_unlock(&c->lock);
return ret;
}
added = true;
- }
- }
- }
+ } else if (flags & KDBUS_LIST_NAMES) {
+ ret = kdbus_list_write(conn, c, slice,
+ &p, o, write);
+ if (ret < 0) {
+ mutex_unlock(&c->lock);
+ return ret;
+ }
- /* queue of names the connection is currently waiting for */
- if (flags & KDBUS_LIST_QUEUED) {
- struct kdbus_name_pending *q;
-
- list_for_each_entry(q, &c->names_queue_list,
- conn_entry) {
- ret = kdbus_list_write(conn, c, slice, &p,
- q->name, write);
- if (ret < 0) {
- mutex_unlock(&c->lock);
- return ret;
+ added = true;
}
-
- added = true;
}
}
/* nothing added so far, just add the unique ID */
- if (!added && flags & KDBUS_LIST_UNIQUE) {
+ if (!added && (flags & KDBUS_LIST_UNIQUE)) {
ret = kdbus_list_write(conn, c, slice, &p, NULL, write);
if (ret < 0)
return ret;
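
The acquire/release logic above reduces to one invariant: a name entry keeps a FIFO of kdbus_name_owner objects whose head is the primary owner, with an optional activator as fallback once the queue empties. The following stand-alone model illustrates that handover in plain C; struct owner, struct name_entry, and the functions here are simplified stand-ins (flags, replacement via head insertion, and change notifications are omitted).

#include <stdio.h>

/* Simplified stand-ins for struct kdbus_name_owner / kdbus_name_entry. */
struct owner {
	const char *conn;	/* stand-in for struct kdbus_conn */
	struct owner *next;
};

struct name_entry {
	struct owner *queue;		/* head of queue == primary owner */
	struct owner *activator;	/* fallback when the queue is empty */
};

/* Mirrors kdbus_name_get_owner(): primary owner, else activator, else NULL. */
static struct owner *get_owner(struct name_entry *e)
{
	return e->queue ? e->queue : e->activator;
}

/* Queue an owner at the tail, as KDBUS_NAME_QUEUE does via list_add_tail(). */
static void acquire_queued(struct name_entry *e, struct owner *o)
{
	struct owner **tail = &e->queue;

	while (*tail)
		tail = &(*tail)->next;
	*tail = o;
}

/* Mirrors the release path: drop the primary; the next queued owner (or,
 * with an empty queue, the activator) takes over. */
static void release_primary(struct name_entry *e)
{
	if (e->queue)
		e->queue = e->queue->next;
}

int main(void)
{
	struct owner a = { "conn-A", NULL }, b = { "conn-B", NULL };
	struct owner act = { "activator", NULL };
	struct name_entry e = { NULL, &act };

	acquire_queued(&e, &a);		/* A becomes primary owner    */
	acquire_queued(&e, &b);		/* B waits with NAME_IN_QUEUE */
	printf("owner: %s\n", get_owner(&e)->conn);	/* conn-A */

	release_primary(&e);		/* handover to next in queue */
	printf("owner: %s\n", get_owner(&e)->conn);	/* conn-B */

	release_primary(&e);		/* queue empty: activator takes over */
	printf("owner: %s\n", get_owner(&e)->conn);	/* activator */
	return 0;
}
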
diff --git a/ipc/kdbus/names.h b/ipc/kdbus/names.h
index 3dd258929..edac59ddd 100644
--- a/ipc/kdbus/names.h
+++ b/ipc/kdbus/names.h
@@ -18,6 +18,10 @@
#include <linux/hashtable.h>
#include <linux/rwsem.h>
+struct kdbus_name_entry;
+struct kdbus_name_owner;
+struct kdbus_name_registry;
+
/**
* struct kdbus_name_registry - names registered for a bus
* @entries_hash: Map of entries
@@ -32,27 +36,37 @@ struct kdbus_name_registry {
/**
 * struct kdbus_name_entry - well-known name entry
- * @name_id: Sequence number of name entry to be able to uniquely
+ * @name_id: sequence number of name entry to be able to uniquely
* identify a name over its registration lifetime
- * @flags: KDBUS_NAME_* flags
- * @conn: Connection owning the name
- * @activator: Connection of the activator queuing incoming messages
- * @queue: List of queued connections
- * @conn_entry: Entry in connection
- * @hentry: Entry in registry map
- * @name: The well-known name
+ * @activator: activator of this name, or NULL
+ * @queue: list of queued owners
+ * @hentry: entry in registry map
+ * @name: well-known name
*/
struct kdbus_name_entry {
u64 name_id;
- u64 flags;
- struct kdbus_conn *conn;
- struct kdbus_conn *activator;
+ struct kdbus_name_owner *activator;
struct list_head queue;
- struct list_head conn_entry;
struct hlist_node hentry;
char name[];
};
+/**
+ * struct kdbus_name_owner - owner of a well-known name
+ * @flags: KDBUS_NAME_* flags of this owner
+ * @conn: connection owning the name
+ * @name: name that is owned
+ * @conn_entry: link into @conn
+ * @name_entry: link into @name
+ */
+struct kdbus_name_owner {
+ u64 flags;
+ struct kdbus_conn *conn;
+ struct kdbus_name_entry *name;
+ struct list_head conn_entry;
+ struct list_head name_entry;
+};
+
bool kdbus_name_is_valid(const char *p, bool allow_wildcard);
struct kdbus_name_registry *kdbus_name_registry_new(void);
@@ -71,4 +85,21 @@ int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp);
int kdbus_cmd_name_release(struct kdbus_conn *conn, void __user *argp);
int kdbus_cmd_list(struct kdbus_conn *conn, void __user *argp);
+/**
+ * kdbus_name_get_owner() - get current owner of a name
+ * @name: name to get current owner of
+ *
+ * This returns a pointer to the current owner of a name (or its activator if
+ * there is no owner). The caller must make sure @name is valid and does not
+ * vanish.
+ *
+ * Return: Pointer to current owner or NULL if there is none.
+ */
+static inline struct kdbus_name_owner *
+kdbus_name_get_owner(struct kdbus_name_entry *name)
+{
+ return list_first_entry_or_null(&name->queue, struct kdbus_name_owner,
+ name_entry) ? : name->activator;
+}
+
#endif
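
As a usage sketch, this mirrors how the kdbus_pin_dst() hunk in connection.c above resolves a message destination through the new helper. The wrapper resolve_name_owner() is illustrative only, but every call inside it appears in this patch; the registry rwlock is assumed to be held by the caller.

/*
 * Sketch: resolve the current owner of a well-known name and pin its
 * connection, as kdbus_pin_dst() does. Caller must hold reg->rwlock.
 */
static struct kdbus_conn *
resolve_name_owner(struct kdbus_name_registry *reg, const char *name_str)
{
	struct kdbus_name_entry *name;
	struct kdbus_name_owner *owner;

	name = kdbus_name_lookup_unlocked(reg, name_str);
	if (!name)
		return ERR_PTR(-ESRCH);

	/* primary owner if the queue is non-empty, else the activator */
	owner = kdbus_name_get_owner(name);
	if (!owner)
		return ERR_PTR(-ESRCH);

	return kdbus_conn_ref(owner->conn);
}
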
diff --git a/ipc/kdbus/node.c b/ipc/kdbus/node.c
index 89f58bc85..986aca39c 100644
--- a/ipc/kdbus/node.c
+++ b/ipc/kdbus/node.c
@@ -111,33 +111,27 @@
*
* All nodes can be deactivated via kdbus_node_deactivate() at any time. You can
* call this multiple times, even in parallel or on nodes that were never
- * linked, and it will just work. The only restriction is, you must not hold an
- * active reference when calling kdbus_node_deactivate().
- * By deactivating a node, it is immediately marked inactive. Then, we wait for
- * all active references to be released (called 'draining' the node). This
- * shouldn't take very long as we don't perform long-lasting operations while
- * holding an active reference. Note that once the node is marked inactive, no
- * new active references can be acquired.
- * Once all active references are dropped, the node is considered 'drained'. Now
- * kdbus_node_deactivate() is called on each child of the node before we
- * continue deactivating our node. That is, once all children are entirely
- * deactivated, we call ->release_cb() of our node. ->release_cb() can release
- * any resources on that node which are bound to the "active" state of a node.
- * When done, we unlink the node from its parent rb-tree, mark it as
- * 'released' and return.
- * If kdbus_node_deactivate() is called multiple times (even in parallel), all
- * but one caller will just wait until the node is fully deactivated. That is,
- * one random caller of kdbus_node_deactivate() is selected to call
- * ->release_cb() and cleanup the node. Only once all this is done, all other
- * callers will return from kdbus_node_deactivate(). That is, it doesn't matter
- * whether you're the selected caller or not, it will only return after
- * everything is fully done.
+ * linked, and it will just work. Furthermore, all children will be deactivated
+ * recursively as well. If a node is deactivated, there might still be active
+ * references that were acquired before calling kdbus_node_deactivate(). The
+ * owner of an object must call kdbus_node_drain() (which is a superset of
+ * kdbus_node_deactivate()) before dropping their reference. This will
+ * deactivate the node and also synchronously wait for all active references to
+ * be dropped. Hence, once kdbus_node_drain() returns, the node is fully
+ * released and no active references exist anymore.
+ * kdbus_node_drain() can be called at any time, multiple times, and in
+ * parallel on multiple threads. All calls are synchronized internally and will
+ * return only once the node is fully drained. The only restriction is that you
+ * must not hold an active reference when calling kdbus_node_drain() (unlike
+ * deactivation, which allows the caller to hold an active reference).
*
* When a node is activated, we acquire a normal object reference to the node.
- * This reference is dropped after deactivation is fully done (and only iff the
+ * This reference is dropped after deactivation is fully done (and only if the
* node really was activated). This allows callers to link+activate a child node
- * and then drop all refs. The node will be deactivated together with the
- * parent, and then be freed when this reference is dropped.
+ * and then drop all refs. This has the effect that nobody owns a reference to
+ * the node, except for the parent node. Hence, if the parent is deactivated
+ * (and thus all children are deactivated, too), this will automatically
+ * release the child node.
*
* Currently, nodes provide a bunch of resources that external code can use
* directly. This includes:
@@ -198,11 +192,10 @@
* is even unique if linked but not activated, yet. This might
* change in the future, though. Code should not rely on this.
*
- * * node->lock: lock to protect node->children, node->rb, node->parent
+ * * node->lock: lock to protect node->children, node->rb, node->parent
* * node->parent: Reference to parent node. This is set during LINK time
- * and is dropped during destruction. You must not access
- * it unless you hold an active reference to the node or if
- * you know the node is dead.
+ * and is dropped during destruction. You can freely access
+ * this field, but it may be NULL (root node).
* * node->children: rb-tree of all linked children of this node. You must
* not access this directly, but use one of the iterator
* or lookup helpers.
@@ -265,6 +258,18 @@ static unsigned int kdbus_node_name_hash(const char *name)
* @name: name to compare the node with
* @node: node to compare the name with
*
+ * This compares a query string against a kdbus node. If the kdbus node has the
+ * given name, this returns 0. Otherwise, this returns >0 / <0 depending on
+ * whether the query string is greater / less than the node.
+ *
+ * Note: If @node is drained but has the name @name, this returns 1. The
+ * reason for this is that we treat drained nodes as "renamed". The
+ * slot of such nodes is no longer occupied and new nodes can claim it.
+ * Obviously, this has the side-effect that you cannot match drained
+ * nodes, as they will never return 0 on name-matches. But this is
+ * intentional, as there is no reason why anyone would ever want to match
+ * on drained nodes.
+ *
* Return: 0 if @name and @hash exactly match the information in @node, or
* an integer less than or greater than zero if @name is found, respectively,
* to be less than or be greater than the string stored in @node.
@@ -272,10 +277,16 @@ static unsigned int kdbus_node_name_hash(const char *name)
static int kdbus_node_name_compare(unsigned int hash, const char *name,
const struct kdbus_node *node)
{
+ int ret;
+
if (hash != node->hash)
return hash - node->hash;
- return strcmp(name, node->name);
+ ret = strcmp(name, node->name);
+ if (ret != 0)
+ return ret;
+
+ return atomic_read(&node->active) == KDBUS_NODE_DRAINED;
}
/**
@@ -312,7 +323,7 @@ void kdbus_node_init(struct kdbus_node *node, unsigned int type)
* ID could be allocated. You must not activate a node if linking failed! It is
* safe to deactivate it, though.
*
- * Once you linked a node, you must call kdbus_node_deactivate() before you drop
+ * Once you linked a node, you must call kdbus_node_drain() before you drop
* the last reference (even if you never activate the node).
*
* Return: 0 on success. negative error otherwise.
@@ -419,7 +430,7 @@ struct kdbus_node *kdbus_node_ref(struct kdbus_node *node)
*
* Note that this calls into ->free_cb() and thus _might_ sleep. The ->free_cb()
* callbacks must not acquire any outer locks, though. So you can safely drop
- * references while holding locks.
+ * references while holding locks (apart from node->parent->lock).
*
* If @node is NULL, this is a no-op.
*
@@ -431,7 +442,16 @@ struct kdbus_node *kdbus_node_unref(struct kdbus_node *node)
struct kdbus_node safe = *node;
WARN_ON(atomic_read(&node->active) != KDBUS_NODE_DRAINED);
- WARN_ON(!RB_EMPTY_NODE(&node->rb));
+
+ if (node->parent) {
+ mutex_lock(&node->parent->lock);
+ if (!RB_EMPTY_NODE(&node->rb)) {
+ rb_erase(&node->rb,
+ &node->parent->children);
+ RB_CLEAR_NODE(&node->rb);
+ }
+ mutex_unlock(&node->parent->lock);
+ }
if (node->free_cb)
node->free_cb(node);
@@ -439,12 +459,6 @@ struct kdbus_node *kdbus_node_unref(struct kdbus_node *node)
ida_simple_remove(&kdbus_node_ida, safe.id);
kfree(safe.name);
-
- /*
- * kdbusfs relies on the parent to be available even after the
- * node was deactivated and unlinked. Therefore, we pin it
- * until a node is destroyed.
- */
kdbus_node_unref(safe.parent);
}
@@ -534,78 +548,149 @@ bool kdbus_node_activate(struct kdbus_node *node)
}
/**
- * kdbus_node_deactivate() - deactivate a node
- * @node: The node to deactivate.
+ * kdbus_node_recurse_unlock() - advance iterator on a tree
+ * @start: node at which the iteration started
+ * @node: previously visited node
*
- * This function recursively deactivates this node and all its children. It
- * returns only once all children and the node itself were recursively disabled
- * (even if you call this function multiple times in parallel).
+ * This helper advances an iterator by one when traversing a node tree. It is
+ * supposed to be used like this:
*
- * It is safe to call this function on _any_ node that was initialized _any_
- * number of times.
+ * struct kdbus_node *n;
*
- * This call may sleep, as it waits for all active references to be dropped.
+ * n = start;
+ * while (n) {
+ * mutex_lock(&n->lock);
+ * ... visit @n ...
+ * n = kdbus_node_recurse_unlock(start, n);
+ * }
+ *
+ * This helper takes as input the start-node of the iteration and the current
+ * position. It returns a pointer to the next node to visit. The caller must
+ * hold a reference to @start during the whole iteration. Furthermore, @node
+ * must be locked when entering this helper. On return, the lock is released.
+ *
+ * The order of visit is pre-order traversal.
+ *
+ * If @node is deactivated before recursing its children, then it is guaranteed
+ * that all children will be visited. If @node is still active, new nodes might
+ * be inserted during traversal, and thus might be missed.
+ *
+ * Also note that the node-locks are released while traversing children. You
+ * must not rely on the locks to be held during the whole traversal. Each node
+ * that is visited is pinned by this helper, so the caller can rely on owning a
+ * reference. It is dropped, once all of the children of the node have been
+ * visited (recursively).
+ *
+ * You *must not* bail out of a traversal early, otherwise you'll leak
+ * ref-counts to all nodes in the current depth-path.
+ *
+ * Return: Reference to next node, or NULL.
*/
-void kdbus_node_deactivate(struct kdbus_node *node)
+static struct kdbus_node *kdbus_node_recurse_unlock(struct kdbus_node *start,
+ struct kdbus_node *node)
{
- struct kdbus_node *pos, *child;
+ struct kdbus_node *t, *prev = NULL;
struct rb_node *rb;
- int v_pre, v_post;
- pos = node;
+ lockdep_assert_held(&node->lock);
- /*
- * To avoid recursion, we perform back-tracking while deactivating
- * nodes. For each node we enter, we first mark the active-counter as
- * deactivated by adding BIAS. If the node as children, we set the first
- * child as current position and start over. If the node has no
- * children, we drain the node by waiting for all active refs to be
- * dropped and then releasing the node.
- *
- * After the node is released, we set its parent as current position
- * and start over. If the current position was the initial node, we're
- * done.
- *
- * Note that this function can be called in parallel by multiple
- * callers. We make sure that each node is only released once, and any
- * racing caller will wait until the other thread fully released that
- * node.
- */
+ rb = rb_first(&node->children);
+ if (!rb) {
+ do {
+ mutex_unlock(&node->lock);
+ kdbus_node_unref(prev);
+
+ if (node == start)
+ return NULL;
+
+ prev = node;
+ node = node->parent;
+
+ mutex_lock(&node->lock);
+ rb = rb_next(&prev->rb);
+ } while (!rb);
+ }
+
+ t = kdbus_node_ref(kdbus_node_from_rb(rb));
+ mutex_unlock(&node->lock);
+ kdbus_node_unref(prev);
+ return t;
+}
+
+/**
+ * kdbus_node_deactivate() - deactivate a node
+ * @node: node to deactivate
+ *
+ * This recursively deactivates the passed node and all its children. The nodes
+ * are marked as deactivated, but they're not drained. Hence, even after this
+ * call returns, there might still be someone holding an active reference to
+ * any of the nodes. However, no new active references can be acquired after
+ * this returns.
+ *
+ * It is safe to call this multiple times (even in parallel). Each call is
+ * guaranteed to only return after _all_ nodes have been deactivated.
+ */
+void kdbus_node_deactivate(struct kdbus_node *node)
+{
+ struct kdbus_node *pos;
+ int v;
+
+ pos = node;
+ while (pos) {
+ mutex_lock(&pos->lock);
- for (;;) {
/*
- * Add BIAS to node->active to mark it as inactive. If it was
+ * Add BIAS to pos->active to mark it as inactive. If it was
* never active before, immediately mark it as RELEASE_INACTIVE
- * so we remember this state.
- * We cannot remember v_pre as we might iterate into the
- * children, overwriting v_pre, before we can release our node.
+ * so that this case can be detected later on.
+ * If the node was already deactivated, make sure to still
+ * recurse into the children. Otherwise, we might return before
+ * a racing thread finished deactivating all children. But we
+ * want to guarantee that the whole tree is deactivated once
+ * this returns.
*/
- mutex_lock(&pos->lock);
- v_pre = atomic_read(&pos->active);
- if (v_pre >= 0)
+ v = atomic_read(&pos->active);
+ if (v >= 0)
atomic_add_return(KDBUS_NODE_BIAS, &pos->active);
- else if (v_pre == KDBUS_NODE_NEW)
+ else if (v == KDBUS_NODE_NEW)
atomic_set(&pos->active, KDBUS_NODE_RELEASE_DIRECT);
- mutex_unlock(&pos->lock);
+ pos = kdbus_node_recurse_unlock(node, pos);
+ }
+}
+
+/**
+ * kdbus_node_drain() - drain a node
+ * @node: node to drain
+ *
+ * This function recursively deactivates this node and all its children and
+ * then waits for all active references to be dropped. This function is a
+ * superset of kdbus_node_deactivate(), as it additionally drains all nodes. It
+ * returns only once all children and the node itself were recursively drained
+ * (even if you call this function multiple times in parallel).
+ *
+ * It is safe to call this function on _any_ node that was initialized _any_
+ * number of times.
+ *
+ * This call may sleep, as it waits for all active references to be dropped.
+ */
+void kdbus_node_drain(struct kdbus_node *node)
+{
+ struct kdbus_node *pos;
+ int v;
+
+ kdbus_node_deactivate(node);
+
+ pos = node;
+ while (pos) {
/* wait until all active references were dropped */
wait_event(pos->waitq,
atomic_read(&pos->active) <= KDBUS_NODE_BIAS);
- mutex_lock(&pos->lock);
- /* recurse into first child if any */
- rb = rb_first(&pos->children);
- if (rb) {
- child = kdbus_node_ref(kdbus_node_from_rb(rb));
- mutex_unlock(&pos->lock);
- pos = child;
- continue;
- }
-
/* mark object as RELEASE */
- v_post = atomic_read(&pos->active);
- if (v_post == KDBUS_NODE_BIAS ||
- v_post == KDBUS_NODE_RELEASE_DIRECT)
+ mutex_lock(&pos->lock);
+ v = atomic_read(&pos->active);
+ if (v == KDBUS_NODE_BIAS || v == KDBUS_NODE_RELEASE_DIRECT)
atomic_set(&pos->active, KDBUS_NODE_RELEASE);
mutex_unlock(&pos->lock);
@@ -614,20 +699,9 @@ void kdbus_node_deactivate(struct kdbus_node *node)
* perform the actual release. Otherwise, we wait until the
* release is done and the node is marked as DRAINED.
*/
- if (v_post == KDBUS_NODE_BIAS ||
- v_post == KDBUS_NODE_RELEASE_DIRECT) {
+ if (v == KDBUS_NODE_BIAS || v == KDBUS_NODE_RELEASE_DIRECT) {
if (pos->release_cb)
- pos->release_cb(pos, v_post == KDBUS_NODE_BIAS);
-
- if (pos->parent) {
- mutex_lock(&pos->parent->lock);
- if (!RB_EMPTY_NODE(&pos->rb)) {
- rb_erase(&pos->rb,
- &pos->parent->children);
- RB_CLEAR_NODE(&pos->rb);
- }
- mutex_unlock(&pos->parent->lock);
- }
+ pos->release_cb(pos, v == KDBUS_NODE_BIAS);
/* mark as DRAINED */
atomic_set(&pos->active, KDBUS_NODE_DRAINED);
@@ -646,7 +720,7 @@ void kdbus_node_deactivate(struct kdbus_node *node)
* immediately went to NODE_RELEASE_DIRECT. In that case
* we must not drop the reference.
*/
- if (v_post == KDBUS_NODE_BIAS)
+ if (v == KDBUS_NODE_BIAS)
kdbus_node_unref(pos);
} else {
/* wait until object is DRAINED */
@@ -654,19 +728,8 @@ void kdbus_node_deactivate(struct kdbus_node *node)
atomic_read(&pos->active) == KDBUS_NODE_DRAINED);
}
- /*
- * We're done with the current node. Continue on its parent
- * again, which will try deactivating its next child, or itself
- * if no child is left.
- * If we've reached our initial node again, we are done and
- * can safely return.
- */
- if (pos == node)
- break;
-
- child = pos;
- pos = pos->parent;
- kdbus_node_unref(child);
+ mutex_lock(&pos->lock);
+ pos = kdbus_node_recurse_unlock(node, pos);
}
}
@@ -859,18 +922,6 @@ struct kdbus_node *kdbus_node_next_child(struct kdbus_node *node,
pos = kdbus_node_from_rb(rb);
if (kdbus_node_acquire(pos))
goto exit;
- } else if (RB_EMPTY_NODE(&prev->rb)) {
- /*
- * The current iterator is no longer linked to the rb-tree. Use
- * its hash value and name to find the next _higher_ node and
- * acquire it. If we got it, return it as next element.
- * Otherwise, the loop below will find the next active node.
- */
- pos = node_find_closest_unlocked(node, prev->hash, prev->name);
- if (!pos)
- goto exit;
- if (kdbus_node_acquire(pos))
- goto exit;
} else {
/*
* The current iterator is still linked to the parent. Set it
diff --git a/ipc/kdbus/node.h b/ipc/kdbus/node.h
index 970e02b08..16c6fd574 100644
--- a/ipc/kdbus/node.h
+++ b/ipc/kdbus/node.h
@@ -32,6 +32,7 @@ typedef void (*kdbus_node_free_t) (struct kdbus_node *node);
typedef void (*kdbus_node_release_t) (struct kdbus_node *node, bool was_active);
struct kdbus_node {
+ struct mutex lock;
atomic_t refcnt;
atomic_t active;
wait_queue_head_t waitq;
@@ -49,10 +50,9 @@ struct kdbus_node {
unsigned int hash;
unsigned int id;
struct kdbus_node *parent; /* may be NULL */
-
- /* valid iff active */
- struct mutex lock;
struct rb_node rb;
+
+ /* dynamic list of children */
struct rb_root children;
};
@@ -72,6 +72,7 @@ bool kdbus_node_is_active(struct kdbus_node *node);
bool kdbus_node_is_deactivated(struct kdbus_node *node);
bool kdbus_node_activate(struct kdbus_node *node);
void kdbus_node_deactivate(struct kdbus_node *node);
+void kdbus_node_drain(struct kdbus_node *node);
bool kdbus_node_acquire(struct kdbus_node *node);
void kdbus_node_release(struct kdbus_node *node);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c3fc5c2b6..161a1807e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -47,8 +47,7 @@
#define RECV 1
#define STATE_NONE 0
-#define STATE_PENDING 1
-#define STATE_READY 2
+#define STATE_READY 1
struct posix_msg_tree_node {
struct rb_node rb_node;
@@ -568,15 +567,12 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
wq_add(info, sr, ewp);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_INTERRUPTIBLE);
spin_unlock(&info->lock);
time = schedule_hrtimeout_range_clock(timeout, 0,
HRTIMER_MODE_ABS, CLOCK_REALTIME);
- while (ewp->state == STATE_PENDING)
- cpu_relax();
-
if (ewp->state == STATE_READY) {
retval = 0;
goto out;
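
The hunk above removes the STATE_PENDING busy-wait: a waiter returning from
schedule() either observes STATE_READY directly or retakes info->lock to decide.
A condensed sketch of the resulting waiter path (waiter_sketch() is a hypothetical
simplification of wq_sleep(); timeout handling omitted):

        static int waiter_sketch(struct mqueue_inode_info *info,
                                 struct ext_wait_queue *ewp)
        {
                __set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock(&info->lock);
                schedule();

                if (ewp->state == STATE_READY)
                        return 0;               /* pipelined handoff completed */

                spin_lock(&info->lock);         /* signal/timeout: recheck */
                if (ewp->state == STATE_READY) {
                        spin_unlock(&info->lock);
                        return 0;
                }
                list_del(&ewp->list);           /* give up and dequeue */
                spin_unlock(&info->lock);
                return -EINTR;
        }
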
@@ -904,11 +900,15 @@ out_name:
* list of waiting receivers. A sender checks that list before adding the new
* message into the message array. If there is a waiting receiver, then it
* bypasses the message array and directly hands the message over to the
- * receiver.
- * The receiver accepts the message and returns without grabbing the queue
- * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
- * are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c for more details.
+ * receiver. The receiver accepts the message and returns without grabbing the
+ * queue spinlock:
+ *
+ * - Set pointer to message.
+ * - Queue the receiver task for later wakeup (without the info->lock).
+ * - Update its state to STATE_READY. Now the receiver can continue.
+ * - Wake up the process after the lock is dropped. Should the process wake up
+ * before this wakeup (due to a timeout or a signal) it will either see
+ * STATE_READY and continue or acquire the lock to check the state again.
*
* The same algorithm is used for senders.
*/
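
On the waker side, the task reference taken by wake_q_add() keeps the receiver's
task_struct alive until wake_up_q() runs after the lock is dropped, which is what
makes STATE_READY safe as the final write. A condensed sketch (waker_sketch() is a
hypothetical simplification of pipelined_send() plus its caller):

        static void waker_sketch(struct mqueue_inode_info *info,
                                 struct ext_wait_queue *receiver,
                                 struct msg_msg *message)
        {
                WAKE_Q(wake_q);

                spin_lock(&info->lock);
                receiver->msg = message;
                list_del(&receiver->list);
                wake_q_add(&wake_q, receiver->task);    /* takes a task ref */
                receiver->state = STATE_READY;          /* last write */
                spin_unlock(&info->lock);
                wake_up_q(&wake_q);                     /* wake outside the lock */
        }
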
@@ -916,21 +916,29 @@ out_name:
/* pipelined_send() - send a message directly to the task waiting in
* sys_mq_timedreceive() (without inserting message into a queue).
*/
-static inline void pipelined_send(struct mqueue_inode_info *info,
+static inline void pipelined_send(struct wake_q_head *wake_q,
+ struct mqueue_inode_info *info,
struct msg_msg *message,
struct ext_wait_queue *receiver)
{
receiver->msg = message;
list_del(&receiver->list);
- receiver->state = STATE_PENDING;
- wake_up_process(receiver->task);
- smp_wmb();
+ wake_q_add(wake_q, receiver->task);
+ /*
+ * Rely on the implicit cmpxchg barrier from wake_q_add such
+ * that we can ensure that updating receiver->state is the last
+ * write operation: As once set, the receiver can continue,
+ * and if we don't have the reference count from the wake_q,
+ * yet, at that point we can later have a use-after-free
+ * condition and bogus wakeup.
+ */
receiver->state = STATE_READY;
}
/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
* gets its message and put to the queue (we have one free place for sure). */
-static inline void pipelined_receive(struct mqueue_inode_info *info)
+static inline void pipelined_receive(struct wake_q_head *wake_q,
+ struct mqueue_inode_info *info)
{
struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
@@ -941,10 +949,9 @@ static inline void pipelined_receive(struct mqueue_inode_info *info)
}
if (msg_insert(sender->msg, info))
return;
+
list_del(&sender->list);
- sender->state = STATE_PENDING;
- wake_up_process(sender->task);
- smp_wmb();
+ wake_q_add(wake_q, sender->task);
sender->state = STATE_READY;
}
@@ -962,6 +969,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
struct timespec ts;
struct posix_msg_tree_node *new_leaf = NULL;
int ret = 0;
+ WAKE_Q(wake_q);
if (u_abs_timeout) {
int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -1045,7 +1053,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
} else {
receiver = wq_get_first_waiter(info, RECV);
if (receiver) {
- pipelined_send(info, msg_ptr, receiver);
+ pipelined_send(&wake_q, info, msg_ptr, receiver);
} else {
/* adds message to the queue */
ret = msg_insert(msg_ptr, info);
@@ -1058,6 +1066,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
}
out_unlock:
spin_unlock(&info->lock);
+ wake_up_q(&wake_q);
out_free:
if (ret)
free_msg(msg_ptr);
@@ -1144,14 +1153,17 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
msg_ptr = wait.msg;
}
} else {
+ WAKE_Q(wake_q);
+
msg_ptr = msg_get(info);
inode->i_atime = inode->i_mtime = inode->i_ctime =
CURRENT_TIME;
/* There is now free space in queue. */
- pipelined_receive(info);
+ pipelined_receive(&wake_q, info);
spin_unlock(&info->lock);
+ wake_up_q(&wake_q);
ret = 0;
}
if (ret == 0) {
diff --git a/ipc/msg.c b/ipc/msg.c
index 2b6fdbb9e..66c4f567e 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -76,7 +76,7 @@ struct msg_sender {
static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id);
+ struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id);
if (IS_ERR(ipcp))
return ERR_CAST(ipcp);
@@ -196,7 +196,7 @@ static void expunge_all(struct msg_queue *msq, int res)
* or dealing with -EAGAIN cases. See lockless receive part 1
* and 2 in do_msgrcv().
*/
- smp_mb();
+ smp_wmb(); /* barrier (B) */
msr->r_msg = ERR_PTR(res);
}
}
@@ -580,7 +580,8 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
/* initialize pipelined send ordering */
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
- smp_mb(); /* see barrier comment below */
+ /* barrier (B) see barrier comment below */
+ smp_wmb();
msr->r_msg = ERR_PTR(-E2BIG);
} else {
msr->r_msg = NULL;
@@ -589,11 +590,12 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
wake_up_process(msr->r_tsk);
/*
* Ensure that the wakeup is visible before
- * setting r_msg, as the receiving end depends
- * on it. See lockless receive part 1 and 2 in
- * do_msgrcv().
+	 * setting r_msg, as the receiving end can otherwise
+	 * exit early - once r_msg is set, the receiver can
+ * continue. See lockless receive part 1 and 2
+ * in do_msgrcv(). Barrier (B).
*/
- smp_mb();
+ smp_wmb();
msr->r_msg = msg;
return 1;
@@ -932,12 +934,38 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
/* Lockless receive, part 2:
* Wait until pipelined_send or expunge_all are outside of
* wake_up_process(). There is a race with exit(), see
- * ipc/mqueue.c for the details.
+ * ipc/mqueue.c for the details. The correct serialization
+ * ensures that a receiver cannot continue without the wakeup
+	 * being visible _before_ r_msg is set:
+ *
+ * CPU 0 CPU 1
+ * <loop receiver>
+ * smp_rmb(); (A) <-- pair -. <waker thread>
+ * <load ->r_msg> | msr->r_msg = NULL;
+ * | wake_up_process();
+ * <continue> `------> smp_wmb(); (B)
+ * msr->r_msg = msg;
+ *
+	 * Where (A) orders the read of the message value and (B) orders
+	 * the write to r_msg -- done in both pipelined_send() and
+	 * expunge_all().
*/
- msg = (struct msg_msg *)msr_d.r_msg;
- while (msg == NULL) {
- cpu_relax();
+ for (;;) {
+ /*
+ * Pairs with writer barrier in pipelined_send
+ * or expunge_all.
+ */
+ smp_rmb(); /* barrier (A) */
msg = (struct msg_msg *)msr_d.r_msg;
+ if (msg)
+ break;
+
+ /*
+ * The cpu_relax() call is a compiler barrier
+ * which forces everything in this loop to be
+ * re-loaded.
+ */
+ cpu_relax();
}
/* Lockless receive, part 3:
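
Condensed, the two sides of the (A)/(B) pairing documented above look as follows
(reader_sketch()/writer_sketch() are hypothetical reductions of do_msgrcv() and
pipelined_send()/expunge_all()):

        static struct msg_msg *reader_sketch(struct msg_receiver *msr)
        {
                struct msg_msg *msg;

                for (;;) {
                        smp_rmb();              /* barrier (A), pairs with (B) */
                        msg = (struct msg_msg *)msr->r_msg;
                        if (msg)
                                return msg;     /* wakeup already visible */
                        cpu_relax();            /* compiler barrier: reload */
                }
        }

        static void writer_sketch(struct msg_receiver *msr, struct msg_msg *msg)
        {
                msr->r_msg = NULL;
                wake_up_process(msr->r_tsk);
                smp_wmb();                      /* barrier (B) */
                msr->r_msg = msg;               /* receiver may now continue */
        }
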
diff --git a/ipc/sem.c b/ipc/sem.c
index d1a6edd17..b471e5a38 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -253,6 +253,16 @@ static void sem_rcu_free(struct rcu_head *head)
}
/*
+ * spin_unlock_wait() and !spin_is_locked() are not memory barriers; they
+ * are only control barriers.
+ * The code must pair with spin_unlock(&sem->lock) or
+ * spin_unlock(&sem_perm.lock), so the control barrier alone is insufficient.
+ *
+ * smp_rmb() is sufficient, as writes cannot pass the control barrier.
+ */
+#define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb()
+
+/*
* Wait until all currently ongoing simple ops have completed.
* Caller must own sem_perm.lock.
* New simple ops cannot start, because simple ops first check
@@ -275,6 +285,7 @@ static void sem_wait_array(struct sem_array *sma)
sem = sma->sem_base + i;
spin_unlock_wait(&sem->lock);
}
+ ipc_smp_acquire__after_spin_is_unlocked();
}
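
The pairing the new macro documents, condensed (fastpath_sketch() is hypothetical;
the other CPU runs the existing complex_count++ followed by
spin_unlock(&sma->sem_perm.lock)):

        static bool fastpath_sketch(struct sem_array *sma)
        {
                if (spin_is_locked(&sma->sem_perm.lock))
                        return false;           /* complex op in flight */
                /* upgrade the control barrier to read-acquire semantics */
                ipc_smp_acquire__after_spin_is_unlocked();
                return sma->complex_count == 0; /* re-check is now ordered */
        }
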
/*
@@ -327,13 +338,12 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
/* Then check that the global lock is free */
if (!spin_is_locked(&sma->sem_perm.lock)) {
/*
- * The ipc object lock check must be visible on all
- * cores before rechecking the complex count. Otherwise
- * we can race with another thread that does:
+ * We need a memory barrier with acquire semantics,
+ * otherwise we can race with another thread that does:
* complex_count++;
* spin_unlock(sem_perm.lock);
*/
- smp_rmb();
+ ipc_smp_acquire__after_spin_is_unlocked();
/*
* Now repeat the test of complex_count:
@@ -391,7 +401,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
struct kern_ipc_perm *ipcp;
struct sem_array *sma;
- ipcp = ipc_obtain_object(&sem_ids(ns), id);
+ ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
if (IS_ERR(ipcp))
return ERR_CAST(ipcp);
@@ -410,7 +420,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id);
+ struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
if (IS_ERR(ipcp))
return ERR_CAST(ipcp);
@@ -2074,17 +2084,28 @@ void exit_sem(struct task_struct *tsk)
rcu_read_lock();
un = list_entry_rcu(ulp->list_proc.next,
struct sem_undo, list_proc);
- if (&un->list_proc == &ulp->list_proc)
- semid = -1;
- else
- semid = un->semid;
+ if (&un->list_proc == &ulp->list_proc) {
+ /*
+ * We must wait for freeary() before freeing this ulp,
+			 * in case we raced with the last sem_undo. There is
+			 * a small window where we could exit before freeary()
+			 * has finished unlocking sem_undo_list.
+ */
+ spin_unlock_wait(&ulp->lock);
+ rcu_read_unlock();
+ break;
+ }
+ spin_lock(&ulp->lock);
+ semid = un->semid;
+ spin_unlock(&ulp->lock);
+ /* exit_sem raced with IPC_RMID, nothing to do */
if (semid == -1) {
rcu_read_unlock();
- break;
+ continue;
}
- sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
+ sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
/* exit_sem raced with IPC_RMID, nothing to do */
if (IS_ERR(sma)) {
rcu_read_unlock();
@@ -2112,9 +2133,11 @@ void exit_sem(struct task_struct *tsk)
ipc_assert_locked_object(&sma->sem_perm);
list_del(&un->list_id);
- spin_lock(&ulp->lock);
+		/* We are the last process using this ulp, so acquiring
+		 * ulp->lock isn't required. Besides that, we are also
+		 * protected against IPC_RMID, as we hold the sma->sem_perm
+		 * lock now.
+ */
list_del_rcu(&un->list_proc);
- spin_unlock(&ulp->lock);
/* perform adjustments registered in un */
for (i = 0; i < sma->sem_nsems; i++) {
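
For reference, the other side of the race closed above: freeary() (the IPC_RMID
path) invalidates the undo entry under ulp->lock, which is what makes the semid
snapshot taken under ulp->lock in exit_sem() reliable. A hypothetical condensation
of that path:

        static void freeary_sketch(struct sem_undo *un)
        {
                struct sem_undo_list *ulp = un->ulp;

                spin_lock(&ulp->lock);
                un->semid = -1;                 /* exit_sem() sees a dead entry */
                list_del_rcu(&un->list_proc);
                spin_unlock(&ulp->lock);        /* pairs with spin_unlock_wait() */
        }
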
diff --git a/ipc/shm.c b/ipc/shm.c
index d2f284c1f..4aef24d91 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -129,7 +129,7 @@ void __init shm_init(void)
static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
+ struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
if (IS_ERR(ipcp))
return ERR_CAST(ipcp);
@@ -155,8 +155,11 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
- if (IS_ERR(ipcp))
- return (struct shmid_kernel *)ipcp;
+ /*
+ * We raced in the idr lookup or with shm_destroy(). Either way, the
+ * ID is busted.
+ */
+ BUG_ON(IS_ERR(ipcp));
return container_of(ipcp, struct shmid_kernel, shm_perm);
}
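
With the validity check centralized here, the BUG_ON()s at the call sites below
become redundant, and a caller reduces to the pattern in this hypothetical sketch:

        static void foo_attach_sketch(struct ipc_namespace *ns, int id)
        {
                struct shmid_kernel *shp;

                shp = shm_lock(ns, id);         /* BUGs internally on a dead ID */
                shp->shm_nattch++;
                shm_unlock(shp);
        }
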
@@ -191,7 +194,6 @@ static void shm_open(struct vm_area_struct *vma)
struct shmid_kernel *shp;
shp = shm_lock(sfd->ns, sfd->id);
- BUG_ON(IS_ERR(shp));
shp->shm_atim = get_seconds();
shp->shm_lprid = task_tgid_vnr(current);
shp->shm_nattch++;
@@ -258,7 +260,6 @@ static void shm_close(struct vm_area_struct *vma)
down_write(&shm_ids(ns).rwsem);
/* remove from the list of attaches of the shm segment */
shp = shm_lock(ns, sfd->id);
- BUG_ON(IS_ERR(shp));
shp->shm_lprid = task_tgid_vnr(current);
shp->shm_dtim = get_seconds();
shp->shm_nattch--;
@@ -544,7 +545,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if ((shmflg & SHM_NORESERVE) &&
sysctl_overcommit_memory != OVERCOMMIT_NEVER)
acctflag = VM_NORESERVE;
- file = shmem_file_setup(name, size, acctflag, 0);
+ file = shmem_kernel_file_setup(name, size, acctflag);
}
error = PTR_ERR(file);
if (IS_ERR(file))
@@ -1191,7 +1192,6 @@ out_fput:
out_nattch:
down_write(&shm_ids(ns).rwsem);
shp = shm_lock(ns, shmid);
- BUG_ON(IS_ERR(shp));
shp->shm_nattch--;
if (shm_may_destroy(ns, shp))
shm_destroy(ns, shp);
diff --git a/ipc/util.c b/ipc/util.c
index ff3323ef8..be4230020 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -467,10 +467,7 @@ void ipc_rcu_free(struct rcu_head *head)
{
struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
- if (is_vmalloc_addr(p))
- vfree(p);
- else
- kfree(p);
+ kvfree(p);
}
/**
@@ -558,7 +555,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
* Call inside the RCU critical section.
* The ipc object is *not* locked on exit.
*/
-struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
+struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
int lid = ipcid_to_idx(id);
@@ -584,21 +581,24 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
struct kern_ipc_perm *out;
rcu_read_lock();
- out = ipc_obtain_object(ids, id);
+ out = ipc_obtain_object_idr(ids, id);
if (IS_ERR(out))
- goto err1;
+ goto err;
spin_lock(&out->lock);
- /* ipc_rmid() may have already freed the ID while ipc_lock
- * was spinning: here verify that the structure is still valid
+ /*
+ * ipc_rmid() may have already freed the ID while ipc_lock()
+ * was spinning: here verify that the structure is still valid.
+	 * Upon a race with RMID, return -EIDRM, thus indicating that
+	 * the ID refers to an object that has been removed.
*/
if (ipc_valid_object(out))
return out;
spin_unlock(&out->lock);
- out = ERR_PTR(-EINVAL);
-err1:
+ out = ERR_PTR(-EIDRM);
+err:
rcu_read_unlock();
return out;
}
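
A caller can now distinguish an ID that never resolved (-EINVAL from the idr
lookup) from one removed while the lock was being taken (-EIDRM). A hypothetical
caller sketch:

        static int foo_op_sketch(struct ipc_ids *ids, int id)
        {
                struct kern_ipc_perm *perm;

                perm = ipc_lock(ids, id);
                if (IS_ERR(perm))
                        return PTR_ERR(perm);   /* -EINVAL or -EIDRM */
                /* ... operate on the locked object ... */
                ipc_unlock(perm);               /* drops the spinlock and RCU */
                return 0;
        }
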
@@ -608,7 +608,7 @@ err1:
* @ids: ipc identifier set
* @id: ipc id to look for
*
- * Similar to ipc_obtain_object() but also checks
+ * Similar to ipc_obtain_object_idr() but also checks
* the ipc object reference counter.
*
* Call inside the RCU critical section.
@@ -616,13 +616,13 @@ err1:
*/
struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id)
{
- struct kern_ipc_perm *out = ipc_obtain_object(ids, id);
+ struct kern_ipc_perm *out = ipc_obtain_object_idr(ids, id);
if (IS_ERR(out))
goto out;
if (ipc_checkid(out, id))
- return ERR_PTR(-EIDRM);
+ return ERR_PTR(-EINVAL);
out:
return out;
}
diff --git a/ipc/util.h b/ipc/util.h
index 1a5a0fcd0..3a8a5a0ec 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -132,7 +132,7 @@ void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
void ipc_rcu_free(struct rcu_head *head);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
-struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
+struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);