diff options
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/kdbus/bus.c | 6 | ||||
-rw-r--r-- | ipc/kdbus/connection.c | 53 | ||||
-rw-r--r-- | ipc/kdbus/connection.h | 9 | ||||
-rw-r--r-- | ipc/kdbus/domain.c | 4 | ||||
-rw-r--r-- | ipc/kdbus/endpoint.c | 4 | ||||
-rw-r--r-- | ipc/kdbus/fs.c | 4 | ||||
-rw-r--r-- | ipc/kdbus/handle.c | 4 | ||||
-rw-r--r-- | ipc/kdbus/limits.h | 2 | ||||
-rw-r--r-- | ipc/kdbus/main.c | 13 | ||||
-rw-r--r-- | ipc/kdbus/metadata.c | 21 | ||||
-rw-r--r-- | ipc/kdbus/names.c | 762 | ||||
-rw-r--r-- | ipc/kdbus/names.h | 55 | ||||
-rw-r--r-- | ipc/kdbus/node.c | 309 | ||||
-rw-r--r-- | ipc/kdbus/node.h | 7 | ||||
-rw-r--r-- | ipc/mqueue.c | 54 | ||||
-rw-r--r-- | ipc/msg.c | 50 | ||||
-rw-r--r-- | ipc/sem.c | 51 | ||||
-rw-r--r-- | ipc/shm.c | 14 | ||||
-rw-r--r-- | ipc/util.c | 28 | ||||
-rw-r--r-- | ipc/util.h | 2 |
20 files changed, 842 insertions, 610 deletions
diff --git a/ipc/kdbus/bus.c b/ipc/kdbus/bus.c index a67f825bd..e636d3478 100644 --- a/ipc/kdbus/bus.c +++ b/ipc/kdbus/bus.c @@ -167,7 +167,7 @@ static struct kdbus_bus *kdbus_bus_new(struct kdbus_domain *domain, return b; exit_unref: - kdbus_node_deactivate(&b->node); + kdbus_node_drain(&b->node); kdbus_node_unref(&b->node); return ERR_PTR(ret); } @@ -416,11 +416,11 @@ exit: ret = kdbus_args_clear(&args, ret); if (ret < 0) { if (ep) { - kdbus_node_deactivate(&ep->node); + kdbus_node_drain(&ep->node); kdbus_ep_unref(ep); } if (bus) { - kdbus_node_deactivate(&bus->node); + kdbus_node_drain(&bus->node); kdbus_bus_unref(bus); } return ERR_PTR(ret); diff --git a/ipc/kdbus/connection.c b/ipc/kdbus/connection.c index aa3296ea4..ef63d6533 100644 --- a/ipc/kdbus/connection.c +++ b/ipc/kdbus/connection.c @@ -129,9 +129,7 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep *ep, #endif mutex_init(&conn->lock); INIT_LIST_HEAD(&conn->names_list); - INIT_LIST_HEAD(&conn->names_queue_list); INIT_LIST_HEAD(&conn->reply_list); - atomic_set(&conn->name_count, 0); atomic_set(&conn->request_count, 0); atomic_set(&conn->lost_count, 0); INIT_DELAYED_WORK(&conn->work, kdbus_reply_list_scan_work); @@ -284,7 +282,6 @@ static void __kdbus_conn_free(struct kref *kref) WARN_ON(delayed_work_pending(&conn->work)); WARN_ON(!list_empty(&conn->queue.msg_list)); WARN_ON(!list_empty(&conn->names_list)); - WARN_ON(!list_empty(&conn->names_queue_list)); WARN_ON(!list_empty(&conn->reply_list)); if (conn->user) { @@ -618,12 +615,13 @@ int kdbus_conn_disconnect(struct kdbus_conn *conn, bool ensure_queue_empty) */ bool kdbus_conn_has_name(struct kdbus_conn *conn, const char *name) { - struct kdbus_name_entry *e; + struct kdbus_name_owner *owner; lockdep_assert_held(&conn->ep->bus->name_registry->rwlock); - list_for_each_entry(e, &conn->names_list, conn_entry) - if (strcmp(e->name, name) == 0) + list_for_each_entry(owner, &conn->names_list, conn_entry) + if (!(owner->flags & KDBUS_NAME_IN_QUEUE) && + !strcmp(name, owner->name->name)) return true; return false; @@ -1052,6 +1050,7 @@ static int kdbus_pin_dst(struct kdbus_bus *bus, struct kdbus_conn **out_dst) { const struct kdbus_msg *msg = staging->msg; + struct kdbus_name_owner *owner = NULL; struct kdbus_name_entry *name = NULL; struct kdbus_conn *dst = NULL; int ret; @@ -1070,7 +1069,9 @@ static int kdbus_pin_dst(struct kdbus_bus *bus, } else { name = kdbus_name_lookup_unlocked(bus->name_registry, staging->dst_name); - if (!name) + if (name) + owner = kdbus_name_get_owner(name); + if (!owner) return -ESRCH; /* @@ -1082,19 +1083,14 @@ static int kdbus_pin_dst(struct kdbus_bus *bus, * owns the given name. */ if (msg->dst_id != KDBUS_DST_ID_NAME && - msg->dst_id != name->conn->id) + msg->dst_id != owner->conn->id) return -EREMCHG; - if (!name->conn && name->activator) - dst = kdbus_conn_ref(name->activator); - else - dst = kdbus_conn_ref(name->conn); - if ((msg->flags & KDBUS_MSG_NO_AUTO_START) && - kdbus_conn_is_activator(dst)) { - ret = -EADDRNOTAVAIL; - goto error; - } + kdbus_conn_is_activator(owner->conn)) + return -EADDRNOTAVAIL; + + dst = kdbus_conn_ref(owner->conn); } *out_name = name; @@ -1383,7 +1379,7 @@ static bool kdbus_conn_policy_query_all(struct kdbus_conn *conn, struct kdbus_conn *whom, unsigned int access) { - struct kdbus_name_entry *ne; + struct kdbus_name_owner *owner; bool pass = false; int res; @@ -1392,10 +1388,14 @@ static bool kdbus_conn_policy_query_all(struct kdbus_conn *conn, down_read(&db->entries_rwlock); mutex_lock(&whom->lock); - list_for_each_entry(ne, &whom->names_list, conn_entry) { - res = kdbus_policy_query_unlocked(db, conn_creds ? : conn->cred, - ne->name, - kdbus_strhash(ne->name)); + list_for_each_entry(owner, &whom->names_list, conn_entry) { + if (owner->flags & KDBUS_NAME_IN_QUEUE) + continue; + + res = kdbus_policy_query_unlocked(db, + conn_creds ? : conn->cred, + owner->name->name, + kdbus_strhash(owner->name->name)); if (res >= (int)access) { pass = true; break; @@ -1713,6 +1713,7 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp) struct kdbus_meta_conn *conn_meta = NULL; struct kdbus_pool_slice *slice = NULL; struct kdbus_name_entry *entry = NULL; + struct kdbus_name_owner *owner = NULL; struct kdbus_conn *owner_conn = NULL; struct kdbus_item *meta_items = NULL; struct kdbus_info info = {}; @@ -1749,15 +1750,17 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void __user *argp) if (name) { entry = kdbus_name_lookup_unlocked(bus->name_registry, name); - if (!entry || !entry->conn || + if (entry) + owner = kdbus_name_get_owner(entry); + if (!owner || !kdbus_conn_policy_see_name(conn, current_cred(), name) || - (cmd->id != 0 && entry->conn->id != cmd->id)) { + (cmd->id != 0 && owner->conn->id != cmd->id)) { /* pretend a name doesn't exist if you cannot see it */ ret = -ESRCH; goto exit; } - owner_conn = kdbus_conn_ref(entry->conn); + owner_conn = kdbus_conn_ref(owner->conn); } else if (cmd->id > 0) { owner_conn = kdbus_bus_find_conn_by_id(bus, cmd->id); if (!owner_conn || !kdbus_conn_policy_see(conn, current_cred(), diff --git a/ipc/kdbus/connection.h b/ipc/kdbus/connection.h index 8e0180ace..1ad082014 100644 --- a/ipc/kdbus/connection.h +++ b/ipc/kdbus/connection.h @@ -30,6 +30,7 @@ KDBUS_HELLO_POLICY_HOLDER | \ KDBUS_HELLO_MONITOR) +struct kdbus_name_entry; struct kdbus_quota; struct kdbus_staging; @@ -61,7 +62,6 @@ struct kdbus_staging; * @cred: The credentials of the connection at creation time * @pid: Pid at creation time * @root_path: Root path at creation time - * @name_count: Number of owned well-known names * @request_count: Number of pending requests issued by this * connection that are waiting for replies from * other peers @@ -70,9 +70,8 @@ struct kdbus_staging; * @queue: The message queue associated with this connection * @quota: Array of per-user quota indexed by user->id * @n_quota: Number of elements in quota array - * @activator_of: Well-known name entry this connection acts as an * @names_list: List of well-known names - * @names_queue_list: Well-known names this connection waits for + * @name_count: Number of owned well-known names * @privileged: Whether this connection is privileged on the domain * @owner: Owned by the same user as the bus owner */ @@ -102,7 +101,6 @@ struct kdbus_conn { const struct cred *cred; struct pid *pid; struct path root_path; - atomic_t name_count; atomic_t request_count; atomic_t lost_count; wait_queue_head_t wait; @@ -112,9 +110,8 @@ struct kdbus_conn { unsigned int n_quota; /* protected by registry->rwlock */ - struct kdbus_name_entry *activator_of; struct list_head names_list; - struct list_head names_queue_list; + unsigned int name_count; bool privileged:1; bool owner:1; diff --git a/ipc/kdbus/domain.c b/ipc/kdbus/domain.c index ac9f760c1..5d52d009d 100644 --- a/ipc/kdbus/domain.c +++ b/ipc/kdbus/domain.c @@ -60,7 +60,7 @@ static struct kdbus_node *kdbus_domain_control_new(struct kdbus_domain *domain, return node; exit_free: - kdbus_node_deactivate(node); + kdbus_node_drain(node); kdbus_node_unref(node); return ERR_PTR(ret); } @@ -114,7 +114,7 @@ struct kdbus_domain *kdbus_domain_new(unsigned int access) return d; exit_unref: - kdbus_node_deactivate(&d->node); + kdbus_node_drain(&d->node); kdbus_node_unref(&d->node); return ERR_PTR(ret); } diff --git a/ipc/kdbus/endpoint.c b/ipc/kdbus/endpoint.c index 44e7a20de..5694ff6dc 100644 --- a/ipc/kdbus/endpoint.c +++ b/ipc/kdbus/endpoint.c @@ -146,7 +146,7 @@ struct kdbus_ep *kdbus_ep_new(struct kdbus_bus *bus, const char *name, return e; exit_unref: - kdbus_node_deactivate(&e->node); + kdbus_node_drain(&e->node); kdbus_node_unref(&e->node); return ERR_PTR(ret); } @@ -262,7 +262,7 @@ exit: ret = kdbus_args_clear(&args, ret); if (ret < 0) { if (ep) { - kdbus_node_deactivate(&ep->node); + kdbus_node_drain(&ep->node); kdbus_ep_unref(ep); } return ERR_PTR(ret); diff --git a/ipc/kdbus/fs.c b/ipc/kdbus/fs.c index 09c480924..6330c61e5 100644 --- a/ipc/kdbus/fs.c +++ b/ipc/kdbus/fs.c @@ -320,7 +320,7 @@ static void fs_super_kill(struct super_block *sb) struct kdbus_domain *domain = sb->s_fs_info; if (domain) { - kdbus_node_deactivate(&domain->node); + kdbus_node_drain(&domain->node); domain->dentry = NULL; } @@ -353,7 +353,7 @@ static struct dentry *fs_super_mount(struct file_system_type *fs_type, sb = sget(fs_type, NULL, fs_super_set, flags, domain); if (IS_ERR(sb)) { - kdbus_node_deactivate(&domain->node); + kdbus_node_drain(&domain->node); kdbus_domain_unref(domain); return ERR_CAST(sb); } diff --git a/ipc/kdbus/handle.c b/ipc/kdbus/handle.c index fc60932d6..2f82c2a32 100644 --- a/ipc/kdbus/handle.c +++ b/ipc/kdbus/handle.c @@ -310,13 +310,13 @@ static int kdbus_handle_release(struct inode *inode, struct file *file) switch (handle->type) { case KDBUS_HANDLE_BUS_OWNER: if (handle->bus_owner) { - kdbus_node_deactivate(&handle->bus_owner->node); + kdbus_node_drain(&handle->bus_owner->node); kdbus_bus_unref(handle->bus_owner); } break; case KDBUS_HANDLE_EP_OWNER: if (handle->ep_owner) { - kdbus_node_deactivate(&handle->ep_owner->node); + kdbus_node_drain(&handle->ep_owner->node); kdbus_ep_unref(handle->ep_owner); } break; diff --git a/ipc/kdbus/limits.h b/ipc/kdbus/limits.h index c54925a25..bd47119cd 100644 --- a/ipc/kdbus/limits.h +++ b/ipc/kdbus/limits.h @@ -41,7 +41,7 @@ #define KDBUS_SYSNAME_MAX_LEN 63 /* maximum number of matches per connection */ -#define KDBUS_MATCH_MAX 256 +#define KDBUS_MATCH_MAX 4096 /* maximum number of queued messages from the same individual user */ #define KDBUS_CONN_MAX_MSGS 256 diff --git a/ipc/kdbus/main.c b/ipc/kdbus/main.c index 1ad4dc8da..c2117ea53 100644 --- a/ipc/kdbus/main.c +++ b/ipc/kdbus/main.c @@ -75,16 +75,13 @@ * 'ยป struct kdbus_ep *ep (owned) */ -/* kdbus mount-point /sys/fs/kdbus */ -static struct kobject *kdbus_dir; - static int __init kdbus_init(void) { int ret; - kdbus_dir = kobject_create_and_add(KBUILD_MODNAME, fs_kobj); - if (!kdbus_dir) - return -ENOMEM; + ret = sysfs_create_mount_point(fs_kobj, KBUILD_MODNAME); + if (ret) + return ret; ret = kdbus_fs_init(); if (ret < 0) { @@ -96,14 +93,14 @@ static int __init kdbus_init(void) return 0; exit_dir: - kobject_put(kdbus_dir); + sysfs_remove_mount_point(fs_kobj, KBUILD_MODNAME); return ret; } static void __exit kdbus_exit(void) { kdbus_fs_exit(); - kobject_put(kdbus_dir); + sysfs_remove_mount_point(fs_kobj, KBUILD_MODNAME); ida_destroy(&kdbus_node_ida); } diff --git a/ipc/kdbus/metadata.c b/ipc/kdbus/metadata.c index d4973a90a..71ca475a8 100644 --- a/ipc/kdbus/metadata.c +++ b/ipc/kdbus/metadata.c @@ -603,7 +603,7 @@ static void kdbus_meta_conn_collect_timestamp(struct kdbus_meta_conn *mc, static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc, struct kdbus_conn *conn) { - const struct kdbus_name_entry *e; + const struct kdbus_name_owner *owner; struct kdbus_item *item; size_t slen, size; @@ -611,9 +611,11 @@ static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc, size = 0; /* open-code length calculation to avoid final padding */ - list_for_each_entry(e, &conn->names_list, conn_entry) - size = KDBUS_ALIGN8(size) + KDBUS_ITEM_HEADER_SIZE + - sizeof(struct kdbus_name) + strlen(e->name) + 1; + list_for_each_entry(owner, &conn->names_list, conn_entry) + if (!(owner->flags & KDBUS_NAME_IN_QUEUE)) + size = KDBUS_ALIGN8(size) + KDBUS_ITEM_HEADER_SIZE + + sizeof(struct kdbus_name) + + strlen(owner->name->name) + 1; if (!size) return 0; @@ -626,12 +628,15 @@ static int kdbus_meta_conn_collect_names(struct kdbus_meta_conn *mc, mc->owned_names_items = item; mc->owned_names_size = size; - list_for_each_entry(e, &conn->names_list, conn_entry) { - slen = strlen(e->name) + 1; + list_for_each_entry(owner, &conn->names_list, conn_entry) { + if (owner->flags & KDBUS_NAME_IN_QUEUE) + continue; + + slen = strlen(owner->name->name) + 1; kdbus_item_set(item, KDBUS_ITEM_OWNED_NAME, NULL, sizeof(struct kdbus_name) + slen); - item->name.flags = e->flags; - memcpy(item->name.name, e->name, slen); + item->name.flags = owner->flags; + memcpy(item->name.name, owner->name->name, slen); item = KDBUS_ITEM_NEXT(item); } diff --git a/ipc/kdbus/names.c b/ipc/kdbus/names.c index 057f8061c..bf44ca3f1 100644 --- a/ipc/kdbus/names.c +++ b/ipc/kdbus/names.c @@ -34,167 +34,128 @@ #include "notify.h" #include "policy.h" -struct kdbus_name_pending { - u64 flags; - struct kdbus_conn *conn; - struct kdbus_name_entry *name; - struct list_head conn_entry; - struct list_head name_entry; -}; +#define KDBUS_NAME_SAVED_MASK (KDBUS_NAME_ALLOW_REPLACEMENT | \ + KDBUS_NAME_QUEUE) -static int kdbus_name_pending_new(struct kdbus_name_entry *e, - struct kdbus_conn *conn, u64 flags) +static bool kdbus_name_owner_is_used(struct kdbus_name_owner *owner) { - struct kdbus_name_pending *p; - - kdbus_conn_assert_active(conn); - - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - p->flags = flags; - p->conn = conn; - p->name = e; - list_add_tail(&p->conn_entry, &conn->names_queue_list); - list_add_tail(&p->name_entry, &e->queue); - - return 0; + return !list_empty(&owner->name_entry) || + owner == owner->name->activator; } -static void kdbus_name_pending_free(struct kdbus_name_pending *p) +static struct kdbus_name_owner * +kdbus_name_owner_new(struct kdbus_conn *conn, struct kdbus_name_entry *name, + u64 flags) { - if (!p) - return; + struct kdbus_name_owner *owner; - list_del(&p->name_entry); - list_del(&p->conn_entry); - kfree(p); -} - -static struct kdbus_name_entry * -kdbus_name_entry_new(struct kdbus_name_registry *r, u32 hash, const char *name) -{ - struct kdbus_name_entry *e; - size_t namelen; + kdbus_conn_assert_active(conn); - namelen = strlen(name); + if (conn->name_count >= KDBUS_CONN_MAX_NAMES) + return ERR_PTR(-E2BIG); - e = kmalloc(sizeof(*e) + namelen + 1, GFP_KERNEL); - if (!e) + owner = kmalloc(sizeof(*owner), GFP_KERNEL); + if (!owner) return ERR_PTR(-ENOMEM); - e->name_id = ++r->name_seq_last; - e->flags = 0; - e->conn = NULL; - e->activator = NULL; - INIT_LIST_HEAD(&e->queue); - INIT_LIST_HEAD(&e->conn_entry); - hash_add(r->entries_hash, &e->hentry, hash); - memcpy(e->name, name, namelen + 1); + owner->flags = flags & KDBUS_NAME_SAVED_MASK; + owner->conn = conn; + owner->name = name; + list_add_tail(&owner->conn_entry, &conn->names_list); + INIT_LIST_HEAD(&owner->name_entry); - return e; + ++conn->name_count; + return owner; } -static void kdbus_name_entry_free(struct kdbus_name_entry *e) +static void kdbus_name_owner_free(struct kdbus_name_owner *owner) { - if (!e) + if (!owner) return; - WARN_ON(!list_empty(&e->conn_entry)); - WARN_ON(!list_empty(&e->queue)); - WARN_ON(e->activator); - WARN_ON(e->conn); - - hash_del(&e->hentry); - kfree(e); + WARN_ON(kdbus_name_owner_is_used(owner)); + --owner->conn->name_count; + list_del(&owner->conn_entry); + kfree(owner); } -static void kdbus_name_entry_set_owner(struct kdbus_name_entry *e, - struct kdbus_conn *conn, u64 flags) +static struct kdbus_name_owner * +kdbus_name_owner_find(struct kdbus_name_entry *name, struct kdbus_conn *conn) { - WARN_ON(e->conn); + struct kdbus_name_owner *owner; - e->conn = kdbus_conn_ref(conn); - e->flags = flags; - atomic_inc(&conn->name_count); - list_add_tail(&e->conn_entry, &e->conn->names_list); + /* + * Use conn->names_list over name->queue to make sure boundaries of + * this linear search are controlled by the connection itself. + * Furthermore, this will find normal owners as well as activators + * without any additional code. + */ + list_for_each_entry(owner, &conn->names_list, conn_entry) + if (owner->name == name) + return owner; + + return NULL; } -static void kdbus_name_entry_remove_owner(struct kdbus_name_entry *e) +static bool kdbus_name_entry_is_used(struct kdbus_name_entry *name) { - WARN_ON(!e->conn); - - list_del_init(&e->conn_entry); - atomic_dec(&e->conn->name_count); - e->flags = 0; - e->conn = kdbus_conn_unref(e->conn); + return !list_empty(&name->queue) || name->activator; } -static void kdbus_name_entry_replace_owner(struct kdbus_name_entry *e, - struct kdbus_conn *conn, u64 flags) +static struct kdbus_name_owner * +kdbus_name_entry_first(struct kdbus_name_entry *name) { - if (WARN_ON(!e->conn) || WARN_ON(conn == e->conn)) - return; - - kdbus_notify_name_change(conn->ep->bus, KDBUS_ITEM_NAME_CHANGE, - e->conn->id, conn->id, - e->flags, flags, e->name); - kdbus_name_entry_remove_owner(e); - kdbus_name_entry_set_owner(e, conn, flags); + return list_first_entry_or_null(&name->queue, struct kdbus_name_owner, + name_entry); } -/** - * kdbus_name_is_valid() - check if a name is valid - * @p: The name to check - * @allow_wildcard: Whether or not to allow a wildcard name - * - * A name is valid if all of the following criterias are met: - * - * - The name has two or more elements separated by a period ('.') character. - * - All elements must contain at least one character. - * - Each element must only contain the ASCII characters "[A-Z][a-z][0-9]_-" - * and must not begin with a digit. - * - The name must not exceed KDBUS_NAME_MAX_LEN. - * - If @allow_wildcard is true, the name may end on '.*' - */ -bool kdbus_name_is_valid(const char *p, bool allow_wildcard) +static struct kdbus_name_entry * +kdbus_name_entry_new(struct kdbus_name_registry *r, u32 hash, + const char *name_str) { - bool dot, found_dot = false; - const char *q; + struct kdbus_name_entry *name; + size_t namelen; - for (dot = true, q = p; *q; q++) { - if (*q == '.') { - if (dot) - return false; + lockdep_assert_held(&r->rwlock); - found_dot = true; - dot = true; - } else { - bool good; + namelen = strlen(name_str); - good = isalpha(*q) || (!dot && isdigit(*q)) || - *q == '_' || *q == '-' || - (allow_wildcard && dot && - *q == '*' && *(q + 1) == '\0'); + name = kmalloc(sizeof(*name) + namelen + 1, GFP_KERNEL); + if (!name) + return ERR_PTR(-ENOMEM); - if (!good) - return false; + name->name_id = ++r->name_seq_last; + name->activator = NULL; + INIT_LIST_HEAD(&name->queue); + hash_add(r->entries_hash, &name->hentry, hash); + memcpy(name->name, name_str, namelen + 1); - dot = false; - } - } + return name; +} - if (q - p > KDBUS_NAME_MAX_LEN) - return false; +static void kdbus_name_entry_free(struct kdbus_name_entry *name) +{ + if (!name) + return; - if (dot) - return false; + WARN_ON(kdbus_name_entry_is_used(name)); + hash_del(&name->hentry); + kfree(name); +} - if (!found_dot) - return false; +static struct kdbus_name_entry * +kdbus_name_entry_find(struct kdbus_name_registry *r, u32 hash, + const char *name_str) +{ + struct kdbus_name_entry *name; - return true; + lockdep_assert_held(&r->rwlock); + + hash_for_each_possible(r->entries_hash, name, hentry, hash) + if (!strcmp(name->name, name_str)) + return name; + + return NULL; } /** @@ -218,32 +179,19 @@ struct kdbus_name_registry *kdbus_name_registry_new(void) } /** - * kdbus_name_registry_free() - drop a name reg's reference - * @reg: The name registry, may be %NULL + * kdbus_name_registry_free() - free name registry + * @r: name registry to free, or NULL * - * Cleanup the name registry's internal structures. + * Free a name registry and cleanup all internal objects. This is a no-op if + * you pass NULL as registry. */ -void kdbus_name_registry_free(struct kdbus_name_registry *reg) +void kdbus_name_registry_free(struct kdbus_name_registry *r) { - if (!reg) + if (!r) return; - WARN_ON(!hash_empty(reg->entries_hash)); - kfree(reg); -} - -static struct kdbus_name_entry * -kdbus_name_find(struct kdbus_name_registry *reg, u32 hash, const char *name) -{ - struct kdbus_name_entry *e; - - lockdep_assert_held(®->rwlock); - - hash_for_each_possible(reg->entries_hash, e, hentry, hash) - if (strcmp(e->name, name) == 0) - return e; - - return NULL; + WARN_ON(!hash_empty(r->entries_hash)); + kfree(r); } /** @@ -260,169 +208,286 @@ kdbus_name_find(struct kdbus_name_registry *reg, u32 hash, const char *name) struct kdbus_name_entry * kdbus_name_lookup_unlocked(struct kdbus_name_registry *reg, const char *name) { - return kdbus_name_find(reg, kdbus_strhash(name), name); + return kdbus_name_entry_find(reg, kdbus_strhash(name), name); } -/** - * kdbus_name_acquire() - acquire a name - * @reg: The name registry - * @conn: The connection to pin this entry to - * @name: The name to acquire - * @flags: Acquisition flags (KDBUS_NAME_*) - * @return_flags: Pointer to return flags for the acquired name - * (KDBUS_NAME_*), may be %NULL - * - * Callers must ensure that @conn is either a privileged bus user or has - * sufficient privileges in the policy-db to own the well-known name @name. - * - * Return: 0 success, negative error number on failure. - */ -int kdbus_name_acquire(struct kdbus_name_registry *reg, - struct kdbus_conn *conn, const char *name, - u64 flags, u64 *return_flags) +static int kdbus_name_become_activator(struct kdbus_name_owner *owner, + u64 *return_flags) { - struct kdbus_name_entry *e; - u64 rflags = 0; - int ret = 0; - u32 hash; + if (kdbus_name_owner_is_used(owner)) + return -EALREADY; + if (owner->name->activator) + return -EEXIST; - kdbus_conn_assert_active(conn); + owner->name->activator = owner; + owner->flags |= KDBUS_NAME_ACTIVATOR; - down_write(®->rwlock); - - if (!kdbus_conn_policy_own_name(conn, current_cred(), name)) { - ret = -EPERM; - goto exit_unlock; + if (kdbus_name_entry_first(owner->name)) { + owner->flags |= KDBUS_NAME_IN_QUEUE; + } else { + owner->flags |= KDBUS_NAME_PRIMARY; + kdbus_notify_name_change(owner->conn->ep->bus, + KDBUS_ITEM_NAME_ADD, + 0, owner->conn->id, + 0, owner->flags, + owner->name->name); } - hash = kdbus_strhash(name); - e = kdbus_name_find(reg, hash, name); - if (!e) { - /* claim new name */ + if (return_flags) + *return_flags = owner->flags | KDBUS_NAME_ACQUIRED; - if (conn->activator_of) { - ret = -EINVAL; - goto exit_unlock; - } + return 0; +} - e = kdbus_name_entry_new(reg, hash, name); - if (IS_ERR(e)) { - ret = PTR_ERR(e); - goto exit_unlock; - } +static int kdbus_name_update(struct kdbus_name_owner *owner, u64 flags, + u64 *return_flags) +{ + struct kdbus_name_owner *primary, *activator; + struct kdbus_name_entry *name; + struct kdbus_bus *bus; + u64 nflags = 0; + int ret = 0; - if (kdbus_conn_is_activator(conn)) { - e->activator = kdbus_conn_ref(conn); - conn->activator_of = e; + name = owner->name; + bus = owner->conn->ep->bus; + primary = kdbus_name_entry_first(name); + activator = name->activator; + + /* cannot be activator and acquire a name */ + if (owner == activator) + return -EUCLEAN; + + /* update saved flags */ + owner->flags = flags & KDBUS_NAME_SAVED_MASK; + + if (!primary) { + /* + * No primary owner (but maybe an activator). Take over the + * name. + */ + + list_add(&owner->name_entry, &name->queue); + owner->flags |= KDBUS_NAME_PRIMARY; + nflags |= KDBUS_NAME_ACQUIRED; + + /* move messages to new owner on activation */ + if (activator) { + kdbus_conn_move_messages(owner->conn, activator->conn, + name->name_id); + kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE, + activator->conn->id, owner->conn->id, + activator->flags, owner->flags, + name->name); + activator->flags &= ~KDBUS_NAME_PRIMARY; + activator->flags |= KDBUS_NAME_IN_QUEUE; + } else { + kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_ADD, + 0, owner->conn->id, + 0, owner->flags, + name->name); } - kdbus_name_entry_set_owner(e, conn, flags); - kdbus_notify_name_change(e->conn->ep->bus, KDBUS_ITEM_NAME_ADD, - 0, e->conn->id, 0, e->flags, e->name); - } else if (e->conn == conn || e == conn->activator_of) { - /* connection already owns that name */ - ret = -EALREADY; - } else if (kdbus_conn_is_activator(conn)) { - /* activator claims existing name */ - - if (conn->activator_of) { - ret = -EINVAL; /* multiple names not allowed */ - } else if (e->activator) { - ret = -EEXIST; /* only one activator per name */ + } else if (owner == primary) { + /* + * Already the primary owner of the name, flags were already + * updated. Nothing to do. + */ + + owner->flags |= KDBUS_NAME_PRIMARY; + + } else if ((primary->flags & KDBUS_NAME_ALLOW_REPLACEMENT) && + (flags & KDBUS_NAME_REPLACE_EXISTING)) { + /* + * We're not the primary owner but can replace it. Move us + * ahead of the primary owner and acquire the name (possibly + * skipping queued owners ahead of us). + */ + + list_del_init(&owner->name_entry); + list_add(&owner->name_entry, &name->queue); + owner->flags |= KDBUS_NAME_PRIMARY; + nflags |= KDBUS_NAME_ACQUIRED; + + kdbus_notify_name_change(bus, KDBUS_ITEM_NAME_CHANGE, + primary->conn->id, owner->conn->id, + primary->flags, owner->flags, + name->name); + + /* requeue old primary, or drop if queueing not wanted */ + if (primary->flags & KDBUS_NAME_QUEUE) { + primary->flags &= ~KDBUS_NAME_PRIMARY; + primary->flags |= KDBUS_NAME_IN_QUEUE; } else { - e->activator = kdbus_conn_ref(conn); - conn->activator_of = e; - } - } else if (e->flags & KDBUS_NAME_ACTIVATOR) { - /* claim name of an activator */ - - kdbus_conn_move_messages(conn, e->activator, 0); - kdbus_name_entry_replace_owner(e, conn, flags); - } else if ((flags & KDBUS_NAME_REPLACE_EXISTING) && - (e->flags & KDBUS_NAME_ALLOW_REPLACEMENT)) { - /* claim name of a previous owner */ - - if (e->flags & KDBUS_NAME_QUEUE) { - /* move owner back to queue if they asked for it */ - ret = kdbus_name_pending_new(e, e->conn, e->flags); - if (ret < 0) - goto exit_unlock; + list_del_init(&primary->name_entry); + kdbus_name_owner_free(primary); } - kdbus_name_entry_replace_owner(e, conn, flags); } else if (flags & KDBUS_NAME_QUEUE) { - /* add to waiting-queue of the name */ - - ret = kdbus_name_pending_new(e, conn, flags); - if (ret >= 0) - /* tell the caller that we queued it */ - rflags |= KDBUS_NAME_IN_QUEUE; + /* + * Name is already occupied and we cannot take it over, but + * queuing is allowed. Put us silently on the queue, if not + * already there. + */ + + owner->flags |= KDBUS_NAME_IN_QUEUE; + if (!kdbus_name_owner_is_used(owner)) { + list_add_tail(&owner->name_entry, &name->queue); + nflags |= KDBUS_NAME_ACQUIRED; + } + } else if (kdbus_name_owner_is_used(owner)) { + /* + * Already queued on name, but re-queueing was not requested. + * Make sure to unlink it from the name, the caller is + * responsible for releasing it. + */ + + list_del_init(&owner->name_entry); } else { - /* the name is busy, return a failure */ + /* + * Name is already claimed and queueing is not requested. + * Return error to the caller. + */ + ret = -EEXIST; } - if (ret == 0 && return_flags) - *return_flags = rflags; + if (return_flags) + *return_flags = owner->flags | nflags; -exit_unlock: + return ret; +} + +int kdbus_name_acquire(struct kdbus_name_registry *reg, + struct kdbus_conn *conn, const char *name_str, + u64 flags, u64 *return_flags) +{ + struct kdbus_name_entry *name = NULL; + struct kdbus_name_owner *owner = NULL; + u32 hash; + int ret; + + kdbus_conn_assert_active(conn); + + down_write(®->rwlock); + + /* + * Verify the connection has access to the name. Do this before testing + * for double-acquisitions and other errors to make sure we do not leak + * information about this name through possible custom endpoints. + */ + if (!kdbus_conn_policy_own_name(conn, current_cred(), name_str)) { + ret = -EPERM; + goto exit; + } + + /* + * Lookup the name entry. If it already exists, search for an owner + * entry as we might already own that name. If either does not exist, + * we will allocate a fresh one. + */ + hash = kdbus_strhash(name_str); + name = kdbus_name_entry_find(reg, hash, name_str); + if (name) { + owner = kdbus_name_owner_find(name, conn); + } else { + name = kdbus_name_entry_new(reg, hash, name_str); + if (IS_ERR(name)) { + ret = PTR_ERR(name); + name = NULL; + goto exit; + } + } + + /* create name owner object if not already queued */ + if (!owner) { + owner = kdbus_name_owner_new(conn, name, flags); + if (IS_ERR(owner)) { + ret = PTR_ERR(owner); + owner = NULL; + goto exit; + } + } + + if (flags & KDBUS_NAME_ACTIVATOR) + ret = kdbus_name_become_activator(owner, return_flags); + else + ret = kdbus_name_update(owner, flags, return_flags); + if (ret < 0) + goto exit; + +exit: + if (owner && !kdbus_name_owner_is_used(owner)) + kdbus_name_owner_free(owner); + if (name && !kdbus_name_entry_is_used(name)) + kdbus_name_entry_free(name); up_write(®->rwlock); kdbus_notify_flush(conn->ep->bus); return ret; } -static void kdbus_name_release_unlocked(struct kdbus_name_registry *reg, - struct kdbus_name_entry *e) +static void kdbus_name_release_unlocked(struct kdbus_name_owner *owner) { - struct kdbus_name_pending *p; - - lockdep_assert_held(®->rwlock); - - p = list_first_entry_or_null(&e->queue, struct kdbus_name_pending, - name_entry); - - if (p) { - /* give it to first active waiter in the queue */ - kdbus_name_entry_replace_owner(e, p->conn, p->flags); - kdbus_name_pending_free(p); - } else if (e->activator && e->activator != e->conn) { - /* hand it back to an active activator connection */ - kdbus_conn_move_messages(e->activator, e->conn, e->name_id); - kdbus_name_entry_replace_owner(e, e->activator, - KDBUS_NAME_ACTIVATOR); - } else { - /* release the name */ - kdbus_notify_name_change(e->conn->ep->bus, - KDBUS_ITEM_NAME_REMOVE, - e->conn->id, 0, e->flags, 0, e->name); - kdbus_name_entry_remove_owner(e); - kdbus_name_entry_free(e); + struct kdbus_name_owner *primary, *next; + struct kdbus_name_entry *name; + + name = owner->name; + primary = kdbus_name_entry_first(name); + + list_del_init(&owner->name_entry); + if (owner == name->activator) + name->activator = NULL; + + if (!primary || owner == primary) { + next = kdbus_name_entry_first(name); + if (!next) + next = name->activator; + + if (next) { + /* hand to next in queue */ + next->flags &= ~KDBUS_NAME_IN_QUEUE; + next->flags |= KDBUS_NAME_PRIMARY; + if (next == name->activator) + kdbus_conn_move_messages(next->conn, + owner->conn, + name->name_id); + + kdbus_notify_name_change(owner->conn->ep->bus, + KDBUS_ITEM_NAME_CHANGE, + owner->conn->id, next->conn->id, + owner->flags, next->flags, + name->name); + } else { + kdbus_notify_name_change(owner->conn->ep->bus, + KDBUS_ITEM_NAME_REMOVE, + owner->conn->id, 0, + owner->flags, 0, + name->name); + } } + + kdbus_name_owner_free(owner); + if (!kdbus_name_entry_is_used(name)) + kdbus_name_entry_free(name); } static int kdbus_name_release(struct kdbus_name_registry *reg, struct kdbus_conn *conn, - const char *name) + const char *name_str) { - struct kdbus_name_pending *p; - struct kdbus_name_entry *e; + struct kdbus_name_owner *owner; + struct kdbus_name_entry *name; int ret = 0; down_write(®->rwlock); - e = kdbus_name_find(reg, kdbus_strhash(name), name); - if (!e) { - ret = -ESRCH; - } else if (e->conn == conn) { - kdbus_name_release_unlocked(reg, e); + name = kdbus_name_entry_find(reg, kdbus_strhash(name_str), name_str); + if (name) { + owner = kdbus_name_owner_find(name, conn); + if (owner) + kdbus_name_release_unlocked(owner); + else + ret = -EADDRINUSE; } else { - ret = -EADDRINUSE; - list_for_each_entry(p, &e->queue, name_entry) { - if (p->conn == conn) { - kdbus_name_pending_free(p); - ret = 0; - break; - } - } + ret = -ESRCH; } up_write(®->rwlock); @@ -438,33 +503,74 @@ static int kdbus_name_release(struct kdbus_name_registry *reg, void kdbus_name_release_all(struct kdbus_name_registry *reg, struct kdbus_conn *conn) { - struct kdbus_name_pending *p; - struct kdbus_conn *activator = NULL; - struct kdbus_name_entry *e; + struct kdbus_name_owner *owner; down_write(®->rwlock); - if (conn->activator_of) { - activator = conn->activator_of->activator; - conn->activator_of->activator = NULL; - } - - while ((p = list_first_entry_or_null(&conn->names_queue_list, - struct kdbus_name_pending, - conn_entry))) - kdbus_name_pending_free(p); - while ((e = list_first_entry_or_null(&conn->names_list, - struct kdbus_name_entry, - conn_entry))) - kdbus_name_release_unlocked(reg, e); + while ((owner = list_first_entry_or_null(&conn->names_list, + struct kdbus_name_owner, + conn_entry))) + kdbus_name_release_unlocked(owner); up_write(®->rwlock); - kdbus_conn_unref(activator); kdbus_notify_flush(conn->ep->bus); } /** + * kdbus_name_is_valid() - check if a name is valid + * @p: The name to check + * @allow_wildcard: Whether or not to allow a wildcard name + * + * A name is valid if all of the following criterias are met: + * + * - The name has two or more elements separated by a period ('.') character. + * - All elements must contain at least one character. + * - Each element must only contain the ASCII characters "[A-Z][a-z][0-9]_-" + * and must not begin with a digit. + * - The name must not exceed KDBUS_NAME_MAX_LEN. + * - If @allow_wildcard is true, the name may end on '.*' + */ +bool kdbus_name_is_valid(const char *p, bool allow_wildcard) +{ + bool dot, found_dot = false; + const char *q; + + for (dot = true, q = p; *q; q++) { + if (*q == '.') { + if (dot) + return false; + + found_dot = true; + dot = true; + } else { + bool good; + + good = isalpha(*q) || (!dot && isdigit(*q)) || + *q == '_' || *q == '-' || + (allow_wildcard && dot && + *q == '*' && *(q + 1) == '\0'); + + if (!good) + return false; + + dot = false; + } + } + + if (q - p > KDBUS_NAME_MAX_LEN) + return false; + + if (dot) + return false; + + if (!found_dot) + return false; + + return true; +} + +/** * kdbus_cmd_name_acquire() - handle KDBUS_CMD_NAME_ACQUIRE * @conn: connection to operate on * @argp: command payload @@ -503,20 +609,9 @@ int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp) goto exit; } - /* - * Do atomic_inc_return here to reserve our slot, then decrement - * it before returning. - */ - if (atomic_inc_return(&conn->name_count) > KDBUS_CONN_MAX_NAMES) { - ret = -E2BIG; - goto exit_dec; - } - ret = kdbus_name_acquire(conn->ep->bus->name_registry, conn, item_name, cmd->flags, &cmd->return_flags); -exit_dec: - atomic_dec(&conn->name_count); exit: return kdbus_args_clear(&args, ret); } @@ -559,7 +654,7 @@ static int kdbus_list_write(struct kdbus_conn *conn, struct kdbus_conn *c, struct kdbus_pool_slice *slice, size_t *pos, - struct kdbus_name_entry *e, + struct kdbus_name_owner *o, bool write) { struct kvec kvec[4]; @@ -580,22 +675,22 @@ static int kdbus_list_write(struct kdbus_conn *conn, u64 flags; } h = {}; - if (e && !kdbus_conn_policy_see_name_unlocked(conn, current_cred(), - e->name)) + if (o && !kdbus_conn_policy_see_name_unlocked(conn, current_cred(), + o->name->name)) return 0; kdbus_kvec_set(&kvec[cnt++], &info, sizeof(info), &info.size); /* append name */ - if (e) { - size_t slen = strlen(e->name) + 1; + if (o) { + size_t slen = strlen(o->name->name) + 1; h.size = offsetof(struct kdbus_item, name.name) + slen; h.type = KDBUS_ITEM_OWNED_NAME; - h.flags = e->flags; + h.flags = o->flags; kdbus_kvec_set(&kvec[cnt++], &h, sizeof(h), &info.size); - kdbus_kvec_set(&kvec[cnt++], e->name, slen, &info.size); + kdbus_kvec_set(&kvec[cnt++], o->name->name, slen, &info.size); cnt += !!kdbus_kvec_pad(&kvec[cnt], &info.size); } @@ -625,63 +720,52 @@ static int kdbus_list_all(struct kdbus_conn *conn, u64 flags, if (kdbus_conn_is_monitor(c)) continue; - /* skip activators */ - if (!(flags & KDBUS_LIST_ACTIVATORS) && - kdbus_conn_is_activator(c)) - continue; - /* all names the connection owns */ - if (flags & (KDBUS_LIST_NAMES | KDBUS_LIST_ACTIVATORS)) { - struct kdbus_name_entry *e; + if (flags & (KDBUS_LIST_NAMES | + KDBUS_LIST_ACTIVATORS | + KDBUS_LIST_QUEUED)) { + struct kdbus_name_owner *o; - list_for_each_entry(e, &c->names_list, conn_entry) { - struct kdbus_conn *a = e->activator; + list_for_each_entry(o, &c->names_list, conn_entry) { + if (o->flags & KDBUS_NAME_ACTIVATOR) { + if (!(flags & KDBUS_LIST_ACTIVATORS)) + continue; - if ((flags & KDBUS_LIST_ACTIVATORS) && - a && a != c) { - ret = kdbus_list_write(conn, a, slice, - &p, e, write); + ret = kdbus_list_write(conn, c, slice, + &p, o, write); if (ret < 0) { mutex_unlock(&c->lock); return ret; } added = true; - } + } else if (o->flags & KDBUS_NAME_IN_QUEUE) { + if (!(flags & KDBUS_LIST_QUEUED)) + continue; - if (flags & KDBUS_LIST_NAMES || - kdbus_conn_is_activator(c)) { ret = kdbus_list_write(conn, c, slice, - &p, e, write); + &p, o, write); if (ret < 0) { mutex_unlock(&c->lock); return ret; } added = true; - } - } - } + } else if (flags & KDBUS_LIST_NAMES) { + ret = kdbus_list_write(conn, c, slice, + &p, o, write); + if (ret < 0) { + mutex_unlock(&c->lock); + return ret; + } - /* queue of names the connection is currently waiting for */ - if (flags & KDBUS_LIST_QUEUED) { - struct kdbus_name_pending *q; - - list_for_each_entry(q, &c->names_queue_list, - conn_entry) { - ret = kdbus_list_write(conn, c, slice, &p, - q->name, write); - if (ret < 0) { - mutex_unlock(&c->lock); - return ret; + added = true; } - - added = true; } } /* nothing added so far, just add the unique ID */ - if (!added && flags & KDBUS_LIST_UNIQUE) { + if (!added && (flags & KDBUS_LIST_UNIQUE)) { ret = kdbus_list_write(conn, c, slice, &p, NULL, write); if (ret < 0) return ret; diff --git a/ipc/kdbus/names.h b/ipc/kdbus/names.h index 3dd258929..edac59ddd 100644 --- a/ipc/kdbus/names.h +++ b/ipc/kdbus/names.h @@ -18,6 +18,10 @@ #include <linux/hashtable.h> #include <linux/rwsem.h> +struct kdbus_name_entry; +struct kdbus_name_owner; +struct kdbus_name_registry; + /** * struct kdbus_name_registry - names registered for a bus * @entries_hash: Map of entries @@ -32,27 +36,37 @@ struct kdbus_name_registry { /** * struct kdbus_name_entry - well-know name entry - * @name_id: Sequence number of name entry to be able to uniquely + * @name_id: sequence number of name entry to be able to uniquely * identify a name over its registration lifetime - * @flags: KDBUS_NAME_* flags - * @conn: Connection owning the name - * @activator: Connection of the activator queuing incoming messages - * @queue: List of queued connections - * @conn_entry: Entry in connection - * @hentry: Entry in registry map - * @name: The well-known name + * @activator: activator of this name, or NULL + * @queue: list of queued owners + * @hentry: entry in registry map + * @name: well-known name */ struct kdbus_name_entry { u64 name_id; - u64 flags; - struct kdbus_conn *conn; - struct kdbus_conn *activator; + struct kdbus_name_owner *activator; struct list_head queue; - struct list_head conn_entry; struct hlist_node hentry; char name[]; }; +/** + * struct kdbus_name_owner - owner of a well-known name + * @flags: KDBUS_NAME_* flags of this owner + * @conn: connection owning the name + * @name: name that is owned + * @conn_entry: link into @conn + * @name_entry: link into @name + */ +struct kdbus_name_owner { + u64 flags; + struct kdbus_conn *conn; + struct kdbus_name_entry *name; + struct list_head conn_entry; + struct list_head name_entry; +}; + bool kdbus_name_is_valid(const char *p, bool allow_wildcard); struct kdbus_name_registry *kdbus_name_registry_new(void); @@ -71,4 +85,21 @@ int kdbus_cmd_name_acquire(struct kdbus_conn *conn, void __user *argp); int kdbus_cmd_name_release(struct kdbus_conn *conn, void __user *argp); int kdbus_cmd_list(struct kdbus_conn *conn, void __user *argp); +/** + * kdbus_name_get_owner() - get current owner of a name + * @name: name to get current owner of + * + * This returns a pointer to the current owner of a name (or its activator if + * there is no owner). The caller must make sure @name is valid and does not + * vanish. + * + * Return: Pointer to current owner or NULL if there is none. + */ +static inline struct kdbus_name_owner * +kdbus_name_get_owner(struct kdbus_name_entry *name) +{ + return list_first_entry_or_null(&name->queue, struct kdbus_name_owner, + name_entry) ? : name->activator; +} + #endif diff --git a/ipc/kdbus/node.c b/ipc/kdbus/node.c index 89f58bc85..986aca39c 100644 --- a/ipc/kdbus/node.c +++ b/ipc/kdbus/node.c @@ -111,33 +111,27 @@ * * All nodes can be deactivated via kdbus_node_deactivate() at any time. You can * call this multiple times, even in parallel or on nodes that were never - * linked, and it will just work. The only restriction is, you must not hold an - * active reference when calling kdbus_node_deactivate(). - * By deactivating a node, it is immediately marked inactive. Then, we wait for - * all active references to be released (called 'draining' the node). This - * shouldn't take very long as we don't perform long-lasting operations while - * holding an active reference. Note that once the node is marked inactive, no - * new active references can be acquired. - * Once all active references are dropped, the node is considered 'drained'. Now - * kdbus_node_deactivate() is called on each child of the node before we - * continue deactivating our node. That is, once all children are entirely - * deactivated, we call ->release_cb() of our node. ->release_cb() can release - * any resources on that node which are bound to the "active" state of a node. - * When done, we unlink the node from its parent rb-tree, mark it as - * 'released' and return. - * If kdbus_node_deactivate() is called multiple times (even in parallel), all - * but one caller will just wait until the node is fully deactivated. That is, - * one random caller of kdbus_node_deactivate() is selected to call - * ->release_cb() and cleanup the node. Only once all this is done, all other - * callers will return from kdbus_node_deactivate(). That is, it doesn't matter - * whether you're the selected caller or not, it will only return after - * everything is fully done. + * linked, and it will just work. Furthermore, all children will be deactivated + * recursively as well. If a node is deactivated, there might still be active + * references that were acquired before calling kdbus_node_deactivate(). The + * owner of an object must call kdbus_node_drain() (which is a superset of + * kdbus_node_deactivate()) before dropping their reference. This will + * deactivate the node and also synchronously wait for all active references to + * be dropped. Hence, once kdbus_node_drain() returns, the node is fully + * released and no active references exist, anymore. + * kdbus_node_drain() can be called at any times, multiple times, and in + * parallel on multiple threads. All calls are synchronized internally and will + * return only once the node is fully drained. The only restriction is, you + * must not hold an active reference when calling kdbus_node_drain() (unlike + * deactivation, which allows the caller to hold an active reference). * * When a node is activated, we acquire a normal object reference to the node. - * This reference is dropped after deactivation is fully done (and only iff the + * This reference is dropped after deactivation is fully done (and only if the * node really was activated). This allows callers to link+activate a child node - * and then drop all refs. The node will be deactivated together with the - * parent, and then be freed when this reference is dropped. + * and then drop all refs. This has the effect that nobody owns a reference to + * the node, except for the parent node. Hence, if the parent is deactivated + * (and thus all children are deactivated, too), this will automatically + * release the child node. * * Currently, nodes provide a bunch of resources that external code can use * directly. This includes: @@ -198,11 +192,10 @@ * is even unique if linked but not activated, yet. This might * change in the future, though. Code should not rely on this. * - * * node->lock: lock to protect node->children, node->rb, node->parent + * * node->lock: lock to protect node->children, node->rb, node->parent * * node->parent: Reference to parent node. This is set during LINK time - * and is dropped during destruction. You must not access - * it unless you hold an active reference to the node or if - * you know the node is dead. + * and is dropped during destruction. You can freely access + * this field, but it may be NULL (root node). * * node->children: rb-tree of all linked children of this node. You must * not access this directly, but use one of the iterator * or lookup helpers. @@ -265,6 +258,18 @@ static unsigned int kdbus_node_name_hash(const char *name) * @name: name to compare the node with * @node: node to compare the name with * + * This compares a query string against a kdbus node. If the kdbus node has the + * given name, this returns 0. Otherwise, this returns >0 / <0 depending + * whether the query string is greater / less than the node. + * + * Note: If @node is drained but has the name @name, this returns 1. The + * reason for this is that we treat drained nodes as "renamed". The + * slot of such nodes is no longer occupied and new nodes can claim it. + * Obviously, this has the side-effect that you cannot match drained + * nodes, as they will never return 0 on name-matches. But this is + * intentional, as there is no reason why anyone would ever want to match + * on drained nodes. + * * Return: 0 if @name and @hash exactly match the information in @node, or * an integer less than or greater than zero if @name is found, respectively, * to be less than or be greater than the string stored in @node. @@ -272,10 +277,16 @@ static unsigned int kdbus_node_name_hash(const char *name) static int kdbus_node_name_compare(unsigned int hash, const char *name, const struct kdbus_node *node) { + int ret; + if (hash != node->hash) return hash - node->hash; - return strcmp(name, node->name); + ret = strcmp(name, node->name); + if (ret != 0) + return ret; + + return atomic_read(&node->active) == KDBUS_NODE_DRAINED; } /** @@ -312,7 +323,7 @@ void kdbus_node_init(struct kdbus_node *node, unsigned int type) * ID could be allocated. You must not activate a node if linking failed! It is * safe to deactivate it, though. * - * Once you linked a node, you must call kdbus_node_deactivate() before you drop + * Once you linked a node, you must call kdbus_node_drain() before you drop * the last reference (even if you never activate the node). * * Return: 0 on success. negative error otherwise. @@ -419,7 +430,7 @@ struct kdbus_node *kdbus_node_ref(struct kdbus_node *node) * * Note that this calls into ->free_cb() and thus _might_ sleep. The ->free_cb() * callbacks must not acquire any outer locks, though. So you can safely drop - * references while holding locks. + * references while holding locks (apart from node->parent->lock). * * If @node is NULL, this is a no-op. * @@ -431,7 +442,16 @@ struct kdbus_node *kdbus_node_unref(struct kdbus_node *node) struct kdbus_node safe = *node; WARN_ON(atomic_read(&node->active) != KDBUS_NODE_DRAINED); - WARN_ON(!RB_EMPTY_NODE(&node->rb)); + + if (node->parent) { + mutex_lock(&node->parent->lock); + if (!RB_EMPTY_NODE(&node->rb)) { + rb_erase(&node->rb, + &node->parent->children); + RB_CLEAR_NODE(&node->rb); + } + mutex_unlock(&node->parent->lock); + } if (node->free_cb) node->free_cb(node); @@ -439,12 +459,6 @@ struct kdbus_node *kdbus_node_unref(struct kdbus_node *node) ida_simple_remove(&kdbus_node_ida, safe.id); kfree(safe.name); - - /* - * kdbusfs relies on the parent to be available even after the - * node was deactivated and unlinked. Therefore, we pin it - * until a node is destroyed. - */ kdbus_node_unref(safe.parent); } @@ -534,78 +548,149 @@ bool kdbus_node_activate(struct kdbus_node *node) } /** - * kdbus_node_deactivate() - deactivate a node - * @node: The node to deactivate. + * kdbus_node_recurse_unlock() - advance iterator on a tree + * @start: node at which the iteration started + * @node: previously visited node * - * This function recursively deactivates this node and all its children. It - * returns only once all children and the node itself were recursively disabled - * (even if you call this function multiple times in parallel). + * This helper advances an iterator by one, when traversing a node tree. It is + * supposed to be used like this: * - * It is safe to call this function on _any_ node that was initialized _any_ - * number of times. + * struct kdbus_node *n; * - * This call may sleep, as it waits for all active references to be dropped. + * n = start; + * while (n) { + * mutex_lock(&n->lock); + * ... visit @n ... + * n = kdbus_node_recurse_unlock(start, n); + * } + * + * This helpers takes as input the start-node of the iteration and the current + * position. It returns a pointer to the next node to visit. The caller must + * hold a reference to @start during the whole iteration. Furthermore, @node + * must be locked when entering this helper. On return, the lock is released. + * + * The order of visit is pre-order traversal. + * + * If @node is deactivated before recursing its children, then it is guaranteed + * that all children will be visited. If @node is still active, new nodes might + * be inserted during traversal, and thus might be missed. + * + * Also note that the node-locks are released while traversing children. You + * must not rely on the locks to be held during the whole traversal. Each node + * that is visited is pinned by this helper, so the caller can rely on owning a + * reference. It is dropped, once all of the children of the node have been + * visited (recursively). + * + * You *must not* bail out of a traversal early, otherwise you'll leak + * ref-counts to all nodes in the current depth-path. + * + * Return: Reference to next node, or NULL. */ -void kdbus_node_deactivate(struct kdbus_node *node) +static struct kdbus_node *kdbus_node_recurse_unlock(struct kdbus_node *start, + struct kdbus_node *node) { - struct kdbus_node *pos, *child; + struct kdbus_node *t, *prev = NULL; struct rb_node *rb; - int v_pre, v_post; - pos = node; + lockdep_assert_held(&node->lock); - /* - * To avoid recursion, we perform back-tracking while deactivating - * nodes. For each node we enter, we first mark the active-counter as - * deactivated by adding BIAS. If the node as children, we set the first - * child as current position and start over. If the node has no - * children, we drain the node by waiting for all active refs to be - * dropped and then releasing the node. - * - * After the node is released, we set its parent as current position - * and start over. If the current position was the initial node, we're - * done. - * - * Note that this function can be called in parallel by multiple - * callers. We make sure that each node is only released once, and any - * racing caller will wait until the other thread fully released that - * node. - */ + rb = rb_first(&node->children); + if (!rb) { + do { + mutex_unlock(&node->lock); + kdbus_node_unref(prev); + + if (node == start) + return NULL; + + prev = node; + node = node->parent; + + mutex_lock(&node->lock); + rb = rb_next(&prev->rb); + } while (!rb); + } + + t = kdbus_node_ref(kdbus_node_from_rb(rb)); + mutex_unlock(&node->lock); + kdbus_node_unref(prev); + return t; +} + +/** + * kdbus_node_deactivate() - deactivate a node + * @node: node to deactivate + * + * This recursively deactivates the passed node and all its children. The nodes + * are marked as deactivated, but they're not drained. Hence, even after this + * call returns, there might still be someone holding an active reference to + * any of the nodes. However, no new active references can be acquired after + * this returns. + * + * It is safe to call this multiple times (even in parallel). Each call is + * guaranteed to only return after _all_ nodes have been deactivated. + */ +void kdbus_node_deactivate(struct kdbus_node *node) +{ + struct kdbus_node *pos; + int v; + + pos = node; + while (pos) { + mutex_lock(&pos->lock); - for (;;) { /* - * Add BIAS to node->active to mark it as inactive. If it was + * Add BIAS to pos->active to mark it as inactive. If it was * never active before, immediately mark it as RELEASE_INACTIVE - * so we remember this state. - * We cannot remember v_pre as we might iterate into the - * children, overwriting v_pre, before we can release our node. + * so that this case can be detected later on. + * If the node was already deactivated, make sure to still + * recurse into the children. Otherwise, we might return before + * a racing thread finished deactivating all children. But we + * want to guarantee that the whole tree is deactivated once + * this returns. */ - mutex_lock(&pos->lock); - v_pre = atomic_read(&pos->active); - if (v_pre >= 0) + v = atomic_read(&pos->active); + if (v >= 0) atomic_add_return(KDBUS_NODE_BIAS, &pos->active); - else if (v_pre == KDBUS_NODE_NEW) + else if (v == KDBUS_NODE_NEW) atomic_set(&pos->active, KDBUS_NODE_RELEASE_DIRECT); - mutex_unlock(&pos->lock); + pos = kdbus_node_recurse_unlock(node, pos); + } +} + +/** + * kdbus_node_drain() - drain a node + * @node: node to drain + * + * This function recursively deactivates this node and all its children and + * then waits for all active references to be dropped. This function is a + * superset of kdbus_node_deactivate(), as it additionally drains all nodes. It + * returns only once all children and the node itself were recursively drained + * (even if you call this function multiple times in parallel). + * + * It is safe to call this function on _any_ node that was initialized _any_ + * number of times. + * + * This call may sleep, as it waits for all active references to be dropped. + */ +void kdbus_node_drain(struct kdbus_node *node) +{ + struct kdbus_node *pos; + int v; + + kdbus_node_deactivate(node); + + pos = node; + while (pos) { /* wait until all active references were dropped */ wait_event(pos->waitq, atomic_read(&pos->active) <= KDBUS_NODE_BIAS); - mutex_lock(&pos->lock); - /* recurse into first child if any */ - rb = rb_first(&pos->children); - if (rb) { - child = kdbus_node_ref(kdbus_node_from_rb(rb)); - mutex_unlock(&pos->lock); - pos = child; - continue; - } - /* mark object as RELEASE */ - v_post = atomic_read(&pos->active); - if (v_post == KDBUS_NODE_BIAS || - v_post == KDBUS_NODE_RELEASE_DIRECT) + mutex_lock(&pos->lock); + v = atomic_read(&pos->active); + if (v == KDBUS_NODE_BIAS || v == KDBUS_NODE_RELEASE_DIRECT) atomic_set(&pos->active, KDBUS_NODE_RELEASE); mutex_unlock(&pos->lock); @@ -614,20 +699,9 @@ void kdbus_node_deactivate(struct kdbus_node *node) * perform the actual release. Otherwise, we wait until the * release is done and the node is marked as DRAINED. */ - if (v_post == KDBUS_NODE_BIAS || - v_post == KDBUS_NODE_RELEASE_DIRECT) { + if (v == KDBUS_NODE_BIAS || v == KDBUS_NODE_RELEASE_DIRECT) { if (pos->release_cb) - pos->release_cb(pos, v_post == KDBUS_NODE_BIAS); - - if (pos->parent) { - mutex_lock(&pos->parent->lock); - if (!RB_EMPTY_NODE(&pos->rb)) { - rb_erase(&pos->rb, - &pos->parent->children); - RB_CLEAR_NODE(&pos->rb); - } - mutex_unlock(&pos->parent->lock); - } + pos->release_cb(pos, v == KDBUS_NODE_BIAS); /* mark as DRAINED */ atomic_set(&pos->active, KDBUS_NODE_DRAINED); @@ -646,7 +720,7 @@ void kdbus_node_deactivate(struct kdbus_node *node) * immediately went to NODE_RELEASE_DIRECT. In that case * we must not drop the reference. */ - if (v_post == KDBUS_NODE_BIAS) + if (v == KDBUS_NODE_BIAS) kdbus_node_unref(pos); } else { /* wait until object is DRAINED */ @@ -654,19 +728,8 @@ void kdbus_node_deactivate(struct kdbus_node *node) atomic_read(&pos->active) == KDBUS_NODE_DRAINED); } - /* - * We're done with the current node. Continue on its parent - * again, which will try deactivating its next child, or itself - * if no child is left. - * If we've reached our initial node again, we are done and - * can safely return. - */ - if (pos == node) - break; - - child = pos; - pos = pos->parent; - kdbus_node_unref(child); + mutex_lock(&pos->lock); + pos = kdbus_node_recurse_unlock(node, pos); } } @@ -859,18 +922,6 @@ struct kdbus_node *kdbus_node_next_child(struct kdbus_node *node, pos = kdbus_node_from_rb(rb); if (kdbus_node_acquire(pos)) goto exit; - } else if (RB_EMPTY_NODE(&prev->rb)) { - /* - * The current iterator is no longer linked to the rb-tree. Use - * its hash value and name to find the next _higher_ node and - * acquire it. If we got it, return it as next element. - * Otherwise, the loop below will find the next active node. - */ - pos = node_find_closest_unlocked(node, prev->hash, prev->name); - if (!pos) - goto exit; - if (kdbus_node_acquire(pos)) - goto exit; } else { /* * The current iterator is still linked to the parent. Set it diff --git a/ipc/kdbus/node.h b/ipc/kdbus/node.h index 970e02b08..16c6fd574 100644 --- a/ipc/kdbus/node.h +++ b/ipc/kdbus/node.h @@ -32,6 +32,7 @@ typedef void (*kdbus_node_free_t) (struct kdbus_node *node); typedef void (*kdbus_node_release_t) (struct kdbus_node *node, bool was_active); struct kdbus_node { + struct mutex lock; atomic_t refcnt; atomic_t active; wait_queue_head_t waitq; @@ -49,10 +50,9 @@ struct kdbus_node { unsigned int hash; unsigned int id; struct kdbus_node *parent; /* may be NULL */ - - /* valid iff active */ - struct mutex lock; struct rb_node rb; + + /* dynamic list of children */ struct rb_root children; }; @@ -72,6 +72,7 @@ bool kdbus_node_is_active(struct kdbus_node *node); bool kdbus_node_is_deactivated(struct kdbus_node *node); bool kdbus_node_activate(struct kdbus_node *node); void kdbus_node_deactivate(struct kdbus_node *node); +void kdbus_node_drain(struct kdbus_node *node); bool kdbus_node_acquire(struct kdbus_node *node); void kdbus_node_release(struct kdbus_node *node); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c3fc5c2b6..161a1807e 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -47,8 +47,7 @@ #define RECV 1 #define STATE_NONE 0 -#define STATE_PENDING 1 -#define STATE_READY 2 +#define STATE_READY 1 struct posix_msg_tree_node { struct rb_node rb_node; @@ -568,15 +567,12 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, wq_add(info, sr, ewp); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&info->lock); time = schedule_hrtimeout_range_clock(timeout, 0, HRTIMER_MODE_ABS, CLOCK_REALTIME); - while (ewp->state == STATE_PENDING) - cpu_relax(); - if (ewp->state == STATE_READY) { retval = 0; goto out; @@ -904,11 +900,15 @@ out_name: * list of waiting receivers. A sender checks that list before adding the new * message into the message array. If there is a waiting receiver, then it * bypasses the message array and directly hands the message over to the - * receiver. - * The receiver accepts the message and returns without grabbing the queue - * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers - * are necessary. The same algorithm is used for sysv semaphores, see - * ipc/sem.c for more details. + * receiver. The receiver accepts the message and returns without grabbing the + * queue spinlock: + * + * - Set pointer to message. + * - Queue the receiver task for later wakeup (without the info->lock). + * - Update its state to STATE_READY. Now the receiver can continue. + * - Wake up the process after the lock is dropped. Should the process wake up + * before this wakeup (due to a timeout or a signal) it will either see + * STATE_READY and continue or acquire the lock to check the state again. * * The same algorithm is used for senders. */ @@ -916,21 +916,29 @@ out_name: /* pipelined_send() - send a message directly to the task waiting in * sys_mq_timedreceive() (without inserting message into a queue). */ -static inline void pipelined_send(struct mqueue_inode_info *info, +static inline void pipelined_send(struct wake_q_head *wake_q, + struct mqueue_inode_info *info, struct msg_msg *message, struct ext_wait_queue *receiver) { receiver->msg = message; list_del(&receiver->list); - receiver->state = STATE_PENDING; - wake_up_process(receiver->task); - smp_wmb(); + wake_q_add(wake_q, receiver->task); + /* + * Rely on the implicit cmpxchg barrier from wake_q_add such + * that we can ensure that updating receiver->state is the last + * write operation: As once set, the receiver can continue, + * and if we don't have the reference count from the wake_q, + * yet, at that point we can later have a use-after-free + * condition and bogus wakeup. + */ receiver->state = STATE_READY; } /* pipelined_receive() - if there is task waiting in sys_mq_timedsend() * gets its message and put to the queue (we have one free place for sure). */ -static inline void pipelined_receive(struct mqueue_inode_info *info) +static inline void pipelined_receive(struct wake_q_head *wake_q, + struct mqueue_inode_info *info) { struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND); @@ -941,10 +949,9 @@ static inline void pipelined_receive(struct mqueue_inode_info *info) } if (msg_insert(sender->msg, info)) return; + list_del(&sender->list); - sender->state = STATE_PENDING; - wake_up_process(sender->task); - smp_wmb(); + wake_q_add(wake_q, sender->task); sender->state = STATE_READY; } @@ -962,6 +969,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, struct timespec ts; struct posix_msg_tree_node *new_leaf = NULL; int ret = 0; + WAKE_Q(wake_q); if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -1045,7 +1053,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, } else { receiver = wq_get_first_waiter(info, RECV); if (receiver) { - pipelined_send(info, msg_ptr, receiver); + pipelined_send(&wake_q, info, msg_ptr, receiver); } else { /* adds message to the queue */ ret = msg_insert(msg_ptr, info); @@ -1058,6 +1066,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, } out_unlock: spin_unlock(&info->lock); + wake_up_q(&wake_q); out_free: if (ret) free_msg(msg_ptr); @@ -1144,14 +1153,17 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, msg_ptr = wait.msg; } } else { + WAKE_Q(wake_q); + msg_ptr = msg_get(info); inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; /* There is now free space in queue. */ - pipelined_receive(info); + pipelined_receive(&wake_q, info); spin_unlock(&info->lock); + wake_up_q(&wake_q); ret = 0; } if (ret == 0) { @@ -76,7 +76,7 @@ struct msg_sender { static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) { - struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&msg_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); @@ -196,7 +196,7 @@ static void expunge_all(struct msg_queue *msq, int res) * or dealing with -EAGAIN cases. See lockless receive part 1 * and 2 in do_msgrcv(). */ - smp_mb(); + smp_wmb(); /* barrier (B) */ msr->r_msg = ERR_PTR(res); } } @@ -580,7 +580,8 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) /* initialize pipelined send ordering */ msr->r_msg = NULL; wake_up_process(msr->r_tsk); - smp_mb(); /* see barrier comment below */ + /* barrier (B) see barrier comment below */ + smp_wmb(); msr->r_msg = ERR_PTR(-E2BIG); } else { msr->r_msg = NULL; @@ -589,11 +590,12 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) wake_up_process(msr->r_tsk); /* * Ensure that the wakeup is visible before - * setting r_msg, as the receiving end depends - * on it. See lockless receive part 1 and 2 in - * do_msgrcv(). + * setting r_msg, as the receiving can otherwise + * exit - once r_msg is set, the receiver can + * continue. See lockless receive part 1 and 2 + * in do_msgrcv(). Barrier (B). */ - smp_mb(); + smp_wmb(); msr->r_msg = msg; return 1; @@ -932,12 +934,38 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl /* Lockless receive, part 2: * Wait until pipelined_send or expunge_all are outside of * wake_up_process(). There is a race with exit(), see - * ipc/mqueue.c for the details. + * ipc/mqueue.c for the details. The correct serialization + * ensures that a receiver cannot continue without the wakeup + * being visibible _before_ setting r_msg: + * + * CPU 0 CPU 1 + * <loop receiver> + * smp_rmb(); (A) <-- pair -. <waker thread> + * <load ->r_msg> | msr->r_msg = NULL; + * | wake_up_process(); + * <continue> `------> smp_wmb(); (B) + * msr->r_msg = msg; + * + * Where (A) orders the message value read and where (B) orders + * the write to the r_msg -- done in both pipelined_send and + * expunge_all. */ - msg = (struct msg_msg *)msr_d.r_msg; - while (msg == NULL) { - cpu_relax(); + for (;;) { + /* + * Pairs with writer barrier in pipelined_send + * or expunge_all. + */ + smp_rmb(); /* barrier (A) */ msg = (struct msg_msg *)msr_d.r_msg; + if (msg) + break; + + /* + * The cpu_relax() call is a compiler barrier + * which forces everything in this loop to be + * re-loaded. + */ + cpu_relax(); } /* Lockless receive, part 3: @@ -253,6 +253,16 @@ static void sem_rcu_free(struct rcu_head *head) } /* + * spin_unlock_wait() and !spin_is_locked() are not memory barriers, they + * are only control barriers. + * The code must pair with spin_unlock(&sem->lock) or + * spin_unlock(&sem_perm.lock), thus just the control barrier is insufficient. + * + * smp_rmb() is sufficient, as writes cannot pass the control barrier. + */ +#define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb() + +/* * Wait until all currently ongoing simple ops have completed. * Caller must own sem_perm.lock. * New simple ops cannot start, because simple ops first check @@ -275,6 +285,7 @@ static void sem_wait_array(struct sem_array *sma) sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); } + ipc_smp_acquire__after_spin_is_unlocked(); } /* @@ -327,13 +338,12 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* Then check that the global lock is free */ if (!spin_is_locked(&sma->sem_perm.lock)) { /* - * The ipc object lock check must be visible on all - * cores before rechecking the complex count. Otherwise - * we can race with another thread that does: + * We need a memory barrier with acquire semantics, + * otherwise we can race with another thread that does: * complex_count++; * spin_unlock(sem_perm.lock); */ - smp_rmb(); + ipc_smp_acquire__after_spin_is_unlocked(); /* * Now repeat the test of complex_count: @@ -391,7 +401,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp; struct sem_array *sma; - ipcp = ipc_obtain_object(&sem_ids(ns), id); + ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); @@ -410,7 +420,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) { - struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); @@ -2074,17 +2084,28 @@ void exit_sem(struct task_struct *tsk) rcu_read_lock(); un = list_entry_rcu(ulp->list_proc.next, struct sem_undo, list_proc); - if (&un->list_proc == &ulp->list_proc) - semid = -1; - else - semid = un->semid; + if (&un->list_proc == &ulp->list_proc) { + /* + * We must wait for freeary() before freeing this ulp, + * in case we raced with last sem_undo. There is a small + * possibility where we exit while freeary() didn't + * finish unlocking sem_undo_list. + */ + spin_unlock_wait(&ulp->lock); + rcu_read_unlock(); + break; + } + spin_lock(&ulp->lock); + semid = un->semid; + spin_unlock(&ulp->lock); + /* exit_sem raced with IPC_RMID, nothing to do */ if (semid == -1) { rcu_read_unlock(); - break; + continue; } - sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid); + sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); /* exit_sem raced with IPC_RMID, nothing to do */ if (IS_ERR(sma)) { rcu_read_unlock(); @@ -2112,9 +2133,11 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - spin_lock(&ulp->lock); + /* we are the last process using this ulp, acquiring ulp->lock + * isn't required. Besides that, we are also protected against + * IPC_RMID as we hold sma->sem_perm lock now + */ list_del_rcu(&un->list_proc); - spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { @@ -129,7 +129,7 @@ void __init shm_init(void) static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) { - struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); @@ -155,8 +155,11 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); - if (IS_ERR(ipcp)) - return (struct shmid_kernel *)ipcp; + /* + * We raced in the idr lookup or with shm_destroy(). Either way, the + * ID is busted. + */ + BUG_ON(IS_ERR(ipcp)); return container_of(ipcp, struct shmid_kernel, shm_perm); } @@ -191,7 +194,6 @@ static void shm_open(struct vm_area_struct *vma) struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); - BUG_ON(IS_ERR(shp)); shp->shm_atim = get_seconds(); shp->shm_lprid = task_tgid_vnr(current); shp->shm_nattch++; @@ -258,7 +260,6 @@ static void shm_close(struct vm_area_struct *vma) down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ shp = shm_lock(ns, sfd->id); - BUG_ON(IS_ERR(shp)); shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; @@ -544,7 +545,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if ((shmflg & SHM_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) acctflag = VM_NORESERVE; - file = shmem_file_setup(name, size, acctflag, 0); + file = shmem_kernel_file_setup(name, size, acctflag); } error = PTR_ERR(file); if (IS_ERR(file)) @@ -1191,7 +1192,6 @@ out_fput: out_nattch: down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); - BUG_ON(IS_ERR(shp)); shp->shm_nattch--; if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); diff --git a/ipc/util.c b/ipc/util.c index ff3323ef8..be4230020 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -467,10 +467,7 @@ void ipc_rcu_free(struct rcu_head *head) { struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); - if (is_vmalloc_addr(p)) - vfree(p); - else - kfree(p); + kvfree(p); } /** @@ -558,7 +555,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out) * Call inside the RCU critical section. * The ipc object is *not* locked on exit. */ -struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) +struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id) { struct kern_ipc_perm *out; int lid = ipcid_to_idx(id); @@ -584,21 +581,24 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) struct kern_ipc_perm *out; rcu_read_lock(); - out = ipc_obtain_object(ids, id); + out = ipc_obtain_object_idr(ids, id); if (IS_ERR(out)) - goto err1; + goto err; spin_lock(&out->lock); - /* ipc_rmid() may have already freed the ID while ipc_lock - * was spinning: here verify that the structure is still valid + /* + * ipc_rmid() may have already freed the ID while ipc_lock() + * was spinning: here verify that the structure is still valid. + * Upon races with RMID, return -EIDRM, thus indicating that + * the ID points to a removed identifier. */ if (ipc_valid_object(out)) return out; spin_unlock(&out->lock); - out = ERR_PTR(-EINVAL); -err1: + out = ERR_PTR(-EIDRM); +err: rcu_read_unlock(); return out; } @@ -608,7 +608,7 @@ err1: * @ids: ipc identifier set * @id: ipc id to look for * - * Similar to ipc_obtain_object() but also checks + * Similar to ipc_obtain_object_idr() but also checks * the ipc object reference counter. * * Call inside the RCU critical section. @@ -616,13 +616,13 @@ err1: */ struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id) { - struct kern_ipc_perm *out = ipc_obtain_object(ids, id); + struct kern_ipc_perm *out = ipc_obtain_object_idr(ids, id); if (IS_ERR(out)) goto out; if (ipc_checkid(out, id)) - return ERR_PTR(-EIDRM); + return ERR_PTR(-EINVAL); out: return out; } diff --git a/ipc/util.h b/ipc/util.h index 1a5a0fcd0..3a8a5a0ec 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -132,7 +132,7 @@ void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)); void ipc_rcu_free(struct rcu_head *head); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); -struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); +struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); |