/*
 * Copyright (C) 2013-2015 Kay Sievers
 * Copyright (C) 2013-2015 Greg Kroah-Hartman
 * Copyright (C) 2013-2015 Daniel Mack
 * Copyright (C) 2013-2015 David Herrmann
 * Copyright (C) 2013-2015 Linux Foundation
 * Copyright (C) 2014-2015 Djalal Harouni
 *
 * kdbus is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 */

#include <linux/aio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/uio.h>

#include "pool.h"
#include "util.h"

/**
 * struct kdbus_pool - the receiver's buffer
 * @f:              The backing shmem file
 * @size:           The size of the file
 * @accounted_size: Currently accounted memory in bytes
 * @lock:           Pool data lock
 * @slices:         All slices sorted by address
 * @slices_busy:    Tree of allocated slices
 * @slices_free:    Tree of free slices
 *
 * The receiver's buffer, managed as a pool of allocated and free
 * slices containing the queued messages.
 *
 * Messages sent with KDBUS_CMD_SEND are copied directly by the
 * sending process into the receiver's pool.
 *
 * Messages received with KDBUS_CMD_RECV just return the offset
 * to the data placed in the pool.
 *
 * The internally allocated memory needs to be returned by the receiver
 * with KDBUS_CMD_FREE.
 */
struct kdbus_pool {
        struct file *f;
        size_t size;
        size_t accounted_size;
        struct mutex lock;

        struct list_head slices;
        struct rb_root slices_busy;
        struct rb_root slices_free;
};
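/*
 * Illustrative sketch of the receiver-side protocol described above, as
 * seen from user-space. This is not part of this file; the ioctl struct
 * and field names follow the kdbus uapi headers of the same tree and
 * should be treated as illustrative, error handling is elided:
 *
 *      uint8_t *buf = mmap(NULL, pool_size, PROT_READ, MAP_SHARED,
 *                          conn_fd, 0);
 *      struct kdbus_cmd_recv recv = { .size = sizeof(recv) };
 *
 *      ioctl(conn_fd, KDBUS_CMD_RECV, &recv);
 *      handle_msg((struct kdbus_msg *)(buf + recv.msg.offset));
 *
 *      struct kdbus_cmd_free cmd_free = {
 *              .size = sizeof(cmd_free),
 *              .offset = recv.msg.offset,
 *      };
 *      ioctl(conn_fd, KDBUS_CMD_FREE, &cmd_free);
 */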
/**
 * struct kdbus_pool_slice - allocated element in kdbus_pool
 * @pool:       Pool this slice belongs to
 * @off:        Offset of slice in the shmem file
 * @size:       Size of slice
 * @entry:      Entry in "all slices" list
 * @rb_node:    Entry in free or busy tree
 * @free:       Unused slice
 * @accounted:  Accounted as queue slice
 * @ref_kernel: Kernel holds a reference
 * @ref_user:   Userspace holds a reference
 *
 * The pool has one or more slices, always spanning the entire size of the
 * pool.
 *
 * Every slice is an element in a list sorted by the buffer address, to
 * provide access to the next neighbor slice.
 *
 * Every slice is a member of either the busy or the free tree. The free
 * tree is organized by slice size, the busy tree by buffer offset.
 */
struct kdbus_pool_slice {
        struct kdbus_pool *pool;
        size_t off;
        size_t size;

        struct list_head entry;
        struct rb_node rb_node;

        bool free:1;
        bool accounted:1;
        bool ref_kernel:1;
        bool ref_user:1;
};

static struct kdbus_pool_slice *kdbus_pool_slice_new(struct kdbus_pool *pool,
                                                     size_t off, size_t size)
{
        struct kdbus_pool_slice *slice;

        slice = kzalloc(sizeof(*slice), GFP_KERNEL);
        if (!slice)
                return NULL;

        slice->pool = pool;
        slice->off = off;
        slice->size = size;
        slice->free = true;
        return slice;
}

/* insert a slice into the free tree */
static void kdbus_pool_add_free_slice(struct kdbus_pool *pool,
                                      struct kdbus_pool_slice *slice)
{
        struct rb_node **n;
        struct rb_node *pn = NULL;

        n = &pool->slices_free.rb_node;
        while (*n) {
                struct kdbus_pool_slice *pslice;

                pn = *n;
                pslice = rb_entry(pn, struct kdbus_pool_slice, rb_node);
                if (slice->size < pslice->size)
                        n = &pn->rb_left;
                else
                        n = &pn->rb_right;
        }

        rb_link_node(&slice->rb_node, pn, n);
        rb_insert_color(&slice->rb_node, &pool->slices_free);
}

/* insert a slice into the busy tree */
static void kdbus_pool_add_busy_slice(struct kdbus_pool *pool,
                                      struct kdbus_pool_slice *slice)
{
        struct rb_node **n;
        struct rb_node *pn = NULL;

        n = &pool->slices_busy.rb_node;
        while (*n) {
                struct kdbus_pool_slice *pslice;

                pn = *n;
                pslice = rb_entry(pn, struct kdbus_pool_slice, rb_node);
                if (slice->off < pslice->off)
                        n = &pn->rb_left;
                else if (slice->off > pslice->off)
                        n = &pn->rb_right;
                else
                        BUG();
        }

        rb_link_node(&slice->rb_node, pn, n);
        rb_insert_color(&slice->rb_node, &pool->slices_busy);
}

static struct kdbus_pool_slice *kdbus_pool_find_slice(struct kdbus_pool *pool,
                                                      size_t off)
{
        struct rb_node *n;

        n = pool->slices_busy.rb_node;
        while (n) {
                struct kdbus_pool_slice *s;

                s = rb_entry(n, struct kdbus_pool_slice, rb_node);
                if (off < s->off)
                        n = n->rb_left;
                else if (off > s->off)
                        n = n->rb_right;
                else
                        return s;
        }

        return NULL;
}
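/*
 * Worked example of the slice bookkeeping above (illustrative): a 64 KiB
 * pool after two allocations of 4 KiB and 16 KiB looks like this:
 *
 *      slices (by address):   [busy 0..4K][busy 4K..20K][free 20K..64K]
 *      slices_busy (by off):  { 0, 4K }
 *      slices_free (by size): { 44K }
 *
 * Equal sizes may occur multiple times in the free tree; ties are linked
 * to the right. Offsets in the busy tree are unique by construction, so
 * kdbus_pool_add_busy_slice() BUG()s on a duplicate.
 */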
/**
 * kdbus_pool_slice_alloc() - allocate memory from a pool
 * @pool:      The receiver's pool
 * @size:      The number of bytes to allocate
 * @accounted: Whether this slice should be accounted for
 *
 * The returned slice must later be passed to kdbus_pool_slice_release()
 * to free the allocated memory. Data is copied into the slice separately,
 * via kdbus_pool_slice_copy_kvec() or kdbus_pool_slice_copy_iovec().
 *
 * Return: the allocated slice on success, ERR_PTR on failure.
 */
struct kdbus_pool_slice *kdbus_pool_slice_alloc(struct kdbus_pool *pool,
                                                size_t size, bool accounted)
{
        size_t slice_size = KDBUS_ALIGN8(size);
        struct rb_node *n, *found = NULL;
        struct kdbus_pool_slice *s;
        int ret = 0;

        if (WARN_ON(!size))
                return ERR_PTR(-EINVAL);

        /* search a free slice with the closest matching size */
        mutex_lock(&pool->lock);
        n = pool->slices_free.rb_node;
        while (n) {
                s = rb_entry(n, struct kdbus_pool_slice, rb_node);
                if (slice_size < s->size) {
                        found = n;
                        n = n->rb_left;
                } else if (slice_size > s->size) {
                        n = n->rb_right;
                } else {
                        found = n;
                        break;
                }
        }

        /* no slice with the minimum size found in the pool */
        if (!found) {
                ret = -EXFULL;
                goto exit_unlock;
        }

        /* no exact match, use the closest one */
        if (!n) {
                struct kdbus_pool_slice *s_new;

                s = rb_entry(found, struct kdbus_pool_slice, rb_node);

                /* split-off the remainder of the size to its own slice */
                s_new = kdbus_pool_slice_new(pool, s->off + slice_size,
                                             s->size - slice_size);
                if (!s_new) {
                        ret = -ENOMEM;
                        goto exit_unlock;
                }

                list_add(&s_new->entry, &s->entry);
                kdbus_pool_add_free_slice(pool, s_new);

                /* adjust our size now that we split-off another slice */
                s->size = slice_size;
        }

        /* move slice from the free to the busy tree */
        rb_erase(found, &pool->slices_free);
        kdbus_pool_add_busy_slice(pool, s);

        WARN_ON(s->ref_kernel || s->ref_user);

        s->ref_kernel = true;
        s->free = false;
        s->accounted = accounted;
        if (accounted)
                pool->accounted_size += s->size;
        mutex_unlock(&pool->lock);

        return s;

exit_unlock:
        mutex_unlock(&pool->lock);
        return ERR_PTR(ret);
}

static void __kdbus_pool_slice_release(struct kdbus_pool_slice *slice)
{
        struct kdbus_pool *pool = slice->pool;

        /* don't free the slice while either side still holds a reference */
        if (slice->ref_kernel || slice->ref_user)
                return;

        if (WARN_ON(slice->free))
                return;

        rb_erase(&slice->rb_node, &pool->slices_busy);

        /* merge with the next free slice */
        if (!list_is_last(&slice->entry, &pool->slices)) {
                struct kdbus_pool_slice *s;

                s = list_entry(slice->entry.next,
                               struct kdbus_pool_slice, entry);
                if (s->free) {
                        rb_erase(&s->rb_node, &pool->slices_free);
                        list_del(&s->entry);
                        slice->size += s->size;
                        kfree(s);
                }
        }

        /* merge with the previous free slice */
        if (pool->slices.next != &slice->entry) {
                struct kdbus_pool_slice *s;

                s = list_entry(slice->entry.prev,
                               struct kdbus_pool_slice, entry);
                if (s->free) {
                        rb_erase(&s->rb_node, &pool->slices_free);
                        list_del(&slice->entry);
                        s->size += slice->size;
                        kfree(slice);
                        slice = s;
                }
        }

        slice->free = true;
        kdbus_pool_add_free_slice(pool, slice);
}
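/*
 * Worked example of the split and merge paths above (illustrative):
 * allocating 100 bytes from an empty 64 KiB pool rounds the size up to
 * 104 (KDBUS_ALIGN8), finds the single free slice [0..64K) as best fit
 * and splits it into busy [0..104) plus free [104..64K). Releasing the
 * slice again merges it with its free neighbor, restoring the single
 * free slice [0..64K).
 */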
/**
 * kdbus_pool_slice_release() - drop kernel-reference on allocated slice
 * @slice: Slice allocated from the pool
 *
 * This releases the kernel-reference on the given slice. Once both the
 * kernel-reference and the user-reference on a slice are dropped, the slice
 * is returned to the pool.
 *
 * So far, we do not implement full ref-counting on slices. Kernel and
 * user-space can each hold exactly one reference to a slice. Once both are
 * dropped, the slice is released.
 */
void kdbus_pool_slice_release(struct kdbus_pool_slice *slice)
{
        struct kdbus_pool *pool;

        if (!slice)
                return;

        /* @slice may be freed, so keep local ptr to @pool */
        pool = slice->pool;

        mutex_lock(&pool->lock);
        /* kernel must own a ref to @slice to drop it */
        WARN_ON(!slice->ref_kernel);
        slice->ref_kernel = false;

        /* no longer kernel-owned, de-account slice */
        if (slice->accounted && !WARN_ON(pool->accounted_size < slice->size))
                pool->accounted_size -= slice->size;

        __kdbus_pool_slice_release(slice);
        mutex_unlock(&pool->lock);
}

/**
 * kdbus_pool_release_offset() - release a public offset
 * @pool: pool to operate on
 * @off:  offset to release
 *
 * This should be called whenever user-space frees a slice given to them. It
 * verifies the slice is available and public, and then drops it. It ensures
 * correct locking and barriers against queues.
 *
 * Return: 0 on success, -ENXIO if the offset is invalid or not public.
 */
int kdbus_pool_release_offset(struct kdbus_pool *pool, size_t off)
{
        struct kdbus_pool_slice *slice;
        int ret = 0;

        /* 'pool->size' is used as dummy offset for empty slices */
        if (off == pool->size)
                return 0;

        mutex_lock(&pool->lock);
        slice = kdbus_pool_find_slice(pool, off);
        if (slice && slice->ref_user) {
                slice->ref_user = false;
                __kdbus_pool_slice_release(slice);
        } else {
                ret = -ENXIO;
        }
        mutex_unlock(&pool->lock);

        return ret;
}

/**
 * kdbus_pool_publish_empty() - publish empty slice to user-space
 * @pool: pool to operate on
 * @off:  output storage for offset, or NULL
 * @size: output storage for size, or NULL
 *
 * This is the same as kdbus_pool_slice_publish(), but uses a dummy slice with
 * size 0. The returned offset points to the end of the pool and is never
 * returned for real slices.
 */
void kdbus_pool_publish_empty(struct kdbus_pool *pool, u64 *off, u64 *size)
{
        if (off)
                *off = pool->size;
        if (size)
                *size = 0;
}

/**
 * kdbus_pool_slice_publish() - publish slice to user-space
 * @slice:      The slice
 * @out_offset: Output storage for offset, or NULL
 * @out_size:   Output storage for size, or NULL
 *
 * This prepares a slice to be published to user-space.
 *
 * This call combines the following operations:
 *   * the memory region is flushed so the user's memory view is consistent
 *   * the slice is marked as referenced by user-space, so user-space has to
 *     call KDBUS_CMD_FREE to release it
 *   * the offset and size of the slice are written to the given output
 *     arguments, if non-NULL
 */
void kdbus_pool_slice_publish(struct kdbus_pool_slice *slice,
                              u64 *out_offset, u64 *out_size)
{
        mutex_lock(&slice->pool->lock);
        /* kernel must own a ref to @slice to gain a user-space ref */
        WARN_ON(!slice->ref_kernel);
        slice->ref_user = true;
        mutex_unlock(&slice->pool->lock);

        if (out_offset)
                *out_offset = slice->off;
        if (out_size)
                *out_size = slice->size;
}

/**
 * kdbus_pool_slice_offset() - get a slice's offset inside the pool
 * @slice: Slice to return the offset of
 *
 * Return: the internal offset of @slice inside the pool.
 */
off_t kdbus_pool_slice_offset(const struct kdbus_pool_slice *slice)
{
        return slice->off;
}

/**
 * kdbus_pool_slice_size() - get size of a pool slice
 * @slice: slice to query
 *
 * Return: size of the given slice
 */
size_t kdbus_pool_slice_size(const struct kdbus_pool_slice *slice)
{
        return slice->size;
}
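/*
 * Sketch of the kernel-side slice life cycle using the functions above
 * (illustrative only, error handling elided): queueing a message for a
 * receiver and handing it over to user-space.
 *
 *      struct kdbus_pool_slice *slice;
 *      u64 off, sz;
 *
 *      slice = kdbus_pool_slice_alloc(pool, size, true);
 *      kdbus_pool_slice_copy_kvec(slice, 0, kvec, kvec_len, size);
 *      kdbus_pool_slice_publish(slice, &off, &sz);
 *      kdbus_pool_slice_release(slice);   // drops the kernel ref only
 *
 * The slice stays busy until user-space calls KDBUS_CMD_FREE, which ends
 * up in kdbus_pool_release_offset() and drops the user reference.
 */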
/**
 * kdbus_pool_new() - create a new pool
 * @name: Name of the (deleted) file which shows up in
 *        /proc, used for debugging
 * @size: Maximum size of the pool
 *
 * Return: a new kdbus_pool on success, ERR_PTR on failure.
 */
struct kdbus_pool *kdbus_pool_new(const char *name, size_t size)
{
        struct kdbus_pool_slice *s;
        struct kdbus_pool *p;
        struct file *f;
        char *n = NULL;
        int ret;

        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p)
                return ERR_PTR(-ENOMEM);

        if (name) {
                n = kasprintf(GFP_KERNEL, KBUILD_MODNAME "-conn:%s", name);
                if (!n) {
                        ret = -ENOMEM;
                        goto exit_free;
                }
        }

        f = shmem_file_setup(n ?: KBUILD_MODNAME "-conn", size, 0);
        kfree(n);
        if (IS_ERR(f)) {
                ret = PTR_ERR(f);
                goto exit_free;
        }

        ret = get_write_access(file_inode(f));
        if (ret < 0)
                goto exit_put_shmem;

        /* allocate first slice spanning the entire pool */
        s = kdbus_pool_slice_new(p, 0, size);
        if (!s) {
                ret = -ENOMEM;
                goto exit_put_write;
        }

        p->f = f;
        p->size = size;
        p->slices_free = RB_ROOT;
        p->slices_busy = RB_ROOT;
        mutex_init(&p->lock);

        INIT_LIST_HEAD(&p->slices);
        list_add(&s->entry, &p->slices);

        kdbus_pool_add_free_slice(p, s);
        return p;

exit_put_write:
        put_write_access(file_inode(f));
exit_put_shmem:
        fput(f);
exit_free:
        kfree(p);
        return ERR_PTR(ret);
}

/**
 * kdbus_pool_free() - destroy pool
 * @pool: The receiver's pool
 */
void kdbus_pool_free(struct kdbus_pool *pool)
{
        struct kdbus_pool_slice *s, *tmp;

        if (!pool)
                return;

        list_for_each_entry_safe(s, tmp, &pool->slices, entry) {
                list_del(&s->entry);
                kfree(s);
        }

        put_write_access(file_inode(pool->f));
        fput(pool->f);
        kfree(pool);
}

/**
 * kdbus_pool_accounted() - retrieve accounting information
 * @pool: pool to query
 * @size: output for overall pool size
 * @acc:  output for currently accounted size
 *
 * This returns accounting information of the pool. Note that the data might
 * change after the function returns, as the pool lock is dropped. You need to
 * protect the data via other means if you need reliable accounting.
 */
void kdbus_pool_accounted(struct kdbus_pool *pool, size_t *size, size_t *acc)
{
        mutex_lock(&pool->lock);
        if (size)
                *size = pool->size;
        if (acc)
                *acc = pool->accounted_size;
        mutex_unlock(&pool->lock);
}

/**
 * kdbus_pool_slice_copy_iovec() - copy user memory to a slice
 * @slice:     The slice to write to
 * @off:       Offset in the slice to write to
 * @iov:       iovec array, pointing to data to copy
 * @iov_len:   Number of elements in @iov
 * @total_len: Total number of bytes described in members of @iov
 *
 * User memory referenced by @iov will be copied into @slice at offset @off.
 *
 * Return: the number of bytes copied, negative errno on failure.
 */
ssize_t
kdbus_pool_slice_copy_iovec(const struct kdbus_pool_slice *slice, loff_t off,
                            struct iovec *iov, size_t iov_len,
                            size_t total_len)
{
        struct iov_iter iter;
        ssize_t len;

        if (WARN_ON(off + total_len > slice->size))
                return -EFAULT;

        off += slice->off;
        iov_iter_init(&iter, WRITE, iov, iov_len, total_len);
        len = vfs_iter_write(slice->pool->f, &iter, &off);

        return (len >= 0 && len != total_len) ? -EFAULT : len;
}
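/*
 * Illustrative sketch: copying a two-part message (header plus payload)
 * into a freshly allocated slice with kdbus_pool_slice_copy_kvec() below.
 * All variables are local to the example, error handling is elided:
 *
 *      struct kvec vec[2] = {
 *              { .iov_base = &hdr,    .iov_len = sizeof(hdr) },
 *              { .iov_base = payload, .iov_len = payload_len },
 *      };
 *      size_t total = sizeof(hdr) + payload_len;
 *      struct kdbus_pool_slice *slice;
 *
 *      slice = kdbus_pool_slice_alloc(pool, total, true);
 *      if (!IS_ERR(slice))
 *              kdbus_pool_slice_copy_kvec(slice, 0, vec, 2, total);
 */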
/**
 * kdbus_pool_slice_copy_kvec() - copy kernel memory to a slice
 * @slice:     The slice to write to
 * @off:       Offset in the slice to write to
 * @kvec:      kvec array, pointing to data to copy
 * @kvec_len:  Number of elements in @kvec
 * @total_len: Total number of bytes described in members of @kvec
 *
 * Kernel memory referenced by @kvec will be copied into @slice at offset
 * @off.
 *
 * Return: the number of bytes copied, negative errno on failure.
 */
ssize_t kdbus_pool_slice_copy_kvec(const struct kdbus_pool_slice *slice,
                                   loff_t off, struct kvec *kvec,
                                   size_t kvec_len, size_t total_len)
{
        struct iov_iter iter;
        mm_segment_t old_fs;
        ssize_t len;

        if (WARN_ON(off + total_len > slice->size))
                return -EFAULT;

        off += slice->off;
        iov_iter_kvec(&iter, WRITE | ITER_KVEC, kvec, kvec_len, total_len);

        old_fs = get_fs();
        set_fs(get_ds());
        len = vfs_iter_write(slice->pool->f, &iter, &off);
        set_fs(old_fs);

        return (len >= 0 && len != total_len) ? -EFAULT : len;
}

/**
 * kdbus_pool_slice_copy() - copy data from one slice into another
 * @slice_dst: destination slice
 * @slice_src: source slice
 *
 * Return: 0 on success, negative error number on failure.
 */
int kdbus_pool_slice_copy(const struct kdbus_pool_slice *slice_dst,
                          const struct kdbus_pool_slice *slice_src)
{
        struct file *f_src = slice_src->pool->f;
        struct file *f_dst = slice_dst->pool->f;
        struct inode *i_dst = file_inode(f_dst);
        struct address_space *mapping_dst = f_dst->f_mapping;
        const struct address_space_operations *aops = mapping_dst->a_ops;
        unsigned long len = slice_src->size;
        loff_t off_src = slice_src->off;
        loff_t off_dst = slice_dst->off;
        mm_segment_t old_fs;
        int ret = 0;

        if (WARN_ON(slice_src->size != slice_dst->size) ||
            WARN_ON(slice_src->free || slice_dst->free))
                return -EINVAL;

        mutex_lock(&i_dst->i_mutex);
        old_fs = get_fs();
        set_fs(get_ds());
        while (len > 0) {
                unsigned long page_off;
                unsigned long copy_len;
                char __user *kaddr;
                struct page *page;
                ssize_t n_read;
                void *fsdata;
                long status;

                page_off = off_dst & (PAGE_CACHE_SIZE - 1);
                copy_len = min_t(unsigned long,
                                 PAGE_CACHE_SIZE - page_off, len);

                status = aops->write_begin(f_dst, mapping_dst, off_dst,
                                           copy_len, 0, &page, &fsdata);
                if (unlikely(status < 0)) {
                        ret = status;
                        break;
                }

                kaddr = (char __force __user *)kmap(page) + page_off;
                n_read = __vfs_read(f_src, kaddr, copy_len, &off_src);
                kunmap(page);
                mark_page_accessed(page);
                flush_dcache_page(page);

                if (unlikely(n_read != copy_len)) {
                        ret = -EFAULT;
                        break;
                }

                status = aops->write_end(f_dst, mapping_dst, off_dst,
                                         copy_len, copy_len, page, fsdata);
                if (unlikely(status != copy_len)) {
                        ret = -EFAULT;
                        break;
                }

                off_dst += copy_len;
                len -= copy_len;
        }
        set_fs(old_fs);
        mutex_unlock(&i_dst->i_mutex);

        return ret;
}

/**
 * kdbus_pool_mmap() - map the pool into the process
 * @pool: The receiver's pool
 * @vma:  passed by mmap() syscall
 *
 * Return: the result of the mmap() call, negative errno on failure.
 */
int kdbus_pool_mmap(const struct kdbus_pool *pool, struct vm_area_struct *vma)
{
        /* deny write access to the pool */
        if (vma->vm_flags & VM_WRITE)
                return -EPERM;
        vma->vm_flags &= ~VM_MAYWRITE;

        /* do not allow mapping more than the size of the file */
        if ((vma->vm_end - vma->vm_start) > pool->size)
                return -EFAULT;

        /* replace the connection file with our shmem file */
        if (vma->vm_file)
                fput(vma->vm_file);
        vma->vm_file = get_file(pool->f);

        return pool->f->f_op->mmap(pool->f, vma);
}
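/*
 * Illustrative user-space counterpart to kdbus_pool_mmap() (not kernel
 * code): the pool must be mapped read-only, a PROT_WRITE mapping is
 * rejected with EPERM.
 *
 *      void *buf = mmap(NULL, pool_size, PROT_READ, MAP_SHARED,
 *                       conn_fd, 0);
 *      if (buf == MAP_FAILED)
 *              err(1, "mmap");
 */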