diff options
Diffstat (limited to 'drivers/xen/xenbus')
-rw-r--r-- | drivers/xen/xenbus/Makefile | 14 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 906 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 243 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_comms.h | 52 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_backend.c | 142 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_frontend.c | 634 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 839 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.h | 88 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 276 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 512 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 981 |
11 files changed, 4687 insertions, 0 deletions
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile new file mode 100644 index 000000000..31e2e9050 --- /dev/null +++ b/drivers/xen/xenbus/Makefile @@ -0,0 +1,14 @@ +obj-y += xenbus.o +obj-y += xenbus_dev_frontend.o + +xenbus-objs = +xenbus-objs += xenbus_client.o +xenbus-objs += xenbus_comms.o +xenbus-objs += xenbus_xs.o +xenbus-objs += xenbus_probe.o + +xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o +xenbus-objs += $(xenbus-be-objs-y) + +obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o +obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c new file mode 100644 index 000000000..96b2011d2 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_client.c @@ -0,0 +1,906 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. + * + * Copyright (C) 2005 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/vmalloc.h> +#include <linux/export.h> +#include <asm/xen/hypervisor.h> +#include <asm/xen/page.h> +#include <xen/interface/xen.h> +#include <xen/interface/event_channel.h> +#include <xen/balloon.h> +#include <xen/events.h> +#include <xen/grant_table.h> +#include <xen/xenbus.h> +#include <xen/xen.h> +#include <xen/features.h> + +#include "xenbus_probe.h" + +struct xenbus_map_node { + struct list_head next; + union { + struct { + struct vm_struct *area; + } pv; + struct { + struct page *pages[XENBUS_MAX_RING_PAGES]; + void *addr; + } hvm; + }; + grant_handle_t handles[XENBUS_MAX_RING_PAGES]; + unsigned int nr_handles; +}; + +static DEFINE_SPINLOCK(xenbus_valloc_lock); +static LIST_HEAD(xenbus_valloc_pages); + +struct xenbus_ring_ops { + int (*map)(struct xenbus_device *dev, + grant_ref_t *gnt_refs, unsigned int nr_grefs, + void **vaddr); + int (*unmap)(struct xenbus_device *dev, void *vaddr); +}; + +static const struct xenbus_ring_ops *ring_ops __read_mostly; + +const char *xenbus_strstate(enum xenbus_state state) +{ + static const char *const name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + [XenbusStateReconfiguring] = "Reconfiguring", + [XenbusStateReconfigured] = "Reconfigured", + }; + return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; +} +EXPORT_SYMBOL_GPL(xenbus_strstate); + +/** + * xenbus_watch_path - register a watch + * @dev: xenbus device + * @path: path to watch + * @watch: watch to register + * @callback: callback to register + * + * Register a @watch on the given path, using the given xenbus_watch structure + * for storage, and the given @callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given @path will be saved as + * @watch->node, and remains the caller's to free. On error, @watch->node will + * be NULL, the device will switch to %XenbusStateClosing, and the error will + * be saved in the store. + */ +int xenbus_watch_path(struct xenbus_device *dev, const char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + + watch->node = path; + watch->callback = callback; + + err = register_xenbus_watch(watch); + + if (err) { + watch->node = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, err, "adding watch on %s", path); + } + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_path); + + +/** + * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path + * @dev: xenbus device + * @watch: watch to register + * @callback: callback to register + * @pathfmt: format of path to watch + * + * Register a watch on the given @path, using the given xenbus_watch + * structure for storage, and the given @callback function as the callback. + * Return 0 on success, or -errno on error. On success, the watched path + * (@path/@path2) will be saved as @watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to %XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_pathfmt(struct xenbus_device *dev, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int), + const char *pathfmt, ...) +{ + int err; + va_list ap; + char *path; + + va_start(ap, pathfmt); + path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap); + va_end(ap); + + if (!path) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + err = xenbus_watch_path(dev, path, watch, callback); + + if (err) + kfree(path); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); + +static void xenbus_switch_fatal(struct xenbus_device *, int, int, + const char *, ...); + +static int +__xenbus_switch_state(struct xenbus_device *dev, + enum xenbus_state state, int depth) +{ + /* We check whether the state is currently set to the given value, and + if not, then the state is set. We don't want to unconditionally + write the given state, because we don't want to fire watches + unnecessarily. Furthermore, if the node has gone, we don't write + to it, as the device will be tearing down, and we don't want to + resurrect that directory. + + Note that, because of this cached value of our state, this + function will not take a caller's Xenstore transaction + (something it was trying to in the past) because dev->state + would not get reset if the transaction was aborted. + */ + + struct xenbus_transaction xbt; + int current_state; + int err, abort; + + if (state == dev->state) + return 0; + +again: + abort = 1; + + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_switch_fatal(dev, depth, err, "starting transaction"); + return 0; + } + + err = xenbus_scanf(xbt, dev->nodename, "state", "%d", ¤t_state); + if (err != 1) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); + if (err) { + xenbus_switch_fatal(dev, depth, err, "writing new state"); + goto abort; + } + + abort = 0; +abort: + err = xenbus_transaction_end(xbt, abort); + if (err) { + if (err == -EAGAIN && !abort) + goto again; + xenbus_switch_fatal(dev, depth, err, "ending transaction"); + } else + dev->state = state; + + return 0; +} + +/** + * xenbus_switch_state + * @dev: xenbus device + * @state: new state + * + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +{ + return __xenbus_switch_state(dev, state, 0); +} + +EXPORT_SYMBOL_GPL(xenbus_switch_state); + +int xenbus_frontend_closed(struct xenbus_device *dev) +{ + xenbus_switch_state(dev, XenbusStateClosed); + complete(&dev->down); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_frontend_closed); + +/** + * Return the path to the error node for the given device, or NULL on failure. + * If the value returned is non-NULL, then it is the caller's to kfree. + */ +static char *error_path(struct xenbus_device *dev) +{ + return kasprintf(GFP_KERNEL, "error/%s", dev->nodename); +} + + +static void xenbus_va_dev_error(struct xenbus_device *dev, int err, + const char *fmt, va_list ap) +{ + unsigned int len; + char *printf_buffer = NULL; + char *path_buffer = NULL; + +#define PRINTF_BUFFER_SIZE 4096 + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + goto fail; + + len = sprintf(printf_buffer, "%i ", -err); + vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); + + dev_err(&dev->dev, "%s\n", printf_buffer); + + path_buffer = error_path(dev); + + if (path_buffer == NULL) { + dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + + if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { + dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + +fail: + kfree(printf_buffer); + kfree(path_buffer); +} + + +/** + * xenbus_dev_error + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); +} +EXPORT_SYMBOL_GPL(xenbus_dev_error); + +/** + * xenbus_dev_fatal + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ + +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); + + xenbus_switch_state(dev, XenbusStateClosing); +} +EXPORT_SYMBOL_GPL(xenbus_dev_fatal); + +/** + * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps + * avoiding recursion within xenbus_switch_state. + */ +static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); + + if (!depth) + __xenbus_switch_state(dev, XenbusStateClosing, 1); +} + +/** + * xenbus_grant_ring + * @dev: xenbus device + * @vaddr: starting virtual address of the ring + * @nr_pages: number of pages to be granted + * @grefs: grant reference array to be filled in + * + * Grant access to the given @vaddr to the peer of the given device. + * Then fill in @grefs with grant references. Return 0 on success, or + * -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, + unsigned int nr_pages, grant_ref_t *grefs) +{ + int err; + int i, j; + + for (i = 0; i < nr_pages; i++) { + unsigned long addr = (unsigned long)vaddr + + (PAGE_SIZE * i); + err = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_mfn(addr), 0); + if (err < 0) { + xenbus_dev_fatal(dev, err, + "granting access to ring page"); + goto fail; + } + grefs[i] = err; + } + + return 0; + +fail: + for (j = 0; j < i; j++) + gnttab_end_foreign_access_ref(grefs[j], 0); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_grant_ring); + + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = dev->otherend_id; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err) + xenbus_dev_fatal(dev, err, "allocating event channel"); + else + *port = alloc_unbound.port; + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); + + +/** + * Free an existing event channel. Returns 0 on success or -errno on error. + */ +int xenbus_free_evtchn(struct xenbus_device *dev, int port) +{ + struct evtchn_close close; + int err; + + close.port = port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (err) + xenbus_dev_error(dev, err, "freeing event channel %d", port); + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_free_evtchn); + + +/** + * xenbus_map_ring_valloc + * @dev: xenbus device + * @gnt_refs: grant reference array + * @nr_grefs: number of grant references + * @vaddr: pointer to address to be filled out by mapping + * + * Map @nr_grefs pages of memory into this domain from another + * domain's grant table. xenbus_map_ring_valloc allocates @nr_grefs + * pages of virtual address space, maps the pages to that address, and + * sets *vaddr to that address. Returns 0 on success, and GNTST_* + * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on + * error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. + */ +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, void **vaddr) +{ + return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +/* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned + * long), e.g. 32-on-64. Caller is responsible for preparing the + * right array to feed into this function */ +static int __xenbus_map_ring(struct xenbus_device *dev, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + grant_handle_t *handles, + phys_addr_t *addrs, + unsigned int flags, + bool *leaked) +{ + struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES]; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + int i, j; + int err = GNTST_okay; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + + for (i = 0; i < nr_grefs; i++) { + memset(&map[i], 0, sizeof(map[i])); + gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i], + dev->otherend_id); + handles[i] = INVALID_GRANT_HANDLE; + } + + gnttab_batch_map(map, i); + + for (i = 0; i < nr_grefs; i++) { + if (map[i].status != GNTST_okay) { + err = map[i].status; + xenbus_dev_fatal(dev, map[i].status, + "mapping in shared page %d from domain %d", + gnt_refs[i], dev->otherend_id); + goto fail; + } else + handles[i] = map[i].handle; + } + + return GNTST_okay; + + fail: + for (i = j = 0; i < nr_grefs; i++) { + if (handles[i] != INVALID_GRANT_HANDLE) { + memset(&unmap[j], 0, sizeof(unmap[j])); + gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i], + GNTMAP_host_map, handles[i]); + j++; + } + } + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j)) + BUG(); + + *leaked = false; + for (i = 0; i < j; i++) { + if (unmap[i].status != GNTST_okay) { + *leaked = true; + break; + } + } + + return err; +} + +static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + void **vaddr) +{ + struct xenbus_map_node *node; + struct vm_struct *area; + pte_t *ptes[XENBUS_MAX_RING_PAGES]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + int err = GNTST_okay; + int i; + bool leaked; + + *vaddr = NULL; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes); + if (!area) { + kfree(node); + return -ENOMEM; + } + + for (i = 0; i < nr_grefs; i++) + phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr; + + err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles, + phys_addrs, + GNTMAP_host_map | GNTMAP_contains_pte, + &leaked); + if (err) + goto failed; + + node->nr_handles = nr_grefs; + node->pv.area = area; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = area->addr; + return 0; + +failed: + if (!leaked) + free_vm_area(area); + else + pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs); + + kfree(node); + return err; +} + +static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, + grant_ref_t *gnt_ref, + unsigned int nr_grefs, + void **vaddr) +{ + struct xenbus_map_node *node; + int i; + int err; + void *addr; + bool leaked = false; + /* Why do we need two arrays? See comment of __xenbus_map_ring */ + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + unsigned long addrs[XENBUS_MAX_RING_PAGES]; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + + *vaddr = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages, + false /* lowmem */); + if (err) + goto out_err; + + for (i = 0; i < nr_grefs; i++) { + unsigned long pfn = page_to_pfn(node->hvm.pages[i]); + phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn); + addrs[i] = (unsigned long)pfn_to_kaddr(pfn); + } + + err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles, + phys_addrs, GNTMAP_host_map, &leaked); + node->nr_handles = nr_grefs; + + if (err) + goto out_free_ballooned_pages; + + addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP, + PAGE_KERNEL); + if (!addr) { + err = -ENOMEM; + goto out_xenbus_unmap_ring; + } + + node->hvm.addr = addr; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = addr; + return 0; + + out_xenbus_unmap_ring: + if (!leaked) + xenbus_unmap_ring(dev, node->handles, node->nr_handles, + addrs); + else + pr_alert("leaking %p size %u page(s)", + addr, nr_grefs); + out_free_ballooned_pages: + if (!leaked) + free_xenballooned_pages(nr_grefs, node->hvm.pages); + out_err: + kfree(node); + return err; +} + + +/** + * xenbus_map_ring + * @dev: xenbus device + * @gnt_refs: grant reference array + * @nr_grefs: number of grant reference + * @handles: pointer to grant handle to be filled + * @vaddrs: addresses to be mapped to + * @leaked: fail to clean up a failed map, caller should not free vaddr + * + * Map pages of memory into this domain from another domain's grant table. + * xenbus_map_ring does not allocate the virtual address space (you must do + * this yourself!). It only maps in the pages to the specified address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM / -EINVAL on error. If an error is returned, device will switch to + * XenbusStateClosing and the first error message will be saved in XenStore. + * Further more if we fail to map the ring, caller should check @leaked. + * If @leaked is not zero it means xenbus_map_ring fails to clean up, caller + * should not free the address space of @vaddr. + */ +int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs, + unsigned int nr_grefs, grant_handle_t *handles, + unsigned long *vaddrs, bool *leaked) +{ + phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES]; + int i; + + if (nr_grefs > XENBUS_MAX_RING_PAGES) + return -EINVAL; + + for (i = 0; i < nr_grefs; i++) + phys_addrs[i] = (unsigned long)vaddrs[i]; + + return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles, + phys_addrs, GNTMAP_host_map, leaked); +} +EXPORT_SYMBOL_GPL(xenbus_map_ring); + + +/** + * xenbus_unmap_ring_vfree + * @dev: xenbus device + * @vaddr: addr to unmap + * + * Based on Rusty Russell's skeleton driver's unmap_page. + * Unmap a page of memory in this domain that was imported from another domain. + * Use xenbus_unmap_ring_vfree if you mapped in your memory with + * xenbus_map_ring_valloc (it will free the virtual address space). + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) +{ + return ring_ops->unmap(dev, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + +static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) +{ + struct xenbus_map_node *node; + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + unsigned int level; + int i; + bool leaked = false; + int err; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + if (node->pv.area->addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + for (i = 0; i < node->nr_handles; i++) { + unsigned long addr; + + memset(&unmap[i], 0, sizeof(unmap[i])); + addr = (unsigned long)vaddr + (PAGE_SIZE * i); + unmap[i].host_addr = arbitrary_virt_to_machine( + lookup_address(addr, &level)).maddr; + unmap[i].dev_bus_addr = 0; + unmap[i].handle = node->handles[i]; + } + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) + BUG(); + + err = GNTST_okay; + leaked = false; + for (i = 0; i < node->nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + leaked = true; + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + node->handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } + + if (!leaked) + free_vm_area(node->pv.area); + else + pr_alert("leaking VM area %p size %u page(s)", + node->pv.area, node->nr_handles); + + kfree(node); + return err; +} + +static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) +{ + int rv; + struct xenbus_map_node *node; + void *addr; + unsigned long addrs[XENBUS_MAX_RING_PAGES]; + int i; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + addr = node->hvm.addr; + if (addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = addr = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + for (i = 0; i < node->nr_handles; i++) + addrs[i] = (unsigned long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i])); + + rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles, + addrs); + if (!rv) + vunmap(vaddr); + else + WARN(1, "Leaking %p, size %u page(s)\n", vaddr, + node->nr_handles); + + kfree(node); + return rv; +} + +/** + * xenbus_unmap_ring + * @dev: xenbus device + * @handles: grant handle array + * @nr_handles: number of handles in the array + * @vaddrs: addresses to unmap + * + * Unmap memory in this domain that was imported from another domain. + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring(struct xenbus_device *dev, + grant_handle_t *handles, unsigned int nr_handles, + unsigned long *vaddrs) +{ + struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES]; + int i; + int err; + + if (nr_handles > XENBUS_MAX_RING_PAGES) + return -EINVAL; + + for (i = 0; i < nr_handles; i++) + gnttab_set_unmap_op(&unmap[i], vaddrs[i], + GNTMAP_host_map, handles[i]); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i)) + BUG(); + + err = GNTST_okay; + for (i = 0; i < nr_handles; i++) { + if (unmap[i].status != GNTST_okay) { + xenbus_dev_error(dev, unmap[i].status, + "unmapping page at handle %d error %d", + handles[i], unmap[i].status); + err = unmap[i].status; + break; + } + } + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring); + + +/** + * xenbus_read_driver_state + * @path: path for driver + * + * Return the state of the driver rooted at the given store path, or + * XenbusStateUnknown if no state can be read. + */ +enum xenbus_state xenbus_read_driver_state(const char *path) +{ + enum xenbus_state result; + int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); + if (err) + result = XenbusStateUnknown; + + return result; +} +EXPORT_SYMBOL_GPL(xenbus_read_driver_state); + +static const struct xenbus_ring_ops ring_ops_pv = { + .map = xenbus_map_ring_valloc_pv, + .unmap = xenbus_unmap_ring_vfree_pv, +}; + +static const struct xenbus_ring_ops ring_ops_hvm = { + .map = xenbus_map_ring_valloc_hvm, + .unmap = xenbus_unmap_ring_vfree_hvm, +}; + +void __init xenbus_ring_ops_init(void) +{ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + ring_ops = &ring_ops_pv; + else + ring_ops = &ring_ops_hvm; +} diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c new file mode 100644 index 000000000..fdb0f339d --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -0,0 +1,243 @@ +/****************************************************************************** + * xenbus_comms.c + * + * Low level code to talks to Xen Store: ringbuffer and event channel. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/err.h> +#include <xen/xenbus.h> +#include <asm/xen/hypervisor.h> +#include <xen/events.h> +#include <xen/page.h> +#include "xenbus_comms.h" + +static int xenbus_irq; + +static DECLARE_WORK(probe_work, xenbus_probe); + +static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); + +static irqreturn_t wake_waiting(int irq, void *unused) +{ + if (unlikely(xenstored_ready == 0)) { + xenstored_ready = 1; + schedule_work(&probe_work); + } + + wake_up(&xb_waitq); + return IRQ_HANDLED; +} + +static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) +{ + return ((prod - cons) <= XENSTORE_RING_SIZE); +} + +static void *get_output_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) + *len = XENSTORE_RING_SIZE - (prod - cons); + return buf + MASK_XENSTORE_IDX(prod); +} + +static const void *get_input_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + const char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if ((prod - cons) < *len) + *len = prod - cons; + return buf + MASK_XENSTORE_IDX(cons); +} + +/** + * xb_write - low level write + * @data: buffer to send + * @len: length of buffer + * + * Returns 0 on success, error otherwise. + */ +int xb_write(const void *data, unsigned len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + int rc; + + while (len != 0) { + void *dst; + unsigned int avail; + + rc = wait_event_interruptible( + xb_waitq, + (intf->req_prod - intf->req_cons) != + XENSTORE_RING_SIZE); + if (rc < 0) + return rc; + + /* Read indexes, then verify. */ + cons = intf->req_cons; + prod = intf->req_prod; + if (!check_indexes(cons, prod)) { + intf->req_cons = intf->req_prod = 0; + return -EIO; + } + + dst = get_output_chunk(cons, prod, intf->req, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* Must write data /after/ reading the consumer index. */ + mb(); + + memcpy(dst, data, avail); + data += avail; + len -= avail; + + /* Other side must not see new producer until data is there. */ + wmb(); + intf->req_prod += avail; + + /* Implies mb(): other side will see the updated producer. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return 0; +} + +int xb_data_to_read(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + return (intf->rsp_cons != intf->rsp_prod); +} + +int xb_wait_for_data_to_read(void) +{ + return wait_event_interruptible(xb_waitq, xb_data_to_read()); +} + +int xb_read(void *data, unsigned len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + int rc; + + while (len != 0) { + unsigned int avail; + const char *src; + + rc = xb_wait_for_data_to_read(); + if (rc < 0) + return rc; + + /* Read indexes, then verify. */ + cons = intf->rsp_cons; + prod = intf->rsp_prod; + if (!check_indexes(cons, prod)) { + intf->rsp_cons = intf->rsp_prod = 0; + return -EIO; + } + + src = get_input_chunk(cons, prod, intf->rsp, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* Must read data /after/ reading the producer index. */ + rmb(); + + memcpy(data, src, avail); + data += avail; + len -= avail; + + /* Other side must not see free space until we've copied out */ + mb(); + intf->rsp_cons += avail; + + pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); + + /* Implies mb(): other side will see the updated consumer. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return 0; +} + +/** + * xb_init_comms - Set up interrupt handler off store event channel. + */ +int xb_init_comms(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + + if (intf->req_prod != intf->req_cons) + pr_err("request ring is not quiescent (%08x:%08x)!\n", + intf->req_cons, intf->req_prod); + + if (intf->rsp_prod != intf->rsp_cons) { + pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; + } + + if (xenbus_irq) { + /* Already have an irq; assume we're resuming */ + rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); + } else { + int err; + err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err < 0) { + pr_err("request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; + } + + return 0; +} + +void xb_deinit_comms(void) +{ + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + xenbus_irq = 0; +} diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h new file mode 100644 index 000000000..e74f9c1fb --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -0,0 +1,52 @@ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_COMMS_H +#define _XENBUS_COMMS_H + +#include <linux/fs.h> + +int xs_init(void); +int xb_init_comms(void); +void xb_deinit_comms(void); + +/* Low level routines. */ +int xb_write(const void *data, unsigned len); +int xb_read(void *data, unsigned len); +int xb_data_to_read(void); +int xb_wait_for_data_to_read(void); +int xs_input_avail(void); +extern struct xenstore_domain_interface *xen_store_interface; +extern int xen_store_evtchn; +extern enum xenstore_init xen_store_domain_type; + +extern const struct file_operations xen_xenbus_fops; + +#endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c new file mode 100644 index 000000000..b17707ee0 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -0,0 +1,142 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/capability.h> + +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/xenbus.h> +#include <xen/xenbus_dev.h> +#include <xen/grant_table.h> +#include <xen/events.h> +#include <asm/xen/hypervisor.h> + +#include "xenbus_comms.h" + +MODULE_LICENSE("GPL"); + +static int xenbus_backend_open(struct inode *inode, struct file *filp) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return nonseekable_open(inode, filp); +} + +static long xenbus_alloc(domid_t domid) +{ + struct evtchn_alloc_unbound arg; + int err = -EEXIST; + + xs_suspend(); + + /* If xenstored_ready is nonzero, that means we have already talked to + * xenstore and set up watches. These watches will be restored by + * xs_resume, but that requires communication over the port established + * below that is not visible to anyone until the ioctl returns. + * + * This can be resolved by splitting the ioctl into two parts + * (postponing the resume until xenstored is active) but this is + * unnecessarily complex for the intended use where xenstored is only + * started once - so return -EEXIST if it's already running. + */ + if (xenstored_ready) + goto out_err; + + gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid, + virt_to_mfn(xen_store_interface), 0 /* writable */); + + arg.dom = DOMID_SELF; + arg.remote_dom = domid; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &arg); + if (err) + goto out_err; + + if (xen_store_evtchn > 0) + xb_deinit_comms(); + + xen_store_evtchn = arg.port; + + xs_resume(); + + return arg.port; + + out_err: + xs_suspend_cancel(); + return err; +} + +static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, + unsigned long data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case IOCTL_XENBUS_BACKEND_EVTCHN: + if (xen_store_evtchn > 0) + return xen_store_evtchn; + return -ENODEV; + case IOCTL_XENBUS_BACKEND_SETUP: + return xenbus_alloc(data); + default: + return -ENOTTY; + } +} + +static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_pfn(xen_store_interface), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static const struct file_operations xenbus_backend_fops = { + .open = xenbus_backend_open, + .mmap = xenbus_backend_mmap, + .unlocked_ioctl = xenbus_backend_ioctl, +}; + +static struct miscdevice xenbus_backend_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus_backend", + .fops = &xenbus_backend_fops, +}; + +static int __init xenbus_backend_init(void) +{ + int err; + + if (!xen_initial_domain()) + return -ENODEV; + + err = misc_register(&xenbus_backend_dev); + if (err) + pr_err("Could not register xenbus backend device\n"); + return err; +} + +static void __exit xenbus_backend_exit(void) +{ + misc_deregister(&xenbus_backend_dev); +} + +module_init(xenbus_backend_init); +module_exit(xenbus_backend_exit); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c new file mode 100644 index 000000000..9433e4651 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -0,0 +1,634 @@ +/* + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Changes: + * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem + * and /proc/xen compatibility mount point. + * Turned xenfs into a loadable module. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/uio.h> +#include <linux/notifier.h> +#include <linux/wait.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/mount.h> +#include <linux/pagemap.h> +#include <linux/uaccess.h> +#include <linux/init.h> +#include <linux/namei.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/miscdevice.h> +#include <linux/module.h> + +#include "xenbus_comms.h" + +#include <xen/xenbus.h> +#include <xen/xen.h> +#include <asm/xen/hypervisor.h> + +MODULE_LICENSE("GPL"); + +/* + * An element of a list of outstanding transactions, for which we're + * still waiting a reply. + */ +struct xenbus_transaction_holder { + struct list_head list; + struct xenbus_transaction handle; +}; + +/* + * A buffer of data on the queue. + */ +struct read_buffer { + struct list_head list; + unsigned int cons; + unsigned int len; + char msg[]; +}; + +struct xenbus_file_priv { + /* + * msgbuffer_mutex is held while partial requests are built up + * and complete requests are acted on. It therefore protects + * the "transactions" and "watches" lists, and the partial + * request length and buffer. + * + * reply_mutex protects the reply being built up to return to + * usermode. It nests inside msgbuffer_mutex but may be held + * alone during a watch callback. + */ + struct mutex msgbuffer_mutex; + + /* In-progress transactions */ + struct list_head transactions; + + /* Active watches. */ + struct list_head watches; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[XENSTORE_PAYLOAD_MAX]; + } u; + + /* Response queue. */ + struct mutex reply_mutex; + struct list_head read_buffers; + wait_queue_head_t read_waitq; + +}; + +/* Read out any raw xenbus messages queued up. */ +static ssize_t xenbus_file_read(struct file *filp, + char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + struct read_buffer *rb; + unsigned i; + int ret; + + mutex_lock(&u->reply_mutex); +again: + while (list_empty(&u->read_buffers)) { + mutex_unlock(&u->reply_mutex); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(u->read_waitq, + !list_empty(&u->read_buffers)); + if (ret) + return ret; + mutex_lock(&u->reply_mutex); + } + + rb = list_entry(u->read_buffers.next, struct read_buffer, list); + i = 0; + while (i < len) { + unsigned sz = min((unsigned)len - i, rb->len - rb->cons); + + ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); + + i += sz - ret; + rb->cons += sz - ret; + + if (ret != 0) { + if (i == 0) + i = -EFAULT; + goto out; + } + + /* Clear out buffer if it has been consumed */ + if (rb->cons == rb->len) { + list_del(&rb->list); + kfree(rb); + if (list_empty(&u->read_buffers)) + break; + rb = list_entry(u->read_buffers.next, + struct read_buffer, list); + } + } + if (i == 0) + goto again; + +out: + mutex_unlock(&u->reply_mutex); + return i; +} + +/* + * Add a buffer to the queue. Caller must hold the appropriate lock + * if the queue is not local. (Commonly the caller will build up + * multiple queued buffers on a temporary local list, and then add it + * to the appropriate list under lock once all the buffers have een + * successfully allocated.) + */ +static int queue_reply(struct list_head *queue, const void *data, size_t len) +{ + struct read_buffer *rb; + + if (len == 0) + return 0; + + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); + if (rb == NULL) + return -ENOMEM; + + rb->cons = 0; + rb->len = len; + + memcpy(rb->msg, data, len); + + list_add_tail(&rb->list, queue); + return 0; +} + +/* + * Free all the read_buffer s on a list. + * Caller must have sole reference to list. + */ +static void queue_cleanup(struct list_head *list) +{ + struct read_buffer *rb; + + while (!list_empty(list)) { + rb = list_entry(list->next, struct read_buffer, list); + list_del(list->next); + kfree(rb); + } +} + +struct watch_adapter { + struct list_head list; + struct xenbus_watch watch; + struct xenbus_file_priv *dev_data; + char *token; +}; + +static void free_watch_adapter(struct watch_adapter *watch) +{ + kfree(watch->watch.node); + kfree(watch->token); + kfree(watch); +} + +static struct watch_adapter *alloc_watch_adapter(const char *path, + const char *token) +{ + struct watch_adapter *watch; + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (watch == NULL) + goto out_fail; + + watch->watch.node = kstrdup(path, GFP_KERNEL); + if (watch->watch.node == NULL) + goto out_free; + + watch->token = kstrdup(token, GFP_KERNEL); + if (watch->token == NULL) + goto out_free; + + return watch; + +out_free: + free_watch_adapter(watch); + +out_fail: + return NULL; +} + +static void watch_fired(struct xenbus_watch *watch, + const char **vec, + unsigned int len) +{ + struct watch_adapter *adap; + struct xsd_sockmsg hdr; + const char *path, *token; + int path_len, tok_len, body_len, data_len = 0; + int ret; + LIST_HEAD(staging_q); + + adap = container_of(watch, struct watch_adapter, watch); + + path = vec[XS_WATCH_PATH]; + token = adap->token; + + path_len = strlen(path) + 1; + tok_len = strlen(token) + 1; + if (len > 2) + data_len = vec[len] - vec[2] + 1; + body_len = path_len + tok_len + data_len; + + hdr.type = XS_WATCH_EVENT; + hdr.len = body_len; + + mutex_lock(&adap->dev_data->reply_mutex); + + ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); + if (!ret) + ret = queue_reply(&staging_q, path, path_len); + if (!ret) + ret = queue_reply(&staging_q, token, tok_len); + if (!ret && len > 2) + ret = queue_reply(&staging_q, vec[2], data_len); + + if (!ret) { + /* success: pass reply list onto watcher */ + list_splice_tail(&staging_q, &adap->dev_data->read_buffers); + wake_up(&adap->dev_data->read_waitq); + } else + queue_cleanup(&staging_q); + + mutex_unlock(&adap->dev_data->reply_mutex); +} + +static int xenbus_write_transaction(unsigned msg_type, + struct xenbus_file_priv *u) +{ + int rc; + void *reply; + struct xenbus_transaction_holder *trans = NULL; + LIST_HEAD(staging_q); + + if (msg_type == XS_TRANSACTION_START) { + trans = kmalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) { + rc = -ENOMEM; + goto out; + } + } + + reply = xenbus_dev_request_and_reply(&u->u.msg); + if (IS_ERR(reply)) { + kfree(trans); + rc = PTR_ERR(reply); + goto out; + } + + if (msg_type == XS_TRANSACTION_START) { + if (u->u.msg.type == XS_ERROR) + kfree(trans); + else { + trans->handle.id = simple_strtoul(reply, NULL, 0); + list_add(&trans->list, &u->transactions); + } + } else if (u->u.msg.type == XS_TRANSACTION_END) { + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; + BUG_ON(&trans->list == &u->transactions); + list_del(&trans->list); + + kfree(trans); + } + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); + if (!rc) + rc = queue_reply(&staging_q, reply, u->u.msg.len); + if (!rc) { + list_splice_tail(&staging_q, &u->read_buffers); + wake_up(&u->read_waitq); + } else { + queue_cleanup(&staging_q); + } + mutex_unlock(&u->reply_mutex); + + kfree(reply); + +out: + return rc; +} + +static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) +{ + struct watch_adapter *watch, *tmp_watch; + char *path, *token; + int err, rc; + LIST_HEAD(staging_q); + + path = u->u.buffer + sizeof(u->u.msg); + token = memchr(path, 0, u->u.msg.len); + if (token == NULL) { + rc = -EILSEQ; + goto out; + } + token++; + if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) { + rc = -EILSEQ; + goto out; + } + + if (msg_type == XS_WATCH) { + watch = alloc_watch_adapter(path, token); + if (watch == NULL) { + rc = -ENOMEM; + goto out; + } + + watch->watch.callback = watch_fired; + watch->dev_data = u; + + err = register_xenbus_watch(&watch->watch); + if (err) { + free_watch_adapter(watch); + rc = err; + goto out; + } + list_add(&watch->list, &u->watches); + } else { + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + if (!strcmp(watch->token, token) && + !strcmp(watch->watch.node, path)) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + break; + } + } + } + + /* Success. Synthesize a reply to say all is OK. */ + { + struct { + struct xsd_sockmsg hdr; + char body[3]; + } __packed reply = { + { + .type = msg_type, + .len = sizeof(reply.body) + }, + "OK" + }; + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); + wake_up(&u->read_waitq); + mutex_unlock(&u->reply_mutex); + } + +out: + return rc; +} + +static ssize_t xenbus_file_write(struct file *filp, + const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + uint32_t msg_type; + int rc = len; + int ret; + LIST_HEAD(staging_q); + + /* + * We're expecting usermode to be writing properly formed + * xenbus messages. If they write an incomplete message we + * buffer it up. Once it is complete, we act on it. + */ + + /* + * Make sure concurrent writers can't stomp all over each + * other's messages and make a mess of our partial message + * buffer. We don't make any attemppt to stop multiple + * writers from making a mess of each other's incomplete + * messages; we're just trying to guarantee our own internal + * consistency and make sure that single writes are handled + * atomically. + */ + mutex_lock(&u->msgbuffer_mutex); + + /* Get this out of the way early to avoid confusion */ + if (len == 0) + goto out; + + /* Can't write a xenbus message larger we can buffer */ + if (len > sizeof(u->u.buffer) - u->len) { + /* On error, dump existing buffer */ + u->len = 0; + rc = -EINVAL; + goto out; + } + + ret = copy_from_user(u->u.buffer + u->len, ubuf, len); + + if (ret != 0) { + rc = -EFAULT; + goto out; + } + + /* Deal with a partial copy. */ + len -= ret; + rc = len; + + u->len += len; + + /* Return if we haven't got a full message yet */ + if (u->len < sizeof(u->u.msg)) + goto out; /* not even the header yet */ + + /* If we're expecting a message that's larger than we can + possibly send, dump what we have and return an error. */ + if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { + rc = -E2BIG; + u->len = 0; + goto out; + } + + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + goto out; /* incomplete data portion */ + + /* + * OK, now we have a complete message. Do something with it. + */ + + msg_type = u->u.msg.type; + + switch (msg_type) { + case XS_WATCH: + case XS_UNWATCH: + /* (Un)Ask for some path to be watched for changes */ + ret = xenbus_write_watch(msg_type, u); + break; + + default: + /* Send out a transaction */ + ret = xenbus_write_transaction(msg_type, u); + break; + } + if (ret != 0) + rc = ret; + + /* Buffered message consumed */ + u->len = 0; + + out: + mutex_unlock(&u->msgbuffer_mutex); + return rc; +} + +static int xenbus_file_open(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u; + + if (xen_store_evtchn == 0) + return -ENOENT; + + nonseekable_open(inode, filp); + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&u->transactions); + INIT_LIST_HEAD(&u->watches); + INIT_LIST_HEAD(&u->read_buffers); + init_waitqueue_head(&u->read_waitq); + + mutex_init(&u->reply_mutex); + mutex_init(&u->msgbuffer_mutex); + + filp->private_data = u; + + return 0; +} + +static int xenbus_file_release(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u = filp->private_data; + struct xenbus_transaction_holder *trans, *tmp; + struct watch_adapter *watch, *tmp_watch; + struct read_buffer *rb, *tmp_rb; + + /* + * No need for locking here because there are no other users, + * by definition. + */ + + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { + xenbus_transaction_end(trans->handle, 1); + list_del(&trans->list); + kfree(trans); + } + + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + } + + list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { + list_del(&rb->list); + kfree(rb); + } + kfree(u); + + return 0; +} + +static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) +{ + struct xenbus_file_priv *u = file->private_data; + + poll_wait(file, &u->read_waitq, wait); + if (!list_empty(&u->read_buffers)) + return POLLIN | POLLRDNORM; + return 0; +} + +const struct file_operations xen_xenbus_fops = { + .read = xenbus_file_read, + .write = xenbus_file_write, + .open = xenbus_file_open, + .release = xenbus_file_release, + .poll = xenbus_file_poll, + .llseek = no_llseek, +}; +EXPORT_SYMBOL_GPL(xen_xenbus_fops); + +static struct miscdevice xenbus_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus", + .fops = &xen_xenbus_fops, +}; + +static int __init xenbus_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&xenbus_dev); + if (err) + pr_err("Could not register xenbus frontend device\n"); + return err; +} + +static void __exit xenbus_exit(void) +{ + misc_deregister(&xenbus_dev); +} + +module_init(xenbus_init); +module_exit(xenbus_exit); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c new file mode 100644 index 000000000..5390a674b --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -0,0 +1,839 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DPRINTK(fmt, args...) \ + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/module.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/xen-ops.h> +#include <xen/page.h> + +#include <xen/hvm.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + + +int xen_store_evtchn; +EXPORT_SYMBOL_GPL(xen_store_evtchn); + +struct xenstore_domain_interface *xen_store_interface; +EXPORT_SYMBOL_GPL(xen_store_interface); + +enum xenstore_init xen_store_domain_type; +EXPORT_SYMBOL_GPL(xen_store_domain_type); + +static unsigned long xen_store_mfn; + +static BLOCKING_NOTIFIER_HEAD(xenstore_chain); + +/* If something in array of ids matches this device, return it. */ +static const struct xenbus_device_id * +match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) +{ + for (; *arr->devicetype != '\0'; arr++) { + if (!strcmp(arr->devicetype, dev->devicetype)) + return arr; + } + return NULL; +} + +int xenbus_match(struct device *_dev, struct device_driver *_drv) +{ + struct xenbus_driver *drv = to_xenbus_driver(_drv); + + if (!drv->ids) + return 0; + + return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; +} +EXPORT_SYMBOL_GPL(xenbus_match); + + +static void free_otherend_details(struct xenbus_device *dev) +{ + kfree(dev->otherend); + dev->otherend = NULL; +} + + +static void free_otherend_watch(struct xenbus_device *dev) +{ + if (dev->otherend_watch.node) { + unregister_xenbus_watch(&dev->otherend_watch); + kfree(dev->otherend_watch.node); + dev->otherend_watch.node = NULL; + } +} + + +static int talk_to_otherend(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + + free_otherend_watch(dev); + free_otherend_details(dev); + + return drv->read_otherend_details(dev); +} + + + +static int watch_otherend(struct xenbus_device *dev) +{ + struct xen_bus_type *bus = + container_of(dev->dev.bus, struct xen_bus_type, bus); + + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_changed, + "%s/%s", dev->otherend, "state"); +} + + +int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node) +{ + int err = xenbus_gather(XBT_NIL, xendev->nodename, + id_node, "%i", &xendev->otherend_id, + path_node, NULL, &xendev->otherend, + NULL); + if (err) { + xenbus_dev_fatal(xendev, err, + "reading other end details from %s", + xendev->nodename); + return err; + } + if (strlen(xendev->otherend) == 0 || + !xenbus_exists(XBT_NIL, xendev->otherend, "")) { + xenbus_dev_fatal(xendev, -ENOENT, + "unable to read other end from %s. " + "missing or inaccessible.", + xendev->nodename); + free_otherend_details(xendev); + return -ENOENT; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); + +void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown) +{ + struct xenbus_device *dev = + container_of(watch, struct xenbus_device, otherend_watch); + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + enum xenbus_state state; + + /* Protect us against watches firing on old details when the otherend + details change, say immediately after a resume. */ + if (!dev->otherend || + strncmp(dev->otherend, vec[XS_WATCH_PATH], + strlen(dev->otherend))) { + dev_dbg(&dev->dev, "Ignoring watch at %s\n", + vec[XS_WATCH_PATH]); + return; + } + + state = xenbus_read_driver_state(dev->otherend); + + dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", + state, xenbus_strstate(state), dev->otherend_watch.node, + vec[XS_WATCH_PATH]); + + /* + * Ignore xenbus transitions during shutdown. This prevents us doing + * work that can fail e.g., when the rootfs is gone. + */ + if (system_state > SYSTEM_RUNNING) { + if (ignore_on_shutdown && (state == XenbusStateClosing)) + xenbus_frontend_closed(dev); + return; + } + + if (drv->otherend_changed) + drv->otherend_changed(dev, state); +} +EXPORT_SYMBOL_GPL(xenbus_otherend_changed); + +int xenbus_dev_probe(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + const struct xenbus_device_id *id; + int err; + + DPRINTK("%s", dev->nodename); + + if (!drv->probe) { + err = -ENODEV; + goto fail; + } + + id = match_device(drv->ids, dev); + if (!id) { + err = -ENODEV; + goto fail; + } + + err = talk_to_otherend(dev); + if (err) { + dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n", + dev->nodename); + return err; + } + + err = drv->probe(dev, id); + if (err) + goto fail; + + err = watch_otherend(dev); + if (err) { + dev_warn(&dev->dev, "watch_otherend on %s failed.\n", + dev->nodename); + return err; + } + + return 0; +fail: + xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); + xenbus_switch_state(dev, XenbusStateClosed); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_dev_probe); + +int xenbus_dev_remove(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + + DPRINTK("%s", dev->nodename); + + free_otherend_watch(dev); + + if (drv->remove) + drv->remove(dev); + + free_otherend_details(dev); + + xenbus_switch_state(dev, XenbusStateClosed); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_dev_remove); + +void xenbus_dev_shutdown(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned long timeout = 5*HZ; + + DPRINTK("%s", dev->nodename); + + get_device(&dev->dev); + if (dev->state != XenbusStateConnected) { + pr_info("%s: %s: %s != Connected, skipping\n", + __func__, dev->nodename, xenbus_strstate(dev->state)); + goto out; + } + xenbus_switch_state(dev, XenbusStateClosing); + timeout = wait_for_completion_timeout(&dev->down, timeout); + if (!timeout) + pr_info("%s: %s timeout closing device\n", + __func__, dev->nodename); + out: + put_device(&dev->dev); +} +EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); + +int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, const char *mod_name) +{ + drv->driver.name = drv->name ? drv->name : drv->ids[0].devicetype; + drv->driver.bus = &bus->bus; + drv->driver.owner = owner; + drv->driver.mod_name = mod_name; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(xenbus_register_driver_common); + +void xenbus_unregister_driver(struct xenbus_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(xenbus_unregister_driver); + +struct xb_find_info { + struct xenbus_device *dev; + const char *nodename; +}; + +static int cmp_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + + if (!strcmp(xendev->nodename, info->nodename)) { + info->dev = xendev; + get_device(dev); + return 1; + } + return 0; +} + +static struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) +{ + struct xb_find_info info = { .dev = NULL, .nodename = nodename }; + + bus_for_each_dev(bus, NULL, &info, cmp_dev); + return info.dev; +} + +static int cleanup_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + int len = strlen(info->nodename); + + DPRINTK("%s", info->nodename); + + /* Match the info->nodename path, or any subdirectory of that path. */ + if (strncmp(xendev->nodename, info->nodename, len)) + return 0; + + /* If the node name is longer, ensure it really is a subdirectory. */ + if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/')) + return 0; + + info->dev = xendev; + get_device(dev); + return 1; +} + +static void xenbus_cleanup_devices(const char *path, struct bus_type *bus) +{ + struct xb_find_info info = { .nodename = path }; + + do { + info.dev = NULL; + bus_for_each_dev(bus, NULL, &info, cleanup_dev); + if (info.dev) { + device_unregister(&info.dev->dev); + put_device(&info.dev->dev); + } + } while (info.dev); +} + +static void xenbus_dev_release(struct device *dev) +{ + if (dev) + kfree(to_xenbus_device(dev)); +} + +static ssize_t nodename_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); +} +static DEVICE_ATTR_RO(nodename); + +static ssize_t devtype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); +} +static DEVICE_ATTR_RO(devtype); + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s:%s\n", dev->bus->name, + to_xenbus_device(dev)->devicetype); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *xenbus_dev_attrs[] = { + &dev_attr_nodename.attr, + &dev_attr_devtype.attr, + &dev_attr_modalias.attr, + NULL, +}; + +static const struct attribute_group xenbus_dev_group = { + .attrs = xenbus_dev_attrs, +}; + +const struct attribute_group *xenbus_dev_groups[] = { + &xenbus_dev_group, + NULL, +}; +EXPORT_SYMBOL_GPL(xenbus_dev_groups); + +int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename) +{ + char devname[XEN_BUS_ID_SIZE]; + int err; + struct xenbus_device *xendev; + size_t stringlen; + char *tmpstring; + + enum xenbus_state state = xenbus_read_driver_state(nodename); + + if (state != XenbusStateInitialising) { + /* Device is not new, so ignore it. This can happen if a + device is going away after switching to Closed. */ + return 0; + } + + stringlen = strlen(nodename) + 1 + strlen(type) + 1; + xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL); + if (!xendev) + return -ENOMEM; + + xendev->state = XenbusStateInitialising; + + /* Copy the strings into the extra space. */ + + tmpstring = (char *)(xendev + 1); + strcpy(tmpstring, nodename); + xendev->nodename = tmpstring; + + tmpstring += strlen(tmpstring) + 1; + strcpy(tmpstring, type); + xendev->devicetype = tmpstring; + init_completion(&xendev->down); + + xendev->dev.bus = &bus->bus; + xendev->dev.release = xenbus_dev_release; + + err = bus->get_bus_id(devname, xendev->nodename); + if (err) + goto fail; + + dev_set_name(&xendev->dev, "%s", devname); + + /* Register with generic device framework. */ + err = device_register(&xendev->dev); + if (err) + goto fail; + + return 0; +fail: + kfree(xendev); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_probe_node); + +static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) +{ + int err = 0; + char **dir; + unsigned int dir_n = 0; + int i; + + dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = bus->probe(bus, type, dir[i]); + if (err) + break; + } + + kfree(dir); + return err; +} + +int xenbus_probe_devices(struct xen_bus_type *bus) +{ + int err = 0; + char **dir; + unsigned int i, dir_n; + + dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_device_type(bus, dir[i]); + if (err) + break; + } + + kfree(dir); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_probe_devices); + +static unsigned int char_count(const char *str, char c) +{ + unsigned int i, ret = 0; + + for (i = 0; str[i]; i++) + if (str[i] == c) + ret++; + return ret; +} + +static int strsep_len(const char *str, char c, unsigned int len) +{ + unsigned int i; + + for (i = 0; str[i]; i++) + if (str[i] == c) { + if (len == 0) + return i; + len--; + } + return (len == 0) ? i : -ERANGE; +} + +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) +{ + int exists, rootlen; + struct xenbus_device *dev; + char type[XEN_BUS_ID_SIZE]; + const char *p, *root; + + if (char_count(node, '/') < 2) + return; + + exists = xenbus_exists(XBT_NIL, node, ""); + if (!exists) { + xenbus_cleanup_devices(node, &bus->bus); + return; + } + + /* backend/<type>/... or device/<type>/... */ + p = strchr(node, '/') + 1; + snprintf(type, XEN_BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); + type[XEN_BUS_ID_SIZE-1] = '\0'; + + rootlen = strsep_len(node, '/', bus->levels); + if (rootlen < 0) + return; + root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node); + if (!root) + return; + + dev = xenbus_device_find(root, &bus->bus); + if (!dev) + xenbus_probe_node(bus, type, root); + else + put_device(&dev->dev); + + kfree(root); +} +EXPORT_SYMBOL_GPL(xenbus_dev_changed); + +int xenbus_dev_suspend(struct device *dev) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); + + DPRINTK("%s", xdev->nodename); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + if (drv->suspend) + err = drv->suspend(xdev); + if (err) + pr_warn("suspend %s failed: %i\n", dev_name(dev), err); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_dev_suspend); + +int xenbus_dev_resume(struct device *dev) +{ + int err; + struct xenbus_driver *drv; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); + + DPRINTK("%s", xdev->nodename); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + err = talk_to_otherend(xdev); + if (err) { + pr_warn("resume (talk_to_otherend) %s failed: %i\n", + dev_name(dev), err); + return err; + } + + xdev->state = XenbusStateInitialising; + + if (drv->resume) { + err = drv->resume(xdev); + if (err) { + pr_warn("resume %s failed: %i\n", dev_name(dev), err); + return err; + } + } + + err = watch_otherend(xdev); + if (err) { + pr_warn("resume (watch_otherend) %s failed: %d.\n", + dev_name(dev), err); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_dev_resume); + +int xenbus_dev_cancel(struct device *dev) +{ + /* Do nothing */ + DPRINTK("cancel"); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_dev_cancel); + +/* A flag to determine if xenstored is 'ready' (i.e. has started) */ +int xenstored_ready; + + +int register_xenstore_notifier(struct notifier_block *nb) +{ + int ret = 0; + + if (xenstored_ready > 0) + ret = nb->notifier_call(nb, 0, NULL); + else + blocking_notifier_chain_register(&xenstore_chain, nb); + + return ret; +} +EXPORT_SYMBOL_GPL(register_xenstore_notifier); + +void unregister_xenstore_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&xenstore_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); + +void xenbus_probe(struct work_struct *unused) +{ + xenstored_ready = 1; + + /* Notify others that xenstore is up */ + blocking_notifier_call_chain(&xenstore_chain, 0, NULL); +} +EXPORT_SYMBOL_GPL(xenbus_probe); + +static int __init xenbus_probe_initcall(void) +{ + if (!xen_domain()) + return -ENODEV; + + if (xen_initial_domain() || xen_hvm_domain()) + return 0; + + xenbus_probe(NULL); + return 0; +} + +device_initcall(xenbus_probe_initcall); + +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) +{ + int err = 0; + unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; + + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; + + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; + + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; + + return 0; + + out_err: + if (page != 0) + free_page(page); + return err; +} + +static int xenbus_resume_cb(struct notifier_block *nb, + unsigned long action, void *data) +{ + int err = 0; + + if (xen_hvm_domain()) { + uint64_t v; + + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (!err && v) + xen_store_evtchn = v; + else + pr_warn("Cannot update xenstore event channel: %d\n", + err); + } else + xen_store_evtchn = xen_start_info->store_evtchn; + + return err; +} + +static struct notifier_block xenbus_resume_nb = { + .notifier_call = xenbus_resume_cb, +}; + +static int __init xenbus_init(void) +{ + int err = 0; + uint64_t v = 0; + xen_store_domain_type = XS_UNKNOWN; + + if (!xen_domain()) + return -ENODEV; + + xenbus_ring_ops_init(); + + if (xen_pv_domain()) + xen_store_domain_type = XS_PV; + if (xen_hvm_domain()) + xen_store_domain_type = XS_HVM; + if (xen_hvm_domain() && xen_initial_domain()) + xen_store_domain_type = XS_LOCAL; + if (xen_pv_domain() && !xen_start_info->store_evtchn) + xen_store_domain_type = XS_LOCAL; + if (xen_pv_domain() && xen_start_info->store_evtchn) + xenstored_ready = 1; + + switch (xen_store_domain_type) { + case XS_LOCAL: + err = xenstored_local_init(); + if (err) + goto out_error; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case XS_PV: + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case XS_HVM: + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + xen_store_mfn = (unsigned long)v; + xen_store_interface = + xen_remap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); + break; + default: + pr_warn("Xenstore state unknown\n"); + break; + } + + /* Initialize the interface to xenstore. */ + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; + } + + if ((xen_store_domain_type != XS_LOCAL) && + (xen_store_domain_type != XS_UNKNOWN)) + xen_resume_notifier_register(&xenbus_resume_nb); + +#ifdef CONFIG_XEN_COMPAT_XENFS + /* + * Create xenfs mountpoint in /proc for compatibility with + * utilities that expect to find "xenbus" under "/proc/xen". + */ + proc_mkdir("xen", NULL); +#endif + +out_error: + return err; +} + +postcore_initcall(xenbus_init); + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h new file mode 100644 index 000000000..c9ec7ca1f --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * xenbus_probe.h + * + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_PROBE_H +#define _XENBUS_PROBE_H + +#define XEN_BUS_ID_SIZE 20 + +struct xen_bus_type { + char *root; + unsigned int levels; + int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); + int (*probe)(struct xen_bus_type *bus, const char *type, + const char *dir); + void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, + unsigned int len); + struct bus_type bus; +}; + +enum xenstore_init { + XS_UNKNOWN, + XS_PV, + XS_HVM, + XS_LOCAL, +}; + +extern const struct attribute_group *xenbus_dev_groups[]; + +extern int xenbus_match(struct device *_dev, struct device_driver *_drv); +extern int xenbus_dev_probe(struct device *_dev); +extern int xenbus_dev_remove(struct device *_dev); +extern int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name); +extern int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename); +extern int xenbus_probe_devices(struct xen_bus_type *bus); + +extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); + +extern void xenbus_dev_shutdown(struct device *_dev); + +extern int xenbus_dev_suspend(struct device *dev); +extern int xenbus_dev_resume(struct device *dev); +extern int xenbus_dev_cancel(struct device *dev); + +extern void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown); + +extern int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + +void xenbus_ring_ops_init(void); + +#endif diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c new file mode 100644 index 000000000..04f7f85a5 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -0,0 +1,276 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have (backend half). + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DPRINTK(fmt, ...) \ + pr_debug("(%s:%d) " fmt "\n", \ + __func__, __LINE__, ##__VA_ARGS__) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/export.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> +#include <asm/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/features.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ +static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type, *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return -EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return -EINVAL; + + devid = strrchr(nodename, '/') + 1; + + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = -ERANGE; + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) + err = -ENOENT; + kfree(frontend); + + if (err) + return err; + + if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s", + typelen, type, domid, devid) >= XEN_BUS_ID_SIZE) + return -ENOSPC; + return 0; +} + +static int xenbus_uevent_backend(struct device *dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *xdev; + struct xenbus_driver *drv; + struct xen_bus_type *bus; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); + + if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) + return -ENOMEM; + + /* stuff we want to pass to /sbin/hotplug */ + if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) + return -ENOMEM; + + if (dev->driver) { + drv = to_xenbus_driver(dev->driver); + if (drv && drv->uevent) + return drv->uevent(xdev, env); + } + + return 0; +} + +/* backend/<typename>/<frontend-uuid>/<name> */ +static int xenbus_probe_backend_unit(struct xen_bus_type *bus, + const char *dir, + const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +/* backend/<typename>/<frontend-domid> */ +static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, + const char *domid) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); + if (!nodename) + return -ENOMEM; + + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + xenbus_otherend_changed(watch, vec, len, 0); +} + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type/<frontend>/<id> */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .otherend_changed = frontend_changed, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .uevent = xenbus_uevent_backend, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .dev_groups = xenbus_dev_groups, + }, +}; + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); +} + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); +} + +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + int rc, val; + + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); + if (rc != 1) + val = 0; /* no online node present */ + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + +int __xenbus_register_backend(struct xenbus_driver *drv, struct module *owner, + const char *mod_name) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend, + owner, mod_name); +} +EXPORT_SYMBOL_GPL(__xenbus_register_backend); + +static int backend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* Enumerate devices in xenstore and watch for changes. */ + xenbus_probe_devices(&xenbus_backend); + register_xenbus_watch(&be_watch); + + return NOTIFY_DONE; +} + +static int __init xenbus_probe_backend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = backend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_backend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} +subsys_initcall(xenbus_probe_backend_init); diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c new file mode 100644 index 000000000..bcb53bdc4 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -0,0 +1,512 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DPRINTK(fmt, ...) \ + pr_debug("(%s:%d) " fmt "\n", \ + __func__, __LINE__, ##__VA_ARGS__) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/page.h> +#include <xen/xen.h> + +#include <xen/platform_pci.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + + +static struct workqueue_struct *xenbus_frontend_wq; + +/* device/<type>/<id> => <type>-<id> */ +static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { + pr_warn("bad frontend %s\n", nodename); + return -EINVAL; + } + + strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + pr_warn("bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} + +/* device/<typename>/<name> */ +static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, + const char *name) +{ + char *nodename; + int err; + + /* ignore console/0 */ + if (!strncmp(type, "console", 7) && !strncmp(name, "0", 1)) { + DPRINTK("Ignoring buggy device entry console/0"); + return 0; + } + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +static int xenbus_uevent_frontend(struct device *_dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) + return -ENOMEM; + + return 0; +} + + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + xenbus_otherend_changed(watch, vec, len, 1); +} + +static void xenbus_frontend_delayed_resume(struct work_struct *w) +{ + struct xenbus_device *xdev = container_of(w, struct xenbus_device, work); + + xenbus_dev_resume(&xdev->dev); +} + +static int xenbus_frontend_dev_resume(struct device *dev) +{ + /* + * If xenstored is running in this domain, we cannot access the backend + * state at the moment, so we need to defer xenbus_dev_resume + */ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + + if (!xenbus_frontend_wq) { + pr_err("%s: no workqueue to process delayed resume\n", + xdev->nodename); + return -EFAULT; + } + + queue_work(xenbus_frontend_wq, &xdev->work); + + return 0; + } + + return xenbus_dev_resume(dev); +} + +static int xenbus_frontend_dev_probe(struct device *dev) +{ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume); + } + + return xenbus_dev_probe(dev); +} + +static const struct dev_pm_ops xenbus_pm_ops = { + .suspend = xenbus_dev_suspend, + .resume = xenbus_frontend_dev_resume, + .freeze = xenbus_dev_suspend, + .thaw = xenbus_dev_cancel, + .restore = xenbus_dev_resume, +}; + +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/<id> */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .otherend_changed = backend_changed, + .bus = { + .name = "xen", + .match = xenbus_match, + .uevent = xenbus_uevent_frontend, + .probe = xenbus_frontend_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .dev_groups = xenbus_dev_groups, + + .pm = &xenbus_pm_ops, + }, +}; + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); +} + + +/* We watch for devices appearing and vanishing. */ +static struct xenbus_watch fe_watch = { + .node = "device", + .callback = frontend_changed, +}; + +static int read_backend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "backend-id", "backend"); +} + +static int is_device_connecting(struct device *dev, void *data, bool ignore_nonessential) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + struct xenbus_driver *xendrv; + + /* + * A device with no driver will never connect. We care only about + * devices which should currently be in the process of connecting. + */ + if (!dev->driver) + return 0; + + /* Is this search limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + if (ignore_nonessential) { + /* With older QEMU, for PVonHVM guests the guest config files + * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0'] + * which is nonsensical as there is no PV FB (there can be + * a PVKB) running as HVM guest. */ + + if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0)) + return 0; + + if ((strncmp(xendev->nodename, "device/vfb", 10) == 0)) + return 0; + } + xendrv = to_xenbus_driver(dev->driver); + return (xendev->state < XenbusStateConnected || + (xendev->state == XenbusStateConnected && + xendrv->is_ready && !xendrv->is_ready(xendev))); +} +static int essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, true /* ignore PV[KBB+FB] */); +} +static int non_essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, false); +} + +static int exists_essential_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + essential_device_connecting); +} +static int exists_non_essential_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + non_essential_device_connecting); +} + +static int print_device_status(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + + /* Is this operation limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + if (!dev->driver) { + /* Information only: is this too noisy? */ + pr_info("Device with no driver: %s\n", xendev->nodename); + } else if (xendev->state < XenbusStateConnected) { + enum xenbus_state rstate = XenbusStateUnknown; + if (xendev->otherend) + rstate = xenbus_read_driver_state(xendev->otherend); + pr_warn("Timeout connecting to device: %s (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); + } + + return 0; +} + +/* We only wait for device setup after most initcalls have run. */ +static int ready_to_wait_for_devices; + +static bool wait_loop(unsigned long start, unsigned int max_delay, + unsigned int *seconds_waited) +{ + if (time_after(jiffies, start + (*seconds_waited+5)*HZ)) { + if (!*seconds_waited) + pr_warn("Waiting for devices to initialise: "); + *seconds_waited += 5; + pr_cont("%us...", max_delay - *seconds_waited); + if (*seconds_waited == max_delay) { + pr_cont("\n"); + return true; + } + } + + schedule_timeout_interruptible(HZ/10); + + return false; +} +/* + * On a 5-minute timeout, wait for all devices currently configured. We need + * to do this to guarantee that the filesystems and / or network devices + * needed for boot are available, before we can allow the boot to proceed. + * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. + */ +static void wait_for_devices(struct xenbus_driver *xendrv) +{ + unsigned long start = jiffies; + struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; + + if (!ready_to_wait_for_devices || !xen_domain()) + return; + + while (exists_non_essential_connecting_device(drv)) + if (wait_loop(start, 30, &seconds_waited)) + break; + + /* Skips PVKB and PVFB check.*/ + while (exists_essential_connecting_device(drv)) + if (wait_loop(start, 270, &seconds_waited)) + break; + + if (seconds_waited) + printk("\n"); + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + print_device_status); +} + +int __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner, + const char *mod_name) +{ + int ret; + + drv->read_otherend_details = read_backend_details; + + ret = xenbus_register_driver_common(drv, &xenbus_frontend, + owner, mod_name); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. */ + wait_for_devices(drv); + + return 0; +} +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); + +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char **v, unsigned int l) +{ + xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + pr_info("backend %s timed out\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. + */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + pr_info("triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + pr_info("reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; + } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + +static int frontend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); + /* Enumerate devices in xenstore and watch for changes. */ + xenbus_probe_devices(&xenbus_frontend); + register_xenbus_watch(&fe_watch); + + return NOTIFY_DONE; +} + + +static int __init xenbus_probe_frontend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = frontend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_frontend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + if (xen_store_domain_type == XS_LOCAL) { + xenbus_frontend_wq = create_workqueue("xenbus_frontend"); + if (!xenbus_frontend_wq) + pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n"); + } + + return 0; +} +subsys_initcall(xenbus_probe_frontend_init); + +#ifndef MODULE +static int __init boot_wait_for_devices(void) +{ + if (!xen_has_pv_devices()) + return -ENODEV; + + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + return 0; +} + +late_initcall(boot_wait_for_devices); +#endif + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c new file mode 100644 index 000000000..ba804f3d8 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -0,0 +1,981 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/unistd.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/uio.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/fcntl.h> +#include <linux/kthread.h> +#include <linux/rwsem.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <asm/xen/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/xen.h> +#include "xenbus_comms.h" +#include "xenbus_probe.h" + +struct xs_stored_msg { + struct list_head list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + struct list_head reply_list; + spinlock_t reply_lock; + wait_queue_head_t reply_waitq; + + /* + * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. + * response_mutex is never taken simultaneously with the other three. + * + * transaction_mutex must be held before incrementing + * transaction_count. The mutex is held when a suspend is in + * progress to prevent new transactions starting. + * + * When decrementing transaction_count to zero the wait queue + * should be woken up, the suspend code waits for count to + * reach zero. + */ + + /* One request at a time. */ + struct mutex request_mutex; + + /* Protect xenbus reader thread against save/restore. */ + struct mutex response_mutex; + + /* Protect transactions against save/restore. */ + struct mutex transaction_mutex; + atomic_t transaction_count; + wait_queue_head_t transaction_wq; + + /* Protect watch (de)register against save/restore. */ + struct rw_semaphore watch_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watches); +static DEFINE_SPINLOCK(watches_lock); + +/* List of pending watch callback events, and a lock to protect it. */ +static LIST_HEAD(watch_events); +static DEFINE_SPINLOCK(watch_events_lock); + +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. + */ +static pid_t xenwatch_pid; +static DEFINE_MUTEX(xenwatch_mutex); +static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) { + if (i == ARRAY_SIZE(xsd_errors) - 1) { + pr_warn("xen store gave: unknown error %s\n", + errorstring); + return EINVAL; + } + } + return xsd_errors[i].errnum; +} + +static bool xenbus_ok(void) +{ + switch (xen_store_domain_type) { + case XS_LOCAL: + switch (system_state) { + case SYSTEM_POWER_OFF: + case SYSTEM_RESTART: + case SYSTEM_HALT: + return false; + default: + break; + } + return true; + case XS_PV: + case XS_HVM: + /* FIXME: Could check that the remote domain is alive, + * but it is normally initial domain. */ + return true; + default: + break; + } + return false; +} +static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xs_stored_msg *msg; + char *body; + + spin_lock(&xs_state.reply_lock); + + while (list_empty(&xs_state.reply_list)) { + spin_unlock(&xs_state.reply_lock); + if (xenbus_ok()) + /* XXX FIXME: Avoid synchronous wait for response here. */ + wait_event_timeout(xs_state.reply_waitq, + !list_empty(&xs_state.reply_list), + msecs_to_jiffies(500)); + else { + /* + * If we are in the process of being shut-down there is + * no point of trying to contact XenBus - it is either + * killed (xenstored application) or the other domain + * has been killed or is unreachable. + */ + return ERR_PTR(-EIO); + } + spin_lock(&xs_state.reply_lock); + } + + msg = list_entry(xs_state.reply_list.next, + struct xs_stored_msg, list); + list_del(&msg->list); + + spin_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; + if (len) + *len = msg->hdr.len; + body = msg->u.reply.body; + + kfree(msg); + + return body; +} + +static void transaction_start(void) +{ + mutex_lock(&xs_state.transaction_mutex); + atomic_inc(&xs_state.transaction_count); + mutex_unlock(&xs_state.transaction_mutex); +} + +static void transaction_end(void) +{ + if (atomic_dec_and_test(&xs_state.transaction_count)) + wake_up(&xs_state.transaction_wq); +} + +static void transaction_suspend(void) +{ + mutex_lock(&xs_state.transaction_mutex); + wait_event(xs_state.transaction_wq, + atomic_read(&xs_state.transaction_count) == 0); +} + +static void transaction_resume(void) +{ + mutex_unlock(&xs_state.transaction_mutex); +} + +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) +{ + void *ret; + struct xsd_sockmsg req_msg = *msg; + int err; + + if (req_msg.type == XS_TRANSACTION_START) + transaction_start(); + + mutex_lock(&xs_state.request_mutex); + + err = xb_write(msg, sizeof(*msg) + msg->len); + if (err) { + msg->type = XS_ERROR; + ret = ERR_PTR(err); + } else + ret = read_reply(&msg->type, &msg->len); + + mutex_unlock(&xs_state.request_mutex); + + if (IS_ERR(ret)) + return ret; + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + transaction_end(); + + return ret; +} +EXPORT_SYMBOL(xenbus_dev_request_and_reply); + +/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ +static void *xs_talkv(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const struct kvec *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int err; + + msg.tx_id = t.id; + msg.req_id = 0; + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + mutex_lock(&xs_state.request_mutex); + + err = xb_write(&msg, sizeof(msg)); + if (err) { + mutex_unlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + + for (i = 0; i < num_vecs; i++) { + err = xb_write(iovec[i].iov_base, iovec[i].iov_len); + if (err) { + mutex_unlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + } + + ret = read_reply(&msg.type, len); + + mutex_unlock(&xs_state.request_mutex); + + if (IS_ERR(ret)) + return ret; + + if (msg.type == XS_ERROR) { + err = get_error(ret); + kfree(ret); + return ERR_PTR(-err); + } + + if (msg.type != type) { + pr_warn_ratelimited("unexpected type [%d], expected [%d]\n", + msg.type, type); + kfree(ret); + return ERR_PTR(-EINVAL); + } + return ret; +} + +/* Simplified version of xs_talkv: single message. */ +static void *xs_single(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) +{ + struct kvec iovec; + + iovec.iov_base = (void *)string; + iovec.iov_len = strlen(string) + 1; + return xs_talkv(t, type, &iovec, 1, len); +} + +/* Many commands only need an ack, don't care what it says. */ +static int xs_error(char *reply) +{ + if (IS_ERR(reply)) + return PTR_ERR(reply); + kfree(reply); + return 0; +} + +static unsigned int count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ +static char *join(const char *dir, const char *name) +{ + char *buffer; + + if (strlen(name) == 0) + buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir); + else + buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name); + return (!buffer) ? ERR_PTR(-ENOMEM) : buffer; +} + +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. */ + ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH); + if (!ret) { + kfree(strings); + return ERR_PTR(-ENOMEM); + } + memcpy(&ret[*num], strings, len); + kfree(strings); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + + return ret; +} + +char **xenbus_directory(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *num) +{ + char *strings, *path; + unsigned int len; + + path = join(dir, node); + if (IS_ERR(path)) + return (char **)path; + + strings = xs_single(t, XS_DIRECTORY, path, &len); + kfree(path); + if (IS_ERR(strings)) + return (char **)strings; + + return split(strings, len, num); +} +EXPORT_SYMBOL_GPL(xenbus_directory); + +/* Check if a path exists. Return 1 if it does. */ +int xenbus_exists(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char **d; + int dir_n; + + d = xenbus_directory(t, dir, node, &dir_n); + if (IS_ERR(d)) + return 0; + kfree(d); + return 1; +} +EXPORT_SYMBOL_GPL(xenbus_exists); + +/* Get the value of a single file. + * Returns a kmalloced value: call free() on it after use. + * len indicates length in bytes. + */ +void *xenbus_read(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *len) +{ + char *path; + void *ret; + + path = join(dir, node); + if (IS_ERR(path)) + return (void *)path; + + ret = xs_single(t, XS_READ, path, len); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_read); + +/* Write the value of a single file. + * Returns -err on failure. + */ +int xenbus_write(struct xenbus_transaction t, + const char *dir, const char *node, const char *string) +{ + const char *path; + struct kvec iovec[2]; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + iovec[0].iov_base = (void *)path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = (void *)string; + iovec[1].iov_len = strlen(string); + + ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_write); + +/* Create a new directory. */ +int xenbus_mkdir(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_MKDIR, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_mkdir); + +/* Destroy a file or directory (directories must be empty). */ +int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_RM, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_rm); + +/* Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. + */ +int xenbus_transaction_start(struct xenbus_transaction *t) +{ + char *id_str; + + transaction_start(); + + id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); + if (IS_ERR(id_str)) { + transaction_end(); + return PTR_ERR(id_str); + } + + t->id = simple_strtoul(id_str, NULL, 0); + kfree(id_str); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_transaction_start); + +/* End a transaction. + * If abandon is true, transaction is discarded instead of committed. + */ +int xenbus_transaction_end(struct xenbus_transaction t, int abort) +{ + char abortstr[2]; + int err; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); + + transaction_end(); + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_transaction_end); + +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(t, dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + kfree(val); + /* Distinctive errno. */ + if (ret == 0) + return -ERANGE; + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_scanf); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *buf; + + va_start(ap, fmt); + buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap); + va_end(ap); + + if (!buf) + return -ENOMEM; + + ret = xenbus_write(t, dir, node, buf); + + kfree(buf); + + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_printf); + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(struct xenbus_transaction t, const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(t, dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = -EINVAL; + kfree(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_gather); + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (void *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static int xs_unwatch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (char *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (char *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)simple_strtoul(token, NULL, 16); + + list_for_each_entry(i, &watches, list) + if (i == cmp) + return i; + + return NULL; +} +/* + * Certain older XenBus toolstack cannot handle reading values that are + * not populated. Some Xen 3.4 installation are incapable of doing this + * so if we are running on anything older than 4 do not attempt to read + * control/platform-feature-xs_reset_watches. + */ +static bool xen_strict_xenbus_quirk(void) +{ +#ifdef CONFIG_X86 + uint32_t eax, ebx, ecx, edx, base; + + base = xen_cpuid_base(); + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + if ((eax >> 16) < 4) + return true; +#endif + return false; + +} +static void xs_reset_watches(void) +{ + int err, supported = 0; + + if (!xen_hvm_domain() || xen_initial_domain()) + return; + + if (xen_strict_xenbus_quirk()) + return; + + err = xenbus_scanf(XBT_NIL, "control", + "platform-feature-xs_reset_watches", "%d", &supported); + if (err != 1 || !supported) + return; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + pr_warn("xs_reset_watches failed: %d\n", err); +} + +/* Register callback to watch this node. */ +int register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.watch_mutex); + + spin_lock(&watches_lock); + BUG_ON(find_watch(token)); + list_add(&watch->list, &watches); + spin_unlock(&watches_lock); + + err = xs_watch(watch->node, token); + + if (err) { + spin_lock(&watches_lock); + list_del(&watch->list); + spin_unlock(&watches_lock); + } + + up_read(&xs_state.watch_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(register_xenbus_watch); + +void unregister_xenbus_watch(struct xenbus_watch *watch) +{ + struct xs_stored_msg *msg, *tmp; + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.watch_mutex); + + spin_lock(&watches_lock); + BUG_ON(!find_watch(token)); + list_del(&watch->list); + spin_unlock(&watches_lock); + + err = xs_unwatch(watch->node, token); + if (err) + pr_warn("Failed to release watch %s: %i\n", watch->node, err); + + up_read(&xs_state.watch_mutex); + + /* Make sure there are no callbacks running currently (unless + its us) */ + if (current->pid != xenwatch_pid) + mutex_lock(&xenwatch_mutex); + + /* Cancel pending watch events. */ + spin_lock(&watch_events_lock); + list_for_each_entry_safe(msg, tmp, &watch_events, list) { + if (msg->u.watch.handle != watch) + continue; + list_del(&msg->list); + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watch_events_lock); + + if (current->pid != xenwatch_pid) + mutex_unlock(&xenwatch_mutex); +} +EXPORT_SYMBOL_GPL(unregister_xenbus_watch); + +void xs_suspend(void) +{ + transaction_suspend(); + down_write(&xs_state.watch_mutex); + mutex_lock(&xs_state.request_mutex); + mutex_lock(&xs_state.response_mutex); +} + +void xs_resume(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + xb_init_comms(); + + mutex_unlock(&xs_state.response_mutex); + mutex_unlock(&xs_state.request_mutex); + transaction_resume(); + + /* No need for watches_lock: the watch_mutex is sufficient. */ + list_for_each_entry(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } + + up_write(&xs_state.watch_mutex); +} + +void xs_suspend_cancel(void) +{ + mutex_unlock(&xs_state.response_mutex); + mutex_unlock(&xs_state.request_mutex); + up_write(&xs_state.watch_mutex); + mutex_unlock(&xs_state.transaction_mutex); +} + +static int xenwatch_thread(void *unused) +{ + struct list_head *ent; + struct xs_stored_msg *msg; + + for (;;) { + wait_event_interruptible(watch_events_waitq, + !list_empty(&watch_events)); + + if (kthread_should_stop()) + break; + + mutex_lock(&xenwatch_mutex); + + spin_lock(&watch_events_lock); + ent = watch_events.next; + if (ent != &watch_events) + list_del(ent); + spin_unlock(&watch_events_lock); + + if (ent != &watch_events) { + msg = list_entry(ent, struct xs_stored_msg, list); + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + kfree(msg->u.watch.vec); + kfree(msg); + } + + mutex_unlock(&xenwatch_mutex); + } + + return 0; +} + +static int process_msg(void) +{ + struct xs_stored_msg *msg; + char *body; + int err; + + /* + * We must disallow save/restore while reading a xenstore message. + * A partial read across s/r leaves us out of sync with xenstored. + */ + for (;;) { + err = xb_wait_for_data_to_read(); + if (err) + return err; + mutex_lock(&xs_state.response_mutex); + if (xb_data_to_read()) + break; + /* We raced with save/restore: pending data 'disappeared'. */ + mutex_unlock(&xs_state.response_mutex); + } + + + msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH); + if (msg == NULL) { + err = -ENOMEM; + goto out; + } + + err = xb_read(&msg->hdr, sizeof(msg->hdr)); + if (err) { + kfree(msg); + goto out; + } + + if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) { + kfree(msg); + err = -EINVAL; + goto out; + } + + body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); + if (body == NULL) { + kfree(msg); + err = -ENOMEM; + goto out; + } + + err = xb_read(body, msg->hdr.len); + if (err) { + kfree(body); + kfree(msg); + goto out; + } + body[msg->hdr.len] = '\0'; + + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + if (IS_ERR(msg->u.watch.vec)) { + err = PTR_ERR(msg->u.watch.vec); + kfree(msg); + goto out; + } + + spin_lock(&watches_lock); + msg->u.watch.handle = find_watch( + msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + spin_lock(&watch_events_lock); + list_add_tail(&msg->list, &watch_events); + wake_up(&watch_events_waitq); + spin_unlock(&watch_events_lock); + } else { + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watches_lock); + } else { + msg->u.reply.body = body; + spin_lock(&xs_state.reply_lock); + list_add_tail(&msg->list, &xs_state.reply_list); + spin_unlock(&xs_state.reply_lock); + wake_up(&xs_state.reply_waitq); + } + + out: + mutex_unlock(&xs_state.response_mutex); + return err; +} + +static int xenbus_thread(void *unused) +{ + int err; + + for (;;) { + err = process_msg(); + if (err) + pr_warn("error %d while reading message\n", err); + if (kthread_should_stop()) + break; + } + + return 0; +} + +int xs_init(void) +{ + int err; + struct task_struct *task; + + INIT_LIST_HEAD(&xs_state.reply_list); + spin_lock_init(&xs_state.reply_lock); + init_waitqueue_head(&xs_state.reply_waitq); + + mutex_init(&xs_state.request_mutex); + mutex_init(&xs_state.response_mutex); + mutex_init(&xs_state.transaction_mutex); + init_rwsem(&xs_state.watch_mutex); + atomic_set(&xs_state.transaction_count, 0); + init_waitqueue_head(&xs_state.transaction_wq); + + /* Initialize the shared memory rings to talk to xenstored */ + err = xb_init_comms(); + if (err) + return err; + + task = kthread_run(xenwatch_thread, NULL, "xenwatch"); + if (IS_ERR(task)) + return PTR_ERR(task); + xenwatch_pid = task->pid; + + task = kthread_run(xenbus_thread, NULL, "xenbus"); + if (IS_ERR(task)) + return PTR_ERR(task); + + /* shutdown watches for kexec boot */ + xs_reset_watches(); + + return 0; +} |