diff options
Diffstat (limited to 'drivers/misc/mic/vop')
-rw-r--r-- | drivers/misc/mic/vop/Makefile | 9 | ||||
-rw-r--r-- | drivers/misc/mic/vop/vop_debugfs.c | 232 | ||||
-rw-r--r-- | drivers/misc/mic/vop/vop_main.c | 755 | ||||
-rw-r--r-- | drivers/misc/mic/vop/vop_main.h | 170 | ||||
-rw-r--r-- | drivers/misc/mic/vop/vop_vringh.c | 1175 |
5 files changed, 2341 insertions, 0 deletions
diff --git a/drivers/misc/mic/vop/Makefile b/drivers/misc/mic/vop/Makefile new file mode 100644 index 000000000..78819c899 --- /dev/null +++ b/drivers/misc/mic/vop/Makefile @@ -0,0 +1,9 @@ +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2016, Intel Corporation. +# +obj-m := vop.o + +vop-objs += vop_main.o +vop-objs += vop_debugfs.o +vop-objs += vop_vringh.o diff --git a/drivers/misc/mic/vop/vop_debugfs.c b/drivers/misc/mic/vop/vop_debugfs.c new file mode 100644 index 000000000..ab43884e5 --- /dev/null +++ b/drivers/misc/mic/vop/vop_debugfs.c @@ -0,0 +1,232 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include "vop_main.h" + +static int vop_dp_show(struct seq_file *s, void *pos) +{ + struct mic_device_desc *d; + struct mic_device_ctrl *dc; + struct mic_vqconfig *vqconfig; + __u32 *features; + __u8 *config; + struct vop_info *vi = s->private; + struct vop_device *vpdev = vi->vpdev; + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + int j, k; + + seq_printf(s, "Bootparam: magic 0x%x\n", + bootparam->magic); + seq_printf(s, "Bootparam: h2c_config_db %d\n", + bootparam->h2c_config_db); + seq_printf(s, "Bootparam: node_id %d\n", + bootparam->node_id); + seq_printf(s, "Bootparam: c2h_scif_db %d\n", + bootparam->c2h_scif_db); + seq_printf(s, "Bootparam: h2c_scif_db %d\n", + bootparam->h2c_scif_db); + seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n", + bootparam->scif_host_dma_addr); + seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n", + bootparam->scif_card_dma_addr); + + for (j = sizeof(*bootparam); + j < MIC_DP_SIZE; j += mic_total_desc_size(d)) { + d = (void *)bootparam + j; + dc = (void *)d + mic_aligned_desc_size(d); + + /* end of list */ + if (d->type == 0) + break; + + if (d->type == -1) + continue; + + seq_printf(s, "Type %d ", d->type); + seq_printf(s, "Num VQ %d ", d->num_vq); + seq_printf(s, "Feature Len %d\n", d->feature_len); + seq_printf(s, "Config Len %d ", d->config_len); + seq_printf(s, "Shutdown Status %d\n", d->status); + + for (k = 0; k < d->num_vq; k++) { + vqconfig = mic_vq_config(d) + k; + seq_printf(s, "vqconfig[%d]: ", k); + seq_printf(s, "address 0x%llx ", + vqconfig->address); + seq_printf(s, "num %d ", vqconfig->num); + seq_printf(s, "used address 0x%llx\n", + vqconfig->used_address); + } + + features = (__u32 *)mic_vq_features(d); + seq_printf(s, "Features: Host 0x%x ", features[0]); + seq_printf(s, "Guest 0x%x\n", features[1]); + + config = mic_vq_configspace(d); + for (k = 0; k < d->config_len; k++) + seq_printf(s, "config[%d]=%d\n", k, config[k]); + + seq_puts(s, "Device control:\n"); + seq_printf(s, "Config Change %d ", dc->config_change); + seq_printf(s, "Vdev reset %d\n", dc->vdev_reset); + seq_printf(s, "Guest Ack %d ", dc->guest_ack); + seq_printf(s, "Host ack %d\n", dc->host_ack); + seq_printf(s, "Used address updated %d ", + dc->used_address_updated); + seq_printf(s, "Vdev 0x%llx\n", dc->vdev); + seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db); + seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db); + } + schedule_work(&vi->hotplug_work); + return 0; +} + +static int vop_dp_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, vop_dp_show, inode->i_private); +} + +static int vop_dp_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations dp_ops = { + .owner = THIS_MODULE, + .open = vop_dp_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = vop_dp_debug_release +}; + +static int vop_vdev_info_show(struct seq_file *s, void *unused) +{ + struct vop_info *vi = s->private; + struct list_head *pos, *tmp; + struct vop_vdev *vdev; + int i, j; + + mutex_lock(&vi->vop_mutex); + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev = list_entry(pos, struct vop_vdev, list); + seq_printf(s, "VDEV type %d state %s in %ld out %ld in_dma %ld out_dma %ld\n", + vdev->virtio_id, + vop_vdevup(vdev) ? "UP" : "DOWN", + vdev->in_bytes, + vdev->out_bytes, + vdev->in_bytes_dma, + vdev->out_bytes_dma); + for (i = 0; i < MIC_MAX_VRINGS; i++) { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + struct vop_vringh *vvr = &vdev->vvr[i]; + struct vringh *vrh = &vvr->vrh; + int num = vrh->vring.num; + + if (!num) + continue; + desc = vrh->vring.desc; + seq_printf(s, "vring i %d avail_idx %d", + i, vvr->vring.info->avail_idx & (num - 1)); + seq_printf(s, " vring i %d avail_idx %d\n", + i, vvr->vring.info->avail_idx); + seq_printf(s, "vrh i %d weak_barriers %d", + i, vrh->weak_barriers); + seq_printf(s, " last_avail_idx %d last_used_idx %d", + vrh->last_avail_idx, vrh->last_used_idx); + seq_printf(s, " completed %d\n", vrh->completed); + for (j = 0; j < num; j++) { + seq_printf(s, "desc[%d] addr 0x%llx len %d", + j, desc->addr, desc->len); + seq_printf(s, " flags 0x%x next %d\n", + desc->flags, desc->next); + desc++; + } + avail = vrh->vring.avail; + seq_printf(s, "avail flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, + avail->idx) & (num - 1)); + seq_printf(s, "avail flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, avail->idx)); + for (j = 0; j < num; j++) + seq_printf(s, "avail ring[%d] %d\n", + j, avail->ring[j]); + used = vrh->vring.used; + seq_printf(s, "used flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx) & (num - 1)); + seq_printf(s, "used flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx)); + for (j = 0; j < num; j++) + seq_printf(s, "used ring[%d] id %d len %d\n", + j, vringh32_to_cpu(vrh, + used->ring[j].id), + vringh32_to_cpu(vrh, + used->ring[j].len)); + } + } + mutex_unlock(&vi->vop_mutex); + + return 0; +} + +static int vop_vdev_info_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, vop_vdev_info_show, inode->i_private); +} + +static int vop_vdev_info_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations vdev_info_ops = { + .owner = THIS_MODULE, + .open = vop_vdev_info_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = vop_vdev_info_debug_release +}; + +void vop_init_debugfs(struct vop_info *vi) +{ + char name[16]; + + snprintf(name, sizeof(name), "%s%d", KBUILD_MODNAME, vi->vpdev->dnode); + vi->dbg = debugfs_create_dir(name, NULL); + if (!vi->dbg) { + pr_err("can't create debugfs dir vop\n"); + return; + } + debugfs_create_file("dp", 0444, vi->dbg, vi, &dp_ops); + debugfs_create_file("vdev_info", 0444, vi->dbg, vi, &vdev_info_ops); +} + +void vop_exit_debugfs(struct vop_info *vi) +{ + debugfs_remove_recursive(vi->dbg); +} diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c new file mode 100644 index 000000000..1a2b67f31 --- /dev/null +++ b/drivers/misc/mic/vop/vop_main.c @@ -0,0 +1,755 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Adapted from: + * + * virtio for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/dma-mapping.h> + +#include "vop_main.h" + +#define VOP_MAX_VRINGS 4 + +/* + * _vop_vdev - Allocated per virtio device instance injected by the peer. + * + * @vdev: Virtio device + * @desc: Virtio device page descriptor + * @dc: Virtio device control + * @vpdev: VOP device which is the parent for this virtio device + * @vr: Buffer for accessing the VRING + * @used: Buffer for used + * @used_size: Size of the used buffer + * @reset_done: Track whether VOP reset is complete + * @virtio_cookie: Cookie returned upon requesting a interrupt + * @c2h_vdev_db: The doorbell used by the guest to interrupt the host + * @h2c_vdev_db: The doorbell used by the host to interrupt the guest + * @dnode: The destination node + */ +struct _vop_vdev { + struct virtio_device vdev; + struct mic_device_desc __iomem *desc; + struct mic_device_ctrl __iomem *dc; + struct vop_device *vpdev; + void __iomem *vr[VOP_MAX_VRINGS]; + dma_addr_t used[VOP_MAX_VRINGS]; + int used_size[VOP_MAX_VRINGS]; + struct completion reset_done; + struct mic_irq *virtio_cookie; + int c2h_vdev_db; + int h2c_vdev_db; + int dnode; +}; + +#define to_vopvdev(vd) container_of(vd, struct _vop_vdev, vdev) + +#define _vop_aligned_desc_size(d) __mic_align(_vop_desc_size(d), 8) + +/* Helper API to obtain the parent of the virtio device */ +static inline struct device *_vop_dev(struct _vop_vdev *vdev) +{ + return vdev->vdev.dev.parent; +} + +static inline unsigned _vop_desc_size(struct mic_device_desc __iomem *desc) +{ + return sizeof(*desc) + + ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig) + + ioread8(&desc->feature_len) * 2 + + ioread8(&desc->config_len); +} + +static inline struct mic_vqconfig __iomem * +_vop_vq_config(struct mic_device_desc __iomem *desc) +{ + return (struct mic_vqconfig __iomem *)(desc + 1); +} + +static inline u8 __iomem * +_vop_vq_features(struct mic_device_desc __iomem *desc) +{ + return (u8 __iomem *)(_vop_vq_config(desc) + ioread8(&desc->num_vq)); +} + +static inline u8 __iomem * +_vop_vq_configspace(struct mic_device_desc __iomem *desc) +{ + return _vop_vq_features(desc) + ioread8(&desc->feature_len) * 2; +} + +static inline unsigned +_vop_total_desc_size(struct mic_device_desc __iomem *desc) +{ + return _vop_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl); +} + +/* This gets the device's feature bits. */ +static u64 vop_get_features(struct virtio_device *vdev) +{ + unsigned int i, bits; + u32 features = 0; + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + u8 __iomem *in_features = _vop_vq_features(desc); + int feature_len = ioread8(&desc->feature_len); + + bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) + if (ioread8(&in_features[i / 8]) & (BIT(i % 8))) + features |= BIT(i); + + return features; +} + +static int vop_finalize_features(struct virtio_device *vdev) +{ + unsigned int i, bits; + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + u8 feature_len = ioread8(&desc->feature_len); + /* Second half of bitmap is features we accept. */ + u8 __iomem *out_features = + _vop_vq_features(desc) + feature_len; + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + memset_io(out_features, 0, feature_len); + bits = min_t(unsigned, feature_len, + sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (__virtio_test_bit(vdev, i)) + iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)), + &out_features[i / 8]); + } + return 0; +} + +/* + * Reading and writing elements in config space + */ +static void vop_get(struct virtio_device *vdev, unsigned int offset, + void *buf, unsigned len) +{ + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + + if (offset + len > ioread8(&desc->config_len)) + return; + memcpy_fromio(buf, _vop_vq_configspace(desc) + offset, len); +} + +static void vop_set(struct virtio_device *vdev, unsigned int offset, + const void *buf, unsigned len) +{ + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + + if (offset + len > ioread8(&desc->config_len)) + return; + memcpy_toio(_vop_vq_configspace(desc) + offset, buf, len); +} + +/* + * The operations to get and set the status word just access the status + * field of the device descriptor. set_status also interrupts the host + * to tell about status changes. + */ +static u8 vop_get_status(struct virtio_device *vdev) +{ + return ioread8(&to_vopvdev(vdev)->desc->status); +} + +static void vop_set_status(struct virtio_device *dev, u8 status) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct vop_device *vpdev = vdev->vpdev; + + if (!status) + return; + iowrite8(status, &vdev->desc->status); + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); +} + +/* Inform host on a virtio device reset and wait for ack from host */ +static void vop_reset_inform_host(struct virtio_device *dev) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct mic_device_ctrl __iomem *dc = vdev->dc; + struct vop_device *vpdev = vdev->vpdev; + int retry; + + iowrite8(0, &dc->host_ack); + iowrite8(1, &dc->vdev_reset); + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); + + /* Wait till host completes all card accesses and acks the reset */ + for (retry = 100; retry--;) { + if (ioread8(&dc->host_ack)) + break; + msleep(100); + }; + + dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry); + + /* Reset status to 0 in case we timed out */ + iowrite8(0, &vdev->desc->status); +} + +static void vop_reset(struct virtio_device *dev) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + + dev_dbg(_vop_dev(vdev), "%s: virtio id %d\n", + __func__, dev->id.device); + + vop_reset_inform_host(dev); + complete_all(&vdev->reset_done); +} + +/* + * The virtio_ring code calls this API when it wants to notify the Host. + */ +static bool vop_notify(struct virtqueue *vq) +{ + struct _vop_vdev *vdev = vq->priv; + struct vop_device *vpdev = vdev->vpdev; + + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); + return true; +} + +static void vop_del_vq(struct virtqueue *vq, int n) +{ + struct _vop_vdev *vdev = to_vopvdev(vq->vdev); + struct vring *vr = (struct vring *)(vq + 1); + struct vop_device *vpdev = vdev->vpdev; + + dma_unmap_single(&vpdev->dev, vdev->used[n], + vdev->used_size[n], DMA_BIDIRECTIONAL); + free_pages((unsigned long)vr->used, get_order(vdev->used_size[n])); + vring_del_virtqueue(vq); + vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]); + vdev->vr[n] = NULL; +} + +static void vop_del_vqs(struct virtio_device *dev) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct virtqueue *vq, *n; + int idx = 0; + + dev_dbg(_vop_dev(vdev), "%s\n", __func__); + + list_for_each_entry_safe(vq, n, &dev->vqs, list) + vop_del_vq(vq, idx++); +} + +/* + * This routine will assign vring's allocated in host/io memory. Code in + * virtio_ring.c however continues to access this io memory as if it were local + * memory without io accessors. + */ +static struct virtqueue *vop_find_vq(struct virtio_device *dev, + unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct vop_device *vpdev = vdev->vpdev; + struct mic_vqconfig __iomem *vqconfig; + struct mic_vqconfig config; + struct virtqueue *vq; + void __iomem *va; + struct _mic_vring_info __iomem *info; + void *used; + int vr_size, _vr_size, err, magic; + struct vring *vr; + u8 type = ioread8(&vdev->desc->type); + + if (index >= ioread8(&vdev->desc->num_vq)) + return ERR_PTR(-ENOENT); + + if (!name) + return ERR_PTR(-ENOENT); + + /* First assign the vring's allocated in host memory */ + vqconfig = _vop_vq_config(vdev->desc) + index; + memcpy_fromio(&config, vqconfig, sizeof(config)); + _vr_size = vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN); + vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info)); + va = vpdev->hw_ops->ioremap(vpdev, le64_to_cpu(config.address), + vr_size); + if (!va) + return ERR_PTR(-ENOMEM); + vdev->vr[index] = va; + memset_io(va, 0x0, _vr_size); + vq = vring_new_virtqueue( + index, + le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN, + dev, + false, + (void __force *)va, vop_notify, callback, name); + if (!vq) { + err = -ENOMEM; + goto unmap; + } + info = va + _vr_size; + magic = ioread32(&info->magic); + + if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) { + err = -EIO; + goto unmap; + } + + /* Allocate and reassign used ring now */ + vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 + + sizeof(struct vring_used_elem) * + le16_to_cpu(config.num)); + used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(vdev->used_size[index])); + if (!used) { + err = -ENOMEM; + dev_err(_vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto del_vq; + } + vdev->used[index] = dma_map_single(&vpdev->dev, used, + vdev->used_size[index], + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vpdev->dev, vdev->used[index])) { + err = -ENOMEM; + dev_err(_vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto free_used; + } + writeq(vdev->used[index], &vqconfig->used_address); + /* + * To reassign the used ring here we are directly accessing + * struct vring_virtqueue which is a private data structure + * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in + * vring_new_virtqueue() would ensure that + * (&vq->vring == (struct vring *) (&vq->vq + 1)); + */ + vr = (struct vring *)(vq + 1); + vr->used = used; + + vq->priv = vdev; + return vq; +free_used: + free_pages((unsigned long)used, + get_order(vdev->used_size[index])); +del_vq: + vring_del_virtqueue(vq); +unmap: + vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]); + return ERR_PTR(err); +} + +static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char * const names[]) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct vop_device *vpdev = vdev->vpdev; + struct mic_device_ctrl __iomem *dc = vdev->dc; + int i, err, retry; + + /* We must have this many virtqueues. */ + if (nvqs > ioread8(&vdev->desc->num_vq)) + return -ENOENT; + + for (i = 0; i < nvqs; ++i) { + dev_dbg(_vop_dev(vdev), "%s: %d: %s\n", + __func__, i, names[i]); + vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i]); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); + goto error; + } + } + + iowrite8(1, &dc->used_address_updated); + /* + * Send an interrupt to the host to inform it that used + * rings have been re-assigned. + */ + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); + for (retry = 100; --retry;) { + if (!ioread8(&dc->used_address_updated)) + break; + msleep(100); + }; + + dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry); + if (!retry) { + err = -ENODEV; + goto error; + } + + return 0; +error: + vop_del_vqs(dev); + return err; +} + +/* + * The config ops structure as defined by virtio config + */ +static struct virtio_config_ops vop_vq_config_ops = { + .get_features = vop_get_features, + .finalize_features = vop_finalize_features, + .get = vop_get, + .set = vop_set, + .get_status = vop_get_status, + .set_status = vop_set_status, + .reset = vop_reset, + .find_vqs = vop_find_vqs, + .del_vqs = vop_del_vqs, +}; + +static irqreturn_t vop_virtio_intr_handler(int irq, void *data) +{ + struct _vop_vdev *vdev = data; + struct vop_device *vpdev = vdev->vpdev; + struct virtqueue *vq; + + vpdev->hw_ops->ack_interrupt(vpdev, vdev->h2c_vdev_db); + list_for_each_entry(vq, &vdev->vdev.vqs, list) + vring_interrupt(0, vq); + + return IRQ_HANDLED; +} + +static void vop_virtio_release_dev(struct device *_d) +{ + /* + * No need for a release method similar to virtio PCI. + * Provide an empty one to avoid getting a warning from core. + */ +} + +/* + * adds a new device and register it with virtio + * appropriate drivers are loaded by the device model + */ +static int _vop_add_device(struct mic_device_desc __iomem *d, + unsigned int offset, struct vop_device *vpdev, + int dnode) +{ + struct _vop_vdev *vdev; + int ret; + u8 type = ioread8(&d->type); + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->vpdev = vpdev; + vdev->vdev.dev.parent = &vpdev->dev; + vdev->vdev.dev.release = vop_virtio_release_dev; + vdev->vdev.id.device = type; + vdev->vdev.config = &vop_vq_config_ops; + vdev->desc = d; + vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d); + vdev->dnode = dnode; + vdev->vdev.priv = (void *)(u64)dnode; + init_completion(&vdev->reset_done); + + vdev->h2c_vdev_db = vpdev->hw_ops->next_db(vpdev); + vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev, + vop_virtio_intr_handler, "virtio intr", + vdev, vdev->h2c_vdev_db); + if (IS_ERR(vdev->virtio_cookie)) { + ret = PTR_ERR(vdev->virtio_cookie); + goto kfree; + } + iowrite8((u8)vdev->h2c_vdev_db, &vdev->dc->h2c_vdev_db); + vdev->c2h_vdev_db = ioread8(&vdev->dc->c2h_vdev_db); + + ret = register_virtio_device(&vdev->vdev); + if (ret) { + dev_err(_vop_dev(vdev), + "Failed to register vop device %u type %u\n", + offset, type); + goto free_irq; + } + writeq((u64)vdev, &vdev->dc->vdev); + dev_dbg(_vop_dev(vdev), "%s: registered vop device %u type %u vdev %p\n", + __func__, offset, type, vdev); + + return 0; + +free_irq: + vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); +kfree: + kfree(vdev); + return ret; +} + +/* + * match for a vop device with a specific desc pointer + */ +static int vop_match_desc(struct device *dev, void *data) +{ + struct virtio_device *_dev = dev_to_virtio(dev); + struct _vop_vdev *vdev = to_vopvdev(_dev); + + return vdev->desc == (void __iomem *)data; +} + +static void _vop_handle_config_change(struct mic_device_desc __iomem *d, + unsigned int offset, + struct vop_device *vpdev) +{ + struct mic_device_ctrl __iomem *dc + = (void __iomem *)d + _vop_aligned_desc_size(d); + struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev); + + if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED) + return; + + dev_dbg(&vpdev->dev, "%s %d\n", __func__, __LINE__); + virtio_config_changed(&vdev->vdev); + iowrite8(1, &dc->guest_ack); +} + +/* + * removes a virtio device if a hot remove event has been + * requested by the host. + */ +static int _vop_remove_device(struct mic_device_desc __iomem *d, + unsigned int offset, struct vop_device *vpdev) +{ + struct mic_device_ctrl __iomem *dc + = (void __iomem *)d + _vop_aligned_desc_size(d); + struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev); + u8 status; + int ret = -1; + + if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) { + dev_dbg(&vpdev->dev, + "%s %d config_change %d type %d vdev %p\n", + __func__, __LINE__, + ioread8(&dc->config_change), ioread8(&d->type), vdev); + status = ioread8(&d->status); + reinit_completion(&vdev->reset_done); + unregister_virtio_device(&vdev->vdev); + vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); + iowrite8(-1, &dc->h2c_vdev_db); + if (status & VIRTIO_CONFIG_S_DRIVER_OK) + wait_for_completion(&vdev->reset_done); + kfree(vdev); + iowrite8(1, &dc->guest_ack); + dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n", + __func__, __LINE__, ioread8(&dc->guest_ack)); + iowrite8(-1, &d->type); + ret = 0; + } + return ret; +} + +#define REMOVE_DEVICES true + +static void _vop_scan_devices(void __iomem *dp, struct vop_device *vpdev, + bool remove, int dnode) +{ + s8 type; + unsigned int i; + struct mic_device_desc __iomem *d; + struct mic_device_ctrl __iomem *dc; + struct device *dev; + int ret; + + for (i = sizeof(struct mic_bootparam); + i < MIC_DP_SIZE; i += _vop_total_desc_size(d)) { + d = dp + i; + dc = (void __iomem *)d + _vop_aligned_desc_size(d); + /* + * This read barrier is paired with the corresponding write + * barrier on the host which is inserted before adding or + * removing a virtio device descriptor, by updating the type. + */ + rmb(); + type = ioread8(&d->type); + + /* end of list */ + if (type == 0) + break; + + if (type == -1) + continue; + + /* device already exists */ + dev = device_find_child(&vpdev->dev, (void __force *)d, + vop_match_desc); + if (dev) { + if (remove) + iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE, + &dc->config_change); + put_device(dev); + _vop_handle_config_change(d, i, vpdev); + ret = _vop_remove_device(d, i, vpdev); + if (remove) { + iowrite8(0, &dc->config_change); + iowrite8(0, &dc->guest_ack); + } + continue; + } + + /* new device */ + dev_dbg(&vpdev->dev, "%s %d Adding new virtio device %p\n", + __func__, __LINE__, d); + if (!remove) + _vop_add_device(d, i, vpdev, dnode); + } +} + +static void vop_scan_devices(struct vop_info *vi, + struct vop_device *vpdev, bool remove) +{ + void __iomem *dp = vpdev->hw_ops->get_remote_dp(vpdev); + + if (!dp) + return; + mutex_lock(&vi->vop_mutex); + _vop_scan_devices(dp, vpdev, remove, vpdev->dnode); + mutex_unlock(&vi->vop_mutex); +} + +/* + * vop_hotplug_device tries to find changes in the device page. + */ +static void vop_hotplug_devices(struct work_struct *work) +{ + struct vop_info *vi = container_of(work, struct vop_info, + hotplug_work); + + vop_scan_devices(vi, vi->vpdev, !REMOVE_DEVICES); +} + +/* + * Interrupt handler for hot plug/config changes etc. + */ +static irqreturn_t vop_extint_handler(int irq, void *data) +{ + struct vop_info *vi = data; + struct mic_bootparam __iomem *bp; + struct vop_device *vpdev = vi->vpdev; + + bp = vpdev->hw_ops->get_remote_dp(vpdev); + dev_dbg(&vpdev->dev, "%s %d hotplug work\n", + __func__, __LINE__); + vpdev->hw_ops->ack_interrupt(vpdev, ioread8(&bp->h2c_config_db)); + schedule_work(&vi->hotplug_work); + return IRQ_HANDLED; +} + +static int vop_driver_probe(struct vop_device *vpdev) +{ + struct vop_info *vi; + int rc; + + vi = kzalloc(sizeof(*vi), GFP_KERNEL); + if (!vi) { + rc = -ENOMEM; + goto exit; + } + dev_set_drvdata(&vpdev->dev, vi); + vi->vpdev = vpdev; + + mutex_init(&vi->vop_mutex); + INIT_WORK(&vi->hotplug_work, vop_hotplug_devices); + if (vpdev->dnode) { + rc = vop_host_init(vi); + if (rc < 0) + goto free; + } else { + struct mic_bootparam __iomem *bootparam; + + vop_scan_devices(vi, vpdev, !REMOVE_DEVICES); + + vi->h2c_config_db = vpdev->hw_ops->next_db(vpdev); + vi->cookie = vpdev->hw_ops->request_irq(vpdev, + vop_extint_handler, + "virtio_config_intr", + vi, vi->h2c_config_db); + if (IS_ERR(vi->cookie)) { + rc = PTR_ERR(vi->cookie); + goto free; + } + bootparam = vpdev->hw_ops->get_remote_dp(vpdev); + iowrite8(vi->h2c_config_db, &bootparam->h2c_config_db); + } + vop_init_debugfs(vi); + return 0; +free: + kfree(vi); +exit: + return rc; +} + +static void vop_driver_remove(struct vop_device *vpdev) +{ + struct vop_info *vi = dev_get_drvdata(&vpdev->dev); + + if (vpdev->dnode) { + vop_host_uninit(vi); + } else { + struct mic_bootparam __iomem *bootparam = + vpdev->hw_ops->get_remote_dp(vpdev); + if (bootparam) + iowrite8(-1, &bootparam->h2c_config_db); + vpdev->hw_ops->free_irq(vpdev, vi->cookie, vi); + flush_work(&vi->hotplug_work); + vop_scan_devices(vi, vpdev, REMOVE_DEVICES); + } + vop_exit_debugfs(vi); + kfree(vi); +} + +static struct vop_device_id id_table[] = { + { VOP_DEV_TRNSP, VOP_DEV_ANY_ID }, + { 0 }, +}; + +static struct vop_driver vop_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = vop_driver_probe, + .remove = vop_driver_remove, +}; + +module_vop_driver(vop_driver); + +MODULE_DEVICE_TABLE(mbus, id_table); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Virtio Over PCIe (VOP) driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/vop/vop_main.h b/drivers/misc/mic/vop/vop_main.h new file mode 100644 index 000000000..ba47ec7a6 --- /dev/null +++ b/drivers/misc/mic/vop/vop_main.h @@ -0,0 +1,170 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#ifndef _VOP_MAIN_H_ +#define _VOP_MAIN_H_ + +#include <linux/vringh.h> +#include <linux/virtio_config.h> +#include <linux/virtio.h> +#include <linux/miscdevice.h> + +#include <linux/mic_common.h> +#include "../common/mic_dev.h" + +#include "../bus/vop_bus.h" + +/* + * Note on endianness. + * 1. Host can be both BE or LE + * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail + * rings and ioreadXX/iowriteXX to access used ring. + * 3. Device page exposed by host to guest contains LE values. Guest + * accesses these using ioreadXX/iowriteXX etc. This way in general we + * obey the virtio spec according to which guest works with native + * endianness and host is aware of guest endianness and does all + * required endianness conversion. + * 4. Data provided from user space to guest (in ADD_DEVICE and + * CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be + * in guest endianness. + */ + +/* + * vop_info - Allocated per invocation of VOP probe + * + * @vpdev: VOP device + * @hotplug_work: Handle virtio device creation, deletion and configuration + * @cookie: Cookie received upon requesting a virtio configuration interrupt + * @h2c_config_db: The doorbell used by the peer to indicate a config change + * @vdev_list: List of "active" virtio devices injected in the peer node + * @vop_mutex: Synchronize access to the device page as well as serialize + * creation/deletion of virtio devices on the peer node + * @dp: Peer device page information + * @dbg: Debugfs entry + * @dma_ch: The DMA channel used by this transport for data transfers. + * @name: Name for this transport used in misc device creation. + * @miscdev: The misc device registered. + */ +struct vop_info { + struct vop_device *vpdev; + struct work_struct hotplug_work; + struct mic_irq *cookie; + int h2c_config_db; + struct list_head vdev_list; + struct mutex vop_mutex; + void __iomem *dp; + struct dentry *dbg; + struct dma_chan *dma_ch; + char name[16]; + struct miscdevice miscdev; +}; + +/** + * struct vop_vringh - Virtio ring host information. + * + * @vring: The VOP vring used for setting up user space mappings. + * @vrh: The host VRINGH used for accessing the card vrings. + * @riov: The VRINGH read kernel IOV. + * @wiov: The VRINGH write kernel IOV. + * @head: The VRINGH head index address passed to vringh_getdesc_kern(..). + * @vr_mutex: Mutex for synchronizing access to the VRING. + * @buf: Temporary kernel buffer used to copy in/out data + * from/to the card via DMA. + * @buf_da: dma address of buf. + * @vdev: Back pointer to VOP virtio device for vringh_notify(..). + */ +struct vop_vringh { + struct mic_vring vring; + struct vringh vrh; + struct vringh_kiov riov; + struct vringh_kiov wiov; + u16 head; + struct mutex vr_mutex; + void *buf; + dma_addr_t buf_da; + struct vop_vdev *vdev; +}; + +/** + * struct vop_vdev - Host information for a card Virtio device. + * + * @virtio_id - Virtio device id. + * @waitq - Waitqueue to allow ring3 apps to poll. + * @vpdev - pointer to VOP bus device. + * @poll_wake - Used for waking up threads blocked in poll. + * @out_bytes - Debug stats for number of bytes copied from host to card. + * @in_bytes - Debug stats for number of bytes copied from card to host. + * @out_bytes_dma - Debug stats for number of bytes copied from host to card + * using DMA. + * @in_bytes_dma - Debug stats for number of bytes copied from card to host + * using DMA. + * @tx_len_unaligned - Debug stats for number of bytes copied to the card where + * the transfer length did not have the required DMA alignment. + * @tx_dst_unaligned - Debug stats for number of bytes copied where the + * destination address on the card did not have the required DMA alignment. + * @vvr - Store per VRING data structures. + * @virtio_bh_work - Work struct used to schedule virtio bottom half handling. + * @dd - Virtio device descriptor. + * @dc - Virtio device control fields. + * @list - List of Virtio devices. + * @virtio_db - The doorbell used by the card to interrupt the host. + * @virtio_cookie - The cookie returned while requesting interrupts. + * @vi: Transport information. + * @vdev_mutex: Mutex synchronizing virtio device injection, + * removal and data transfers. + * @destroy: Track if a virtio device is being destroyed. + * @deleted: The virtio device has been deleted. + */ +struct vop_vdev { + int virtio_id; + wait_queue_head_t waitq; + struct vop_device *vpdev; + int poll_wake; + unsigned long out_bytes; + unsigned long in_bytes; + unsigned long out_bytes_dma; + unsigned long in_bytes_dma; + unsigned long tx_len_unaligned; + unsigned long tx_dst_unaligned; + unsigned long rx_dst_unaligned; + struct vop_vringh vvr[MIC_MAX_VRINGS]; + struct work_struct virtio_bh_work; + struct mic_device_desc *dd; + struct mic_device_ctrl *dc; + struct list_head list; + int virtio_db; + struct mic_irq *virtio_cookie; + struct vop_info *vi; + struct mutex vdev_mutex; + struct completion destroy; + bool deleted; +}; + +/* Helper API to check if a virtio device is running */ +static inline bool vop_vdevup(struct vop_vdev *vdev) +{ + return !!vdev->dd->status; +} + +void vop_init_debugfs(struct vop_info *vi); +void vop_exit_debugfs(struct vop_info *vi); +int vop_host_init(struct vop_info *vi); +void vop_host_uninit(struct vop_info *vi); +#endif diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c new file mode 100644 index 000000000..af57d2caa --- /dev/null +++ b/drivers/misc/mic/vop/vop_vringh.c @@ -0,0 +1,1175 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#include <linux/sched.h> +#include <linux/poll.h> +#include <linux/dma-mapping.h> + +#include <linux/mic_common.h> +#include "../common/mic_dev.h" + +#include <linux/mic_ioctl.h> +#include "vop_main.h" + +/* Helper API to obtain the VOP PCIe device */ +static inline struct device *vop_dev(struct vop_vdev *vdev) +{ + return vdev->vpdev->dev.parent; +} + +/* Helper API to check if a virtio device is initialized */ +static inline int vop_vdev_inited(struct vop_vdev *vdev) +{ + if (!vdev) + return -EINVAL; + /* Device has not been created yet */ + if (!vdev->dd || !vdev->dd->type) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, -EINVAL); + return -EINVAL; + } + /* Device has been removed/deleted */ + if (vdev->dd->type == -1) { + dev_dbg(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, -ENODEV); + return -ENODEV; + } + return 0; +} + +static void _vop_notify(struct vringh *vrh) +{ + struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh); + struct vop_vdev *vdev = vvrh->vdev; + struct vop_device *vpdev = vdev->vpdev; + s8 db = vdev->dc->h2c_vdev_db; + + if (db != -1) + vpdev->hw_ops->send_intr(vpdev, db); +} + +static void vop_virtio_init_post(struct vop_vdev *vdev) +{ + struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd); + struct vop_device *vpdev = vdev->vpdev; + int i, used_size; + + for (i = 0; i < vdev->dd->num_vq; i++) { + used_size = PAGE_ALIGN(sizeof(u16) * 3 + + sizeof(struct vring_used_elem) * + le16_to_cpu(vqconfig->num)); + if (!le64_to_cpu(vqconfig[i].used_address)) { + dev_warn(vop_dev(vdev), "used_address zero??\n"); + continue; + } + vdev->vvr[i].vrh.vring.used = + (void __force *)vpdev->hw_ops->ioremap( + vpdev, + le64_to_cpu(vqconfig[i].used_address), + used_size); + } + + vdev->dc->used_address_updated = 0; + + dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n", + __func__, vdev->virtio_id); +} + +static inline void vop_virtio_device_reset(struct vop_vdev *vdev) +{ + int i; + + dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n", + __func__, vdev->dd->status, vdev->virtio_id); + + for (i = 0; i < vdev->dd->num_vq; i++) + /* + * Avoid lockdep false positive. The + 1 is for the vop + * mutex which is held in the reset devices code path. + */ + mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); + + /* 0 status means "reset" */ + vdev->dd->status = 0; + vdev->dc->vdev_reset = 0; + vdev->dc->host_ack = 1; + + for (i = 0; i < vdev->dd->num_vq; i++) { + struct vringh *vrh = &vdev->vvr[i].vrh; + + vdev->vvr[i].vring.info->avail_idx = 0; + vrh->completed = 0; + vrh->last_avail_idx = 0; + vrh->last_used_idx = 0; + } + + for (i = 0; i < vdev->dd->num_vq; i++) + mutex_unlock(&vdev->vvr[i].vr_mutex); +} + +static void vop_virtio_reset_devices(struct vop_info *vi) +{ + struct list_head *pos, *tmp; + struct vop_vdev *vdev; + + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev = list_entry(pos, struct vop_vdev, list); + vop_virtio_device_reset(vdev); + vdev->poll_wake = 1; + wake_up(&vdev->waitq); + } +} + +static void vop_bh_handler(struct work_struct *work) +{ + struct vop_vdev *vdev = container_of(work, struct vop_vdev, + virtio_bh_work); + + if (vdev->dc->used_address_updated) + vop_virtio_init_post(vdev); + + if (vdev->dc->vdev_reset) + vop_virtio_device_reset(vdev); + + vdev->poll_wake = 1; + wake_up(&vdev->waitq); +} + +static irqreturn_t _vop_virtio_intr_handler(int irq, void *data) +{ + struct vop_vdev *vdev = data; + struct vop_device *vpdev = vdev->vpdev; + + vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db); + schedule_work(&vdev->virtio_bh_work); + return IRQ_HANDLED; +} + +static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp) +{ + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); + int ret = 0, retry, i; + struct vop_device *vpdev = vdev->vpdev; + struct vop_info *vi = dev_get_drvdata(&vpdev->dev); + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + s8 db = bootparam->h2c_config_db; + + mutex_lock(&vi->vop_mutex); + for (i = 0; i < vdev->dd->num_vq; i++) + mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); + + if (db == -1 || vdev->dd->type == -1) { + ret = -EIO; + goto exit; + } + + memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len); + vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED; + vpdev->hw_ops->send_intr(vpdev, db); + + for (retry = 100; retry--;) { + ret = wait_event_timeout(wake, vdev->dc->guest_ack, + msecs_to_jiffies(100)); + if (ret) + break; + } + + dev_dbg(vop_dev(vdev), + "%s %d retry: %d\n", __func__, __LINE__, retry); + vdev->dc->config_change = 0; + vdev->dc->guest_ack = 0; +exit: + for (i = 0; i < vdev->dd->num_vq; i++) + mutex_unlock(&vdev->vvr[i].vr_mutex); + mutex_unlock(&vi->vop_mutex); + return ret; +} + +static int vop_copy_dp_entry(struct vop_vdev *vdev, + struct mic_device_desc *argp, __u8 *type, + struct mic_device_desc **devpage) +{ + struct vop_device *vpdev = vdev->vpdev; + struct mic_device_desc *devp; + struct mic_vqconfig *vqconfig; + int ret = 0, i; + bool slot_found = false; + + vqconfig = mic_vq_config(argp); + for (i = 0; i < argp->num_vq; i++) { + if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) { + ret = -EINVAL; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto exit; + } + } + + /* Find the first free device page entry */ + for (i = sizeof(struct mic_bootparam); + i < MIC_DP_SIZE - mic_total_desc_size(argp); + i += mic_total_desc_size(devp)) { + devp = vpdev->hw_ops->get_dp(vpdev) + i; + if (devp->type == 0 || devp->type == -1) { + slot_found = true; + break; + } + } + if (!slot_found) { + ret = -EINVAL; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto exit; + } + /* + * Save off the type before doing the memcpy. Type will be set in the + * end after completing all initialization for the new device. + */ + *type = argp->type; + argp->type = 0; + memcpy(devp, argp, mic_desc_size(argp)); + + *devpage = devp; +exit: + return ret; +} + +static void vop_init_device_ctrl(struct vop_vdev *vdev, + struct mic_device_desc *devpage) +{ + struct mic_device_ctrl *dc; + + dc = (void *)devpage + mic_aligned_desc_size(devpage); + + dc->config_change = 0; + dc->guest_ack = 0; + dc->vdev_reset = 0; + dc->host_ack = 0; + dc->used_address_updated = 0; + dc->c2h_vdev_db = -1; + dc->h2c_vdev_db = -1; + vdev->dc = dc; +} + +static int vop_virtio_add_device(struct vop_vdev *vdev, + struct mic_device_desc *argp) +{ + struct vop_info *vi = vdev->vi; + struct vop_device *vpdev = vi->vpdev; + struct mic_device_desc *dd = NULL; + struct mic_vqconfig *vqconfig; + int vr_size, i, j, ret; + u8 type = 0; + s8 db = -1; + char irqname[16]; + struct mic_bootparam *bootparam; + u16 num; + dma_addr_t vr_addr; + + bootparam = vpdev->hw_ops->get_dp(vpdev); + init_waitqueue_head(&vdev->waitq); + INIT_LIST_HEAD(&vdev->list); + vdev->vpdev = vpdev; + + ret = vop_copy_dp_entry(vdev, argp, &type, &dd); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + kfree(vdev); + return ret; + } + + vop_init_device_ctrl(vdev, dd); + + vdev->dd = dd; + vdev->virtio_id = type; + vqconfig = mic_vq_config(dd); + INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler); + + for (i = 0; i < dd->num_vq; i++) { + struct vop_vringh *vvr = &vdev->vvr[i]; + struct mic_vring *vr = &vdev->vvr[i].vring; + + num = le16_to_cpu(vqconfig[i].num); + mutex_init(&vvr->vr_mutex); + vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) + + sizeof(struct _mic_vring_info)); + vr->va = (void *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(vr_size)); + if (!vr->va) { + ret = -ENOMEM; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vr->len = vr_size; + vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN); + vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i); + vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vpdev->dev, vr_addr)) { + free_pages((unsigned long)vr->va, get_order(vr_size)); + ret = -ENOMEM; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vqconfig[i].address = cpu_to_le64(vr_addr); + + vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN); + ret = vringh_init_kern(&vvr->vrh, + *(u32 *)mic_vq_features(vdev->dd), + num, false, vr->vr.desc, vr->vr.avail, + vr->vr.used); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vringh_kiov_init(&vvr->riov, NULL, 0); + vringh_kiov_init(&vvr->wiov, NULL, 0); + vvr->head = USHRT_MAX; + vvr->vdev = vdev; + vvr->vrh.notify = _vop_notify; + dev_dbg(&vpdev->dev, + "%s %d index %d va %p info %p vr_size 0x%x\n", + __func__, __LINE__, i, vr->va, vr->info, vr_size); + vvr->buf = (void *)__get_free_pages(GFP_KERNEL, + get_order(VOP_INT_DMA_BUF_SIZE)); + vvr->buf_da = dma_map_single(&vpdev->dev, + vvr->buf, VOP_INT_DMA_BUF_SIZE, + DMA_BIDIRECTIONAL); + } + + snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index, + vdev->virtio_id); + vdev->virtio_db = vpdev->hw_ops->next_db(vpdev); + vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev, + _vop_virtio_intr_handler, irqname, vdev, + vdev->virtio_db); + if (IS_ERR(vdev->virtio_cookie)) { + ret = PTR_ERR(vdev->virtio_cookie); + dev_dbg(&vpdev->dev, "request irq failed\n"); + goto err; + } + + vdev->dc->c2h_vdev_db = vdev->virtio_db; + + /* + * Order the type update with previous stores. This write barrier + * is paired with the corresponding read barrier before the uncached + * system memory read of the type, on the card while scanning the + * device page. + */ + smp_wmb(); + dd->type = type; + argp->type = type; + + if (bootparam) { + db = bootparam->h2c_config_db; + if (db != -1) + vpdev->hw_ops->send_intr(vpdev, db); + } + dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db); + return 0; +err: + vqconfig = mic_vq_config(dd); + for (j = 0; j < i; j++) { + struct vop_vringh *vvr = &vdev->vvr[j]; + + dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address), + vvr->vring.len, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vvr->vring.va, + get_order(vvr->vring.len)); + } + return ret; +} + +static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp, + struct vop_device *vpdev) +{ + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + s8 db; + int ret, retry; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); + + devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; + db = bootparam->h2c_config_db; + if (db != -1) + vpdev->hw_ops->send_intr(vpdev, db); + else + goto done; + for (retry = 15; retry--;) { + ret = wait_event_timeout(wake, devp->guest_ack, + msecs_to_jiffies(1000)); + if (ret) + break; + } +done: + devp->config_change = 0; + devp->guest_ack = 0; +} + +static void vop_virtio_del_device(struct vop_vdev *vdev) +{ + struct vop_info *vi = vdev->vi; + struct vop_device *vpdev = vdev->vpdev; + int i; + struct mic_vqconfig *vqconfig; + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + + if (!bootparam) + goto skip_hot_remove; + vop_dev_remove(vi, vdev->dc, vpdev); +skip_hot_remove: + vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); + flush_work(&vdev->virtio_bh_work); + vqconfig = mic_vq_config(vdev->dd); + for (i = 0; i < vdev->dd->num_vq; i++) { + struct vop_vringh *vvr = &vdev->vvr[i]; + + dma_unmap_single(&vpdev->dev, + vvr->buf_da, VOP_INT_DMA_BUF_SIZE, + DMA_BIDIRECTIONAL); + free_pages((unsigned long)vvr->buf, + get_order(VOP_INT_DMA_BUF_SIZE)); + vringh_kiov_cleanup(&vvr->riov); + vringh_kiov_cleanup(&vvr->wiov); + dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address), + vvr->vring.len, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vvr->vring.va, + get_order(vvr->vring.len)); + } + /* + * Order the type update with previous stores. This write barrier + * is paired with the corresponding read barrier before the uncached + * system memory read of the type, on the card while scanning the + * device page. + */ + smp_wmb(); + vdev->dd->type = -1; +} + +/* + * vop_sync_dma - Wrapper for synchronous DMAs. + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @dst - destination DMA address. + * @src - source DMA address. + * @len - size of the transfer. + * + * Return DMA_SUCCESS on success + */ +static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src, + size_t len) +{ + int err = 0; + struct dma_device *ddev; + struct dma_async_tx_descriptor *tx; + struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); + struct dma_chan *vop_ch = vi->dma_ch; + + if (!vop_ch) { + err = -EBUSY; + goto error; + } + ddev = vop_ch->device; + tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len, + DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + goto error; + } else { + dma_cookie_t cookie; + + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = -ENOMEM; + goto error; + } + dma_async_issue_pending(vop_ch); + err = dma_sync_wait(vop_ch, cookie); + } +error: + if (err) + dev_err(&vi->vpdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; +} + +#define VOP_USE_DMA true + +/* + * Initiates the copies across the PCIe bus from card memory to a user + * space buffer. When transfers are done using DMA, source/destination + * addresses and transfer length must follow the alignment requirements of + * the MIC DMA engine. + */ +static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf, + size_t len, u64 daddr, size_t dlen, + int vr_idx) +{ + struct vop_device *vpdev = vdev->vpdev; + void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len); + struct vop_vringh *vvr = &vdev->vvr[vr_idx]; + struct vop_info *vi = dev_get_drvdata(&vpdev->dev); + size_t dma_alignment = 1 << vi->dma_ch->device->copy_align; + bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); + size_t dma_offset, partlen; + int err; + + if (!VOP_USE_DMA) { + if (copy_to_user(ubuf, (void __force *)dbuf, len)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + vdev->in_bytes += len; + err = 0; + goto err; + } + + dma_offset = daddr - round_down(daddr, dma_alignment); + daddr -= dma_offset; + len += dma_offset; + /* + * X100 uses DMA addresses as seen by the card so adding + * the aperture base is not required for DMA. However x200 + * requires DMA addresses to be an offset into the bar so + * add the aperture base for x200. + */ + if (x200) + daddr += vpdev->aper->pa; + while (len) { + partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); + err = vop_sync_dma(vdev, vvr->buf_da, daddr, + ALIGN(partlen, dma_alignment)); + if (err) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + if (copy_to_user(ubuf, vvr->buf + dma_offset, + partlen - dma_offset)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + daddr += partlen; + ubuf += partlen; + dbuf += partlen; + vdev->in_bytes_dma += partlen; + vdev->in_bytes += partlen; + len -= partlen; + dma_offset = 0; + } + err = 0; +err: + vpdev->hw_ops->iounmap(vpdev, dbuf); + dev_dbg(vop_dev(vdev), + "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n", + __func__, ubuf, dbuf, len, vr_idx); + return err; +} + +/* + * Initiates copies across the PCIe bus from a user space buffer to card + * memory. When transfers are done using DMA, source/destination addresses + * and transfer length must follow the alignment requirements of the MIC + * DMA engine. + */ +static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf, + size_t len, u64 daddr, size_t dlen, + int vr_idx) +{ + struct vop_device *vpdev = vdev->vpdev; + void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len); + struct vop_vringh *vvr = &vdev->vvr[vr_idx]; + struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); + size_t dma_alignment = 1 << vi->dma_ch->device->copy_align; + bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); + size_t partlen; + bool dma = VOP_USE_DMA; + int err = 0; + + if (daddr & (dma_alignment - 1)) { + vdev->tx_dst_unaligned += len; + dma = false; + } else if (ALIGN(len, dma_alignment) > dlen) { + vdev->tx_len_unaligned += len; + dma = false; + } + + if (!dma) + goto memcpy; + + /* + * X100 uses DMA addresses as seen by the card so adding + * the aperture base is not required for DMA. However x200 + * requires DMA addresses to be an offset into the bar so + * add the aperture base for x200. + */ + if (x200) + daddr += vpdev->aper->pa; + while (len) { + partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); + + if (copy_from_user(vvr->buf, ubuf, partlen)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + err = vop_sync_dma(vdev, daddr, vvr->buf_da, + ALIGN(partlen, dma_alignment)); + if (err) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + daddr += partlen; + ubuf += partlen; + dbuf += partlen; + vdev->out_bytes_dma += partlen; + vdev->out_bytes += partlen; + len -= partlen; + } +memcpy: + /* + * We are copying to IO below and should ideally use something + * like copy_from_user_toio(..) if it existed. + */ + if (copy_from_user((void __force *)dbuf, ubuf, len)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + vdev->out_bytes += len; + err = 0; +err: + vpdev->hw_ops->iounmap(vpdev, dbuf); + dev_dbg(vop_dev(vdev), + "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n", + __func__, ubuf, dbuf, len, vr_idx); + return err; +} + +#define MIC_VRINGH_READ true + +/* Determine the total number of bytes consumed in a VRINGH KIOV */ +static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov) +{ + int i; + u32 total = iov->consumed; + + for (i = 0; i < iov->i; i++) + total += iov->iov[i].iov_len; + return total; +} + +/* + * Traverse the VRINGH KIOV and issue the APIs to trigger the copies. + * This API is heavily based on the vringh_iov_xfer(..) implementation + * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..) + * and vringh_iov_push_kern(..) directly is because there is no + * way to override the VRINGH xfer(..) routines as of v3.10. + */ +static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov, + void __user *ubuf, size_t len, bool read, int vr_idx, + size_t *out_len) +{ + int ret = 0; + size_t partlen, tot_len = 0; + + while (len && iov->i < iov->used) { + struct kvec *kiov = &iov->iov[iov->i]; + + partlen = min(kiov->iov_len, len); + if (read) + ret = vop_virtio_copy_to_user(vdev, ubuf, partlen, + (u64)kiov->iov_base, + kiov->iov_len, + vr_idx); + else + ret = vop_virtio_copy_from_user(vdev, ubuf, partlen, + (u64)kiov->iov_base, + kiov->iov_len, + vr_idx); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= partlen; + ubuf += partlen; + tot_len += partlen; + iov->consumed += partlen; + kiov->iov_len -= partlen; + kiov->iov_base += partlen; + if (!kiov->iov_len) { + /* Fix up old iov element then increment. */ + kiov->iov_len = iov->consumed; + kiov->iov_base -= iov->consumed; + + iov->consumed = 0; + iov->i++; + } + } + *out_len = tot_len; + return ret; +} + +/* + * Use the standard VRINGH infrastructure in the kernel to fetch new + * descriptors, initiate the copies and update the used ring. + */ +static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy) +{ + int ret = 0; + u32 iovcnt = copy->iovcnt; + struct iovec iov; + struct iovec __user *u_iov = copy->iov; + void __user *ubuf = NULL; + struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx]; + struct vringh_kiov *riov = &vvr->riov; + struct vringh_kiov *wiov = &vvr->wiov; + struct vringh *vrh = &vvr->vrh; + u16 *head = &vvr->head; + struct mic_vring *vr = &vvr->vring; + size_t len = 0, out_len; + + copy->out_len = 0; + /* Fetch a new IOVEC if all previous elements have been processed */ + if (riov->i == riov->used && wiov->i == wiov->used) { + ret = vringh_getdesc_kern(vrh, riov, wiov, + head, GFP_KERNEL); + /* Check if there are available descriptors */ + if (ret <= 0) + return ret; + } + while (iovcnt) { + if (!len) { + /* Copy over a new iovec from user space. */ + ret = copy_from_user(&iov, u_iov, sizeof(*u_iov)); + if (ret) { + ret = -EINVAL; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len = iov.iov_len; + ubuf = iov.iov_base; + } + /* Issue all the read descriptors first */ + ret = vop_vringh_copy(vdev, riov, ubuf, len, + MIC_VRINGH_READ, copy->vr_idx, &out_len); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= out_len; + ubuf += out_len; + copy->out_len += out_len; + /* Issue the write descriptors next */ + ret = vop_vringh_copy(vdev, wiov, ubuf, len, + !MIC_VRINGH_READ, copy->vr_idx, &out_len); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= out_len; + ubuf += out_len; + copy->out_len += out_len; + if (!len) { + /* One user space iovec is now completed */ + iovcnt--; + u_iov++; + } + /* Exit loop if all elements in KIOVs have been processed. */ + if (riov->i == riov->used && wiov->i == wiov->used) + break; + } + /* + * Update the used ring if a descriptor was available and some data was + * copied in/out and the user asked for a used ring update. + */ + if (*head != USHRT_MAX && copy->out_len && copy->update_used) { + u32 total = 0; + + /* Determine the total data consumed */ + total += vop_vringh_iov_consumed(riov); + total += vop_vringh_iov_consumed(wiov); + vringh_complete_kern(vrh, *head, total); + *head = USHRT_MAX; + if (vringh_need_notify_kern(vrh) > 0) + vringh_notify(vrh); + vringh_kiov_cleanup(riov); + vringh_kiov_cleanup(wiov); + /* Update avail idx for user space */ + vr->info->avail_idx = vrh->last_avail_idx; + } + return ret; +} + +static inline int vop_verify_copy_args(struct vop_vdev *vdev, + struct mic_copy_desc *copy) +{ + if (!vdev || copy->vr_idx >= vdev->dd->num_vq) + return -EINVAL; + return 0; +} + +/* Copy a specified number of virtio descriptors in a chain */ +static int vop_virtio_copy_desc(struct vop_vdev *vdev, + struct mic_copy_desc *copy) +{ + int err; + struct vop_vringh *vvr; + + err = vop_verify_copy_args(vdev, copy); + if (err) + return err; + + vvr = &vdev->vvr[copy->vr_idx]; + mutex_lock(&vvr->vr_mutex); + if (!vop_vdevup(vdev)) { + err = -ENODEV; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + err = _vop_virtio_copy(vdev, copy); + if (err) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + } +err: + mutex_unlock(&vvr->vr_mutex); + return err; +} + +static int vop_open(struct inode *inode, struct file *f) +{ + struct vop_vdev *vdev; + struct vop_info *vi = container_of(f->private_data, + struct vop_info, miscdev); + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + vdev->vi = vi; + mutex_init(&vdev->vdev_mutex); + f->private_data = vdev; + init_completion(&vdev->destroy); + complete(&vdev->destroy); + return 0; +} + +static int vop_release(struct inode *inode, struct file *f) +{ + struct vop_vdev *vdev = f->private_data, *vdev_tmp; + struct vop_info *vi = vdev->vi; + struct list_head *pos, *tmp; + bool found = false; + + mutex_lock(&vdev->vdev_mutex); + if (vdev->deleted) + goto unlock; + mutex_lock(&vi->vop_mutex); + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev_tmp = list_entry(pos, struct vop_vdev, list); + if (vdev == vdev_tmp) { + vop_virtio_del_device(vdev); + list_del(pos); + found = true; + break; + } + } + mutex_unlock(&vi->vop_mutex); +unlock: + mutex_unlock(&vdev->vdev_mutex); + if (!found) + wait_for_completion(&vdev->destroy); + f->private_data = NULL; + kfree(vdev); + return 0; +} + +static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + struct vop_vdev *vdev = f->private_data; + struct vop_info *vi = vdev->vi; + void __user *argp = (void __user *)arg; + int ret; + + switch (cmd) { + case MIC_VIRTIO_ADD_DEVICE: + { + struct mic_device_desc dd, *dd_config; + + if (copy_from_user(&dd, argp, sizeof(dd))) + return -EFAULT; + + if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE || + dd.num_vq > MIC_MAX_VRINGS) + return -EINVAL; + + dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL); + if (!dd_config) + return -ENOMEM; + if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) { + ret = -EFAULT; + goto free_ret; + } + /* Ensure desc has not changed between the two reads */ + if (memcmp(&dd, dd_config, sizeof(dd))) { + ret = -EINVAL; + goto free_ret; + } + /* Ensure desc has not changed between the two reads */ + if (memcmp(&dd, dd_config, sizeof(dd))) { + ret = -EINVAL; + goto free_ret; + } + mutex_lock(&vdev->vdev_mutex); + mutex_lock(&vi->vop_mutex); + ret = vop_virtio_add_device(vdev, dd_config); + if (ret) + goto unlock_ret; + list_add_tail(&vdev->list, &vi->vdev_list); +unlock_ret: + mutex_unlock(&vi->vop_mutex); + mutex_unlock(&vdev->vdev_mutex); +free_ret: + kfree(dd_config); + return ret; + } + case MIC_VIRTIO_COPY_DESC: + { + struct mic_copy_desc copy; + + mutex_lock(&vdev->vdev_mutex); + ret = vop_vdev_inited(vdev); + if (ret) + goto _unlock_ret; + + if (copy_from_user(©, argp, sizeof(copy))) { + ret = -EFAULT; + goto _unlock_ret; + } + + ret = vop_virtio_copy_desc(vdev, ©); + if (ret < 0) + goto _unlock_ret; + if (copy_to_user( + &((struct mic_copy_desc __user *)argp)->out_len, + ©.out_len, sizeof(copy.out_len))) + ret = -EFAULT; +_unlock_ret: + mutex_unlock(&vdev->vdev_mutex); + return ret; + } + case MIC_VIRTIO_CONFIG_CHANGE: + { + void *buf; + + mutex_lock(&vdev->vdev_mutex); + ret = vop_vdev_inited(vdev); + if (ret) + goto __unlock_ret; + buf = kzalloc(vdev->dd->config_len, GFP_KERNEL); + if (!buf) { + ret = -ENOMEM; + goto __unlock_ret; + } + if (copy_from_user(buf, argp, vdev->dd->config_len)) { + ret = -EFAULT; + goto done; + } + ret = vop_virtio_config_change(vdev, buf); +done: + kfree(buf); +__unlock_ret: + mutex_unlock(&vdev->vdev_mutex); + return ret; + } + default: + return -ENOIOCTLCMD; + }; + return 0; +} + +/* + * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and + * not when previously enqueued buffers may be available. This means that + * in the card->host (TX) path, when userspace is unblocked by poll it + * must drain all available descriptors or it can stall. + */ +static unsigned int vop_poll(struct file *f, poll_table *wait) +{ + struct vop_vdev *vdev = f->private_data; + int mask = 0; + + mutex_lock(&vdev->vdev_mutex); + if (vop_vdev_inited(vdev)) { + mask = POLLERR; + goto done; + } + poll_wait(f, &vdev->waitq, wait); + if (vop_vdev_inited(vdev)) { + mask = POLLERR; + } else if (vdev->poll_wake) { + vdev->poll_wake = 0; + mask = POLLIN | POLLOUT; + } +done: + mutex_unlock(&vdev->vdev_mutex); + return mask; +} + +static inline int +vop_query_offset(struct vop_vdev *vdev, unsigned long offset, + unsigned long *size, unsigned long *pa) +{ + struct vop_device *vpdev = vdev->vpdev; + unsigned long start = MIC_DP_SIZE; + int i; + + /* + * MMAP interface is as follows: + * offset region + * 0x0 virtio device_page + * 0x1000 first vring + * 0x1000 + size of 1st vring second vring + * .... + */ + if (!offset) { + *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev)); + *size = MIC_DP_SIZE; + return 0; + } + + for (i = 0; i < vdev->dd->num_vq; i++) { + struct vop_vringh *vvr = &vdev->vvr[i]; + + if (offset == start) { + *pa = virt_to_phys(vvr->vring.va); + *size = vvr->vring.len; + return 0; + } + start += vvr->vring.len; + } + return -1; +} + +/* + * Maps the device page and virtio rings to user space for readonly access. + */ +static int vop_mmap(struct file *f, struct vm_area_struct *vma) +{ + struct vop_vdev *vdev = f->private_data; + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size; + int i, err; + + err = vop_vdev_inited(vdev); + if (err) + goto ret; + if (vma->vm_flags & VM_WRITE) { + err = -EACCES; + goto ret; + } + while (size_rem) { + i = vop_query_offset(vdev, offset, &size, &pa); + if (i < 0) { + err = -EINVAL; + goto ret; + } + err = remap_pfn_range(vma, vma->vm_start + offset, + pa >> PAGE_SHIFT, size, + vma->vm_page_prot); + if (err) + goto ret; + size_rem -= size; + offset += size; + } +ret: + return err; +} + +static const struct file_operations vop_fops = { + .open = vop_open, + .release = vop_release, + .unlocked_ioctl = vop_ioctl, + .poll = vop_poll, + .mmap = vop_mmap, + .owner = THIS_MODULE, +}; + +int vop_host_init(struct vop_info *vi) +{ + int rc; + struct miscdevice *mdev; + struct vop_device *vpdev = vi->vpdev; + + INIT_LIST_HEAD(&vi->vdev_list); + vi->dma_ch = vpdev->dma_ch; + mdev = &vi->miscdev; + mdev->minor = MISC_DYNAMIC_MINOR; + snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index); + mdev->name = vi->name; + mdev->fops = &vop_fops; + mdev->parent = &vpdev->dev; + + rc = misc_register(mdev); + if (rc) + dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc); + return rc; +} + +void vop_host_uninit(struct vop_info *vi) +{ + struct list_head *pos, *tmp; + struct vop_vdev *vdev; + + mutex_lock(&vi->vop_mutex); + vop_virtio_reset_devices(vi); + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev = list_entry(pos, struct vop_vdev, list); + list_del(pos); + reinit_completion(&vdev->destroy); + mutex_unlock(&vi->vop_mutex); + mutex_lock(&vdev->vdev_mutex); + vop_virtio_del_device(vdev); + vdev->deleted = true; + mutex_unlock(&vdev->vdev_mutex); + complete(&vdev->destroy); + mutex_lock(&vi->vop_mutex); + } + mutex_unlock(&vi->vop_mutex); + misc_deregister(&vi->miscdev); +} |