Diffstat (limited to 'drivers/iommu')
 drivers/iommu/Kconfig               |  25
 drivers/iommu/Makefile              |   3
 drivers/iommu/amd_iommu.c           | 175
 drivers/iommu/amd_iommu_init.c      | 122
 drivers/iommu/amd_iommu_types.h     |  15
 drivers/iommu/amd_iommu_v2.c        |  20
 drivers/iommu/arm-smmu-v3.c         | 155
 drivers/iommu/arm-smmu.c            | 132
 drivers/iommu/dma-iommu.c           | 529
 drivers/iommu/dmar.c                |  42
 drivers/iommu/fsl_pamu.c            |   2
 drivers/iommu/fsl_pamu_domain.c     |  41
 drivers/iommu/intel-iommu.c         | 392
 drivers/iommu/intel-svm.c           | 622
 drivers/iommu/intel_irq_remapping.c |  64
 drivers/iommu/iommu.c               |  48
 drivers/iommu/ipmmu-vmsa.c          |   2
 drivers/iommu/irq_remapping.c       |  12
 drivers/iommu/omap-iommu.c          |  58
 drivers/iommu/omap-iommu.h          |   9
 drivers/iommu/s390-iommu.c          | 356
 21 files changed, 2329 insertions(+), 495 deletions(-)
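
The largest addition above is the new IOMMU-agnostic DMA layer, drivers/iommu/dma-iommu.c. Its kernel-doc in the hunks below says IOMMU drivers should call iommu_get_dma_cookie() from their domain_alloc callback when domain->type == IOMMU_DOMAIN_DMA, and iommu_put_dma_cookie() from domain_free. A minimal sketch of that wiring, for orientation only: the my_domain structure and function names are illustrative and not part of this patch.

/*
 * Hypothetical driver glue for the dma-iommu cookie API added below.
 * "my_domain" / "my_domain_alloc" / "my_domain_free" are made-up names.
 */
#include <linux/dma-iommu.h>
#include <linux/iommu.h>
#include <linux/slab.h>

struct my_domain {
	struct iommu_domain domain;
};

static struct iommu_domain *my_domain_alloc(unsigned type)
{
	struct my_domain *md;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;

	md = kzalloc(sizeof(*md), GFP_KERNEL);
	if (!md)
		return NULL;

	/* Acquire the IOVA cookie for DMA-API domains (see dma-iommu.c) */
	if (type == IOMMU_DOMAIN_DMA && iommu_get_dma_cookie(&md->domain)) {
		kfree(md);
		return NULL;
	}

	return &md->domain;
}

static void my_domain_free(struct iommu_domain *domain)
{
	/* Releasing a cookie that was never allocated is a no-op */
	iommu_put_dma_cookie(domain);
	kfree(container_of(domain, struct my_domain, domain));
}
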
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index cbe6a890a..b9094e9da 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -48,6 +48,13 @@ config OF_IOMMU def_bool y depends on OF && IOMMU_API +# IOMMU-agnostic DMA-mapping layer +config IOMMU_DMA + bool + depends on NEED_SG_DMA_LENGTH + select IOMMU_API + select IOMMU_IOVA + config FSL_PAMU bool "Freescale IOMMU support" depends on PPC32 @@ -134,6 +141,16 @@ config INTEL_IOMMU and include PCI device scope covered by these DMA remapping devices. +config INTEL_IOMMU_SVM + bool "Support for Shared Virtual Memory with Intel IOMMU" + depends on INTEL_IOMMU && X86 + select PCI_PASID + select MMU_NOTIFIER + help + Shared Virtual Memory (SVM) provides a facility for devices + to access DMA resources through process address space by + means of a Process Address Space ID (PASID). + config INTEL_IOMMU_DEFAULT_ON def_bool y prompt "Enable Intel DMA Remapping Devices by default" @@ -361,6 +378,7 @@ config ARM_SMMU_V3 depends on ARM64 && PCI select IOMMU_API select IOMMU_IO_PGTABLE_LPAE + select GENERIC_MSI_IRQ_DOMAIN help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. @@ -368,4 +386,11 @@ config ARM_SMMU_V3 Say Y here if your system includes an IOMMU device implementing the ARM SMMUv3 architecture. +config S390_IOMMU + def_bool y if S390 && PCI + depends on S390 && PCI + select IOMMU_API + help + Support for the IOMMU API for s390 PCI devices. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6dcc513d..68faca022 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IOVA) += iova.o @@ -12,6 +13,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o +obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o @@ -23,3 +25,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o +obj-$(CONFIG_S390_IOMMU) += s390-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 532e2a211..8b2be1e77 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -89,8 +89,6 @@ static struct dma_map_ops amd_iommu_dma_ops; struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct list_head dev_data_list; /* For global dev_data_list */ - struct list_head alias_list; /* Link alias-groups together */ - struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ @@ -136,8 +134,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; - INIT_LIST_HEAD(&dev_data->alias_list); - dev_data->devid = devid; spin_lock_irqsave(&dev_data_list_lock, flags); @@ -147,17 +143,6 @@ static struct iommu_dev_data 
*alloc_dev_data(u16 devid) return dev_data; } -static void free_dev_data(struct iommu_dev_data *dev_data) -{ - unsigned long flags; - - spin_lock_irqsave(&dev_data_list_lock, flags); - list_del(&dev_data->dev_data_list); - spin_unlock_irqrestore(&dev_data_list_lock, flags); - - kfree(dev_data); -} - static struct iommu_dev_data *search_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -311,73 +296,10 @@ out: iommu_group_put(group); } -static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) -{ - *(u16 *)data = alias; - return 0; -} - -static u16 get_alias(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid, ivrs_alias, pci_alias; - - devid = get_device_id(dev); - ivrs_alias = amd_iommu_alias_table[devid]; - pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); - - if (ivrs_alias == pci_alias) - return ivrs_alias; - - /* - * DMA alias showdown - * - * The IVRS is fairly reliable in telling us about aliases, but it - * can't know about every screwy device. If we don't have an IVRS - * reported alias, use the PCI reported alias. In that case we may - * still need to initialize the rlookup and dev_table entries if the - * alias is to a non-existent device. - */ - if (ivrs_alias == devid) { - if (!amd_iommu_rlookup_table[pci_alias]) { - amd_iommu_rlookup_table[pci_alias] = - amd_iommu_rlookup_table[devid]; - memcpy(amd_iommu_dev_table[pci_alias].data, - amd_iommu_dev_table[devid].data, - sizeof(amd_iommu_dev_table[pci_alias].data)); - } - - return pci_alias; - } - - pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " - "for device %s[%04x:%04x], kernel reported alias " - "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), - PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, - PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), - PCI_FUNC(pci_alias)); - - /* - * If we don't have a PCI DMA alias and the IVRS alias is on the same - * bus, then the IVRS table may know about a quirk that we don't. 
- */ - if (pci_alias == devid && - PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { - pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; - pdev->dma_alias_devfn = ivrs_alias & 0xff; - pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), - dev_name(dev)); - } - - return ivrs_alias; -} - static int iommu_init_device(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; - u16 alias; if (dev->archdata.iommu) return 0; @@ -386,24 +308,6 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; - alias = get_alias(dev); - - if (alias != dev_data->devid) { - struct iommu_dev_data *alias_data; - - alias_data = find_dev_data(alias); - if (alias_data == NULL) { - pr_err("AMD-Vi: Warning: Unhandled device %s\n", - dev_name(dev)); - free_dev_data(dev_data); - return -ENOTSUPP; - } - dev_data->alias_data = alias_data; - - /* Add device to the alias_list */ - list_add(&dev_data->alias_list, &alias_data->alias_list); - } - if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -445,9 +349,6 @@ static void iommu_uninit_device(struct device *dev) iommu_group_remove_device(dev); - /* Unlink from alias, it may change if another device is re-plugged */ - dev_data->alias_data = NULL; - /* Remove dma-ops */ dev->archdata.dma_ops = NULL; @@ -633,7 +534,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) while (head != tail) { iommu_print_event(iommu, iommu->evt_buf + head); - head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE; } writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); @@ -783,7 +684,7 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu, u8 *target; target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; + tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; /* Copy command to buffer */ memcpy(target, cmd, sizeof(*cmd)); @@ -950,15 +851,13 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu, u32 left, tail, head, next_tail; unsigned long flags; - WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); - again: spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; - left = (head - next_tail) % iommu->cmd_buf_size; + next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + left = (head - next_tail) % CMD_BUFFER_SIZE; if (left <= 2) { struct iommu_cmd sync_cmd; @@ -1114,11 +1013,15 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, static int device_flush_dte(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + u16 alias; int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; ret = iommu_flush_dte(iommu, dev_data->devid); + if (!ret && alias != dev_data->devid) + ret = iommu_flush_dte(iommu, alias); if (ret) return ret; @@ -1984,27 +1887,33 @@ static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu; + u16 alias; bool ats; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; ats = dev_data->ats.enabled; /* Update data structures */ dev_data->domain = domain; list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(dev_data->devid, domain, ats); /* Do reference counting */ domain->dev_iommu[iommu->index] += 1; domain->dev_cnt += 1; - /* Flush 
the DTE entry */ + /* Update device table */ + set_dte_entry(dev_data->devid, domain, ats); + if (alias != dev_data->devid) + set_dte_entry(dev_data->devid, domain, ats); + device_flush_dte(dev_data); } static void do_detach(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + u16 alias; /* * First check if the device is still attached. It might already @@ -2016,6 +1925,7 @@ static void do_detach(struct iommu_dev_data *dev_data) return; iommu = amd_iommu_rlookup_table[dev_data->devid]; + alias = amd_iommu_alias_table[dev_data->devid]; /* decrease reference counters */ dev_data->domain->dev_iommu[iommu->index] -= 1; @@ -2025,6 +1935,8 @@ static void do_detach(struct iommu_dev_data *dev_data) dev_data->domain = NULL; list_del(&dev_data->list); clear_dte_entry(dev_data->devid); + if (alias != dev_data->devid) + clear_dte_entry(alias); /* Flush the DTE entry */ device_flush_dte(dev_data); @@ -2037,29 +1949,23 @@ static void do_detach(struct iommu_dev_data *dev_data) static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - struct iommu_dev_data *head, *entry; int ret; + /* + * Must be called with IRQs disabled. Warn here to detect early + * when its not. + */ + WARN_ON(!irqs_disabled()); + /* lock domain */ spin_lock(&domain->lock); - head = dev_data; - - if (head->alias_data != NULL) - head = head->alias_data; - - /* Now we have the root of the alias group, if any */ - ret = -EBUSY; - if (head->domain != NULL) + if (dev_data->domain != NULL) goto out_unlock; /* Attach alias group root */ - do_attach(head, domain); - - /* Attach other devices in the alias group */ - list_for_each_entry(entry, &head->alias_list, alias_list) - do_attach(entry, domain); + do_attach(dev_data, domain); ret = 0; @@ -2209,26 +2115,24 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct iommu_dev_data *dev_data) { - struct iommu_dev_data *head, *entry; struct protection_domain *domain; - unsigned long flags; - BUG_ON(!dev_data->domain); - - domain = dev_data->domain; + /* + * Must be called with IRQs disabled. Warn here to detect early + * when its not. 
+ */ + WARN_ON(!irqs_disabled()); - spin_lock_irqsave(&domain->lock, flags); + if (WARN_ON(!dev_data->domain)) + return; - head = dev_data; - if (head->alias_data != NULL) - head = head->alias_data; + domain = dev_data->domain; - list_for_each_entry(entry, &head->alias_list, alias_list) - do_detach(entry); + spin_lock(&domain->lock); - do_detach(head); + do_detach(dev_data); - spin_unlock_irqrestore(&domain->lock, flags); + spin_unlock(&domain->lock); } /* @@ -2764,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size, page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); if (!page) { - if (!(flag & __GFP_WAIT)) + if (!gfpflags_allow_blocking(flag)) return NULL; page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, @@ -3198,6 +3102,7 @@ static const struct iommu_ops amd_iommu_ops = { .iova_to_phys = amd_iommu_iova_to_phys, .add_device = amd_iommu_add_device, .remove_device = amd_iommu_remove_device, + .device_group = pci_device_group, .get_dm_regions = amd_iommu_get_dm_regions, .put_dm_regions = amd_iommu_put_dm_regions, .pgsize_bitmap = AMD_IOMMU_PGSIZES, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 1b066e7d1..013bdfff2 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -138,7 +138,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -u32 amd_iommu_unmap_flush; /* if true, flush on every unmap */ +bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -408,20 +408,6 @@ static inline int ivhd_entry_length(u8 *ivhd) } /* - * This function reads the last device id the IOMMU has to handle from the PCI - * capability header for this IOMMU - */ -static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) -{ - u32 cap; - - cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); - - return 0; -} - -/* * After reading the highest device id from the IOMMU PCI capability header * this function looks if there is a higher device id defined in the ACPI table */ @@ -433,14 +419,13 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) p += sizeof(*h); end += h->length; - find_last_devid_on_pci(PCI_BUS_NUM(h->devid), - PCI_SLOT(h->devid), - PCI_FUNC(h->devid), - h->cap_ptr); - while (p < end) { dev = (struct ivhd_entry *)p; switch (dev->type) { + case IVHD_DEV_ALL: + /* Use maximum BDF value for DEV_ALL */ + update_last_devid(0xffff); + break; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: @@ -513,17 +498,12 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) * write commands to that buffer later and the IOMMU will execute them * asynchronously */ -static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) +static int __init alloc_command_buffer(struct amd_iommu *iommu) { - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); - - if (cmd_buf == NULL) - return NULL; - - iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; + iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(CMD_BUFFER_SIZE)); - return cmd_buf; + return iommu->cmd_buf ? 
0 : -ENOMEM; } /* @@ -557,27 +537,20 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) &entry, sizeof(entry)); amd_iommu_reset_cmd_buffer(iommu); - iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); } static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); + free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } /* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); + iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); - if (iommu->evt_buf == NULL) - return NULL; - - iommu->evt_buf_size = EVT_BUFFER_SIZE; - - return iommu->evt_buf; + return iommu->evt_buf ? 0 : -ENOMEM; } static void iommu_enable_event_buffer(struct amd_iommu *iommu) @@ -604,15 +577,12 @@ static void __init free_event_buffer(struct amd_iommu *iommu) } /* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_ppr_log(struct amd_iommu *iommu) +static int __init alloc_ppr_log(struct amd_iommu *iommu) { - iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(PPR_LOG_SIZE)); - - if (iommu->ppr_log == NULL) - return NULL; + iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(PPR_LOG_SIZE)); - return iommu->ppr_log; + return iommu->ppr_log ? 0 : -ENOMEM; } static void iommu_enable_ppr_log(struct amd_iommu *iommu) @@ -835,20 +805,10 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, switch (e->type) { case IVHD_DEV_ALL: - DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" - " last device %02x:%02x.%x flags: %02x\n", - PCI_BUS_NUM(iommu->first_device), - PCI_SLOT(iommu->first_device), - PCI_FUNC(iommu->first_device), - PCI_BUS_NUM(iommu->last_device), - PCI_SLOT(iommu->last_device), - PCI_FUNC(iommu->last_device), - e->flags); + DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - for (dev_i = iommu->first_device; - dev_i <= iommu->last_device; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, - e->flags, 0); + for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) + set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: @@ -1004,17 +964,6 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, return 0; } -/* Initializes the device->iommu mapping for the driver */ -static int __init init_iommu_devices(struct amd_iommu *iommu) -{ - u32 i; - - for (i = iommu->first_device; i <= iommu->last_device; ++i) - set_iommu_for_device(iommu, i); - - return 0; -} - static void __init free_iommu_one(struct amd_iommu *iommu) { free_command_buffer(iommu); @@ -1111,12 +1060,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->mmio_base) return -ENOMEM; - iommu->cmd_buf = alloc_command_buffer(iommu); - if (!iommu->cmd_buf) + if (alloc_command_buffer(iommu)) return -ENOMEM; - iommu->evt_buf = alloc_event_buffer(iommu); - if (!iommu->evt_buf) + if (alloc_event_buffer(iommu)) return -ENOMEM; iommu->int_enabled = false; @@ -1135,8 +1082,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ amd_iommu_rlookup_table[iommu->devid] = NULL; - init_iommu_devices(iommu); - return 0; } @@ -1266,11 +1211,6 @@ 
static int iommu_init_pci(struct amd_iommu *iommu) pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, &misc); - iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range), - MMIO_GET_FD(range)); - iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range), - MMIO_GET_LD(range)); - if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; @@ -1308,11 +1248,8 @@ static int iommu_init_pci(struct amd_iommu *iommu) amd_iommu_v2_present = true; } - if (iommu_feature(iommu, FEATURE_PPR)) { - iommu->ppr_log = alloc_ppr_log(iommu); - if (!iommu->ppr_log) - return -ENOMEM; - } + if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) + return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) amd_iommu_np_cache = true; @@ -1758,11 +1695,8 @@ static void __init free_on_init_error(void) free_pages((unsigned long)irq_lookup_table, get_order(rlookup_table_size)); - if (amd_iommu_irq_cache) { - kmem_cache_destroy(amd_iommu_irq_cache); - amd_iommu_irq_cache = NULL; - - } + kmem_cache_destroy(amd_iommu_irq_cache); + amd_iommu_irq_cache = NULL; free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); @@ -2201,7 +2135,7 @@ int __init amd_iommu_detect(void) iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; - return 0; + return 1; } /**************************************************************************** diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index c9b64722f..b08cf57bf 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -295,9 +295,9 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +#define DTE_FLAG_IOTLB (1ULL << 32) +#define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) -#define DTE_FLAG_IOTLB (0x01UL << 32) -#define DTE_FLAG_GV (0x01ULL << 55) #define DTE_GLX_SHIFT (56) #define DTE_GLX_MASK (3) @@ -517,11 +517,6 @@ struct amd_iommu { /* pci domain of this IOMMU */ u16 pci_seg; - /* first device this IOMMU handles. read from PCI */ - u16 first_device; - /* last device this IOMMU handles. 
read from PCI */ - u16 last_device; - /* start of exclusion range of that IOMMU */ u64 exclusion_start; /* length of exclusion range of that IOMMU */ @@ -529,11 +524,7 @@ struct amd_iommu { /* command buffer virtual address */ u8 *cmd_buf; - /* size of command buffer */ - u32 cmd_buf_size; - /* size of event buffer */ - u32 evt_buf_size; /* event buffer virtual address */ u8 *evt_buf; @@ -675,7 +666,7 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; * If true, the addresses will be flushed on unmap time, not when * they are reused */ -extern u32 amd_iommu_unmap_flush; +extern bool amd_iommu_unmap_flush; /* Smallest max PASID supported by any IOMMU in the system */ extern u32 amd_iommu_max_pasid; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index d21d4edf7..7caf2fa23 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -494,6 +494,22 @@ static void handle_fault_error(struct fault *fault) } } +static bool access_error(struct vm_area_struct *vma, struct fault *fault) +{ + unsigned long requested = 0; + + if (fault->flags & PPR_FAULT_EXEC) + requested |= VM_EXEC; + + if (fault->flags & PPR_FAULT_READ) + requested |= VM_READ; + + if (fault->flags & PPR_FAULT_WRITE) + requested |= VM_WRITE; + + return (requested & ~vma->vm_flags) != 0; +} + static void do_fault(struct work_struct *work) { struct fault *fault = container_of(work, struct fault, work); @@ -516,8 +532,8 @@ static void do_fault(struct work_struct *work) goto out; } - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) { - /* handle_mm_fault would BUG_ON() */ + /* Check if we have the right permissions on the vma */ + if (access_error(vma, fault)) { up_read(&mm->mmap_sem); handle_fault_error(fault); goto out; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 286e890e7..4e5118a4c 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -26,8 +26,10 @@ #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/module.h> +#include <linux/msi.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/of_platform.h> #include <linux/pci.h> #include <linux/platform_device.h> @@ -403,6 +405,31 @@ enum pri_resp { PRI_RESP_SUCC, }; +enum arm_smmu_msi_index { + EVTQ_MSI_INDEX, + GERROR_MSI_INDEX, + PRIQ_MSI_INDEX, + ARM_SMMU_MAX_MSIS, +}; + +static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { + [EVTQ_MSI_INDEX] = { + ARM_SMMU_EVTQ_IRQ_CFG0, + ARM_SMMU_EVTQ_IRQ_CFG1, + ARM_SMMU_EVTQ_IRQ_CFG2, + }, + [GERROR_MSI_INDEX] = { + ARM_SMMU_GERROR_IRQ_CFG0, + ARM_SMMU_GERROR_IRQ_CFG1, + ARM_SMMU_GERROR_IRQ_CFG2, + }, + [PRIQ_MSI_INDEX] = { + ARM_SMMU_PRIQ_IRQ_CFG0, + ARM_SMMU_PRIQ_IRQ_CFG1, + ARM_SMMU_PRIQ_IRQ_CFG2, + }, +}; + struct arm_smmu_cmdq_ent { /* Common fields */ u8 opcode; @@ -570,7 +597,6 @@ struct arm_smmu_device { unsigned int sid_bits; struct arm_smmu_strtab_cfg strtab_cfg; - struct list_head list; }; /* SMMU private data for an IOMMU group */ @@ -605,10 +631,6 @@ struct arm_smmu_domain { struct iommu_domain domain; }; -/* Our list of SMMU instances */ -static DEFINE_SPINLOCK(arm_smmu_devices_lock); -static LIST_HEAD(arm_smmu_devices); - struct arm_smmu_option_prop { u32 opt; const char *prop; @@ -1427,7 +1449,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { int ret; - u16 asid; + int asid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; @@ -1439,10 +1461,11 @@ static int 
arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, &cfg->cdptr_dma, GFP_KERNEL); if (!cfg->cdptr) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); + ret = -ENOMEM; goto out_free_asid; } - cfg->cd.asid = asid; + cfg->cd.asid = (u16)asid; cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; @@ -1456,7 +1479,7 @@ out_free_asid: static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { - u16 vmid; + int vmid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -1464,7 +1487,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, if (IS_ERR_VALUE(vmid)) return vmid; - cfg->vmid = vmid; + cfg->vmid = (u16)vmid; cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; return 0; @@ -1726,7 +1749,8 @@ static void __arm_smmu_release_pci_iommudata(void *data) static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev) { struct device_node *of_node; - struct arm_smmu_device *curr, *smmu = NULL; + struct platform_device *smmu_pdev; + struct arm_smmu_device *smmu = NULL; struct pci_bus *bus = pdev->bus; /* Walk up to the root bus */ @@ -1739,14 +1763,10 @@ static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev) return NULL; /* See if we can find an SMMU corresponding to the phandle */ - spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(curr, &arm_smmu_devices, list) { - if (curr->dev->of_node == of_node) { - smmu = curr; - break; - } - } - spin_unlock(&arm_smmu_devices_lock); + smmu_pdev = of_find_device_by_node(of_node); + if (smmu_pdev) + smmu = platform_get_drvdata(smmu_pdev); + of_node_put(of_node); return smmu; } @@ -1902,6 +1922,7 @@ static struct iommu_ops arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, + .device_group = pci_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .pgsize_bitmap = -1UL, /* Restricted during device attach */ @@ -2186,6 +2207,72 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, 1, ARM_SMMU_POLL_TIMEOUT_US); } +static void arm_smmu_free_msis(void *data) +{ + struct device *dev = data; + platform_msi_domain_free_irqs(dev); +} + +static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + phys_addr_t doorbell; + struct device *dev = msi_desc_to_dev(desc); + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index]; + + doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; + doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT; + + writeq_relaxed(doorbell, smmu->base + cfg[0]); + writel_relaxed(msg->data, smmu->base + cfg[1]); + writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); +} + +static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) +{ + struct msi_desc *desc; + int ret, nvec = ARM_SMMU_MAX_MSIS; + struct device *dev = smmu->dev; + + /* Clear the MSI address regs */ + writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); + writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); + else + nvec--; + + if (!(smmu->features & ARM_SMMU_FEAT_MSI)) + return; + + /* 
Allocate MSIs for evtq, gerror and priq. Ignore cmdq */ + ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); + if (ret) { + dev_warn(dev, "failed to allocate MSIs\n"); + return; + } + + for_each_msi_entry(desc, dev) { + switch (desc->platform.msi_index) { + case EVTQ_MSI_INDEX: + smmu->evtq.q.irq = desc->irq; + break; + case GERROR_MSI_INDEX: + smmu->gerr_irq = desc->irq; + break; + case PRIQ_MSI_INDEX: + smmu->priq.q.irq = desc->irq; + break; + default: /* Unknown */ + continue; + } + } + + /* Add callback to free MSIs on teardown */ + devm_add_action(dev, arm_smmu_free_msis, dev); +} + static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) { int ret, irq; @@ -2199,11 +2286,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) return ret; } - /* Clear the MSI address regs */ - writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); - writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + arm_smmu_setup_msis(smmu); - /* Request wired interrupt lines */ + /* Request interrupt lines */ irq = smmu->evtq.q.irq; if (irq) { ret = devm_request_threaded_irq(smmu->dev, irq, @@ -2232,8 +2317,6 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) } if (smmu->features & ARM_SMMU_FEAT_PRI) { - writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); - irq = smmu->priq.q.irq; if (irq) { ret = devm_request_threaded_irq(smmu->dev, irq, @@ -2612,16 +2695,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) if (ret) return ret; + /* Record our private device structure */ + platform_set_drvdata(pdev, smmu); + /* Reset the device */ ret = arm_smmu_device_reset(smmu); if (ret) goto out_free_structures; - /* Record our private device structure */ - INIT_LIST_HEAD(&smmu->list); - spin_lock(&arm_smmu_devices_lock); - list_add(&smmu->list, &arm_smmu_devices); - spin_unlock(&arm_smmu_devices_lock); return 0; out_free_structures: @@ -2631,21 +2712,7 @@ out_free_structures: static int arm_smmu_device_remove(struct platform_device *pdev) { - struct arm_smmu_device *curr, *smmu = NULL; - struct device *dev = &pdev->dev; - - spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(curr, &arm_smmu_devices, list) { - if (curr->dev == dev) { - smmu = curr; - list_del(&smmu->list); - break; - } - } - spin_unlock(&arm_smmu_devices_lock); - - if (!smmu) - return -ENODEV; + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); arm_smmu_free_structures(smmu); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 48a39dfa9..47dc7a793 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -70,6 +70,18 @@ ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ ? 
0x400 : 0)) +#ifdef CONFIG_64BIT +#define smmu_writeq writeq_relaxed +#else +#define smmu_writeq(reg64, addr) \ + do { \ + u64 __val = (reg64); \ + void __iomem *__addr = (addr); \ + writel_relaxed(__val >> 32, __addr + 4); \ + writel_relaxed(__val, __addr); \ + } while (0) +#endif + /* Configuration registers */ #define ARM_SMMU_GR0_sCR0 0x0 #define sCR0_CLIENTPD (1 << 0) @@ -185,10 +197,8 @@ #define ARM_SMMU_CB_SCTLR 0x0 #define ARM_SMMU_CB_RESUME 0x8 #define ARM_SMMU_CB_TTBCR2 0x10 -#define ARM_SMMU_CB_TTBR0_LO 0x20 -#define ARM_SMMU_CB_TTBR0_HI 0x24 -#define ARM_SMMU_CB_TTBR1_LO 0x28 -#define ARM_SMMU_CB_TTBR1_HI 0x2c +#define ARM_SMMU_CB_TTBR0 0x20 +#define ARM_SMMU_CB_TTBR1 0x28 #define ARM_SMMU_CB_TTBCR 0x30 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c @@ -226,7 +236,7 @@ #define TTBCR2_SEP_SHIFT 15 #define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT) -#define TTBRn_HI_ASID_SHIFT 16 +#define TTBRn_ASID_SHIFT 48 #define FSR_MULTI (1 << 31) #define FSR_SS (1 << 30) @@ -695,12 +705,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg) { u32 reg; + u64 reg64; bool stage1; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_device *smmu = smmu_domain->smmu; - void __iomem *cb_base, *gr0_base, *gr1_base; + void __iomem *cb_base, *gr1_base; - gr0_base = ARM_SMMU_GR0(smmu); gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); @@ -738,22 +748,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* TTBRs */ if (stage1) { - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32; - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); - - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO); - reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32; - reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI); + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); + + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1); } else { - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); } /* TTBCR */ @@ -1212,17 +1217,15 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; -#ifdef CONFIG_64BIT if (smmu->version == ARM_SMMU_V2) - writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); + smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR); else -#endif writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { dev_err(dev, - "iova to phys timed out on 0x%pad. Falling back to software table walk.\n", + "iova to phys timed out on %pad. 
Falling back to software table walk.\n", &iova); return ops->iova_to_phys(ops, iova); } @@ -1292,33 +1295,25 @@ static void __arm_smmu_release_pci_iommudata(void *data) kfree(data); } -static int arm_smmu_add_pci_device(struct pci_dev *pdev) +static int arm_smmu_init_pci_device(struct pci_dev *pdev, + struct iommu_group *group) { - int i, ret; - u16 sid; - struct iommu_group *group; struct arm_smmu_master_cfg *cfg; - - group = iommu_group_get_for_dev(&pdev->dev); - if (IS_ERR(group)) - return PTR_ERR(group); + u16 sid; + int i; cfg = iommu_group_get_iommudata(group); if (!cfg) { cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) { - ret = -ENOMEM; - goto out_put_group; - } + if (!cfg) + return -ENOMEM; iommu_group_set_iommudata(group, cfg, __arm_smmu_release_pci_iommudata); } - if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) { - ret = -ENOSPC; - goto out_put_group; - } + if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) + return -ENOSPC; /* * Assume Stream ID == Requester ID for now. @@ -1334,16 +1329,13 @@ static int arm_smmu_add_pci_device(struct pci_dev *pdev) cfg->streamids[cfg->num_streamids++] = sid; return 0; -out_put_group: - iommu_group_put(group); - return ret; } -static int arm_smmu_add_platform_device(struct device *dev) +static int arm_smmu_init_platform_device(struct device *dev, + struct iommu_group *group) { - struct iommu_group *group; - struct arm_smmu_master *master; struct arm_smmu_device *smmu = find_smmu_for_device(dev); + struct arm_smmu_master *master; if (!smmu) return -ENODEV; @@ -1352,21 +1344,20 @@ static int arm_smmu_add_platform_device(struct device *dev) if (!master) return -ENODEV; - /* No automatic group creation for platform devices */ - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - iommu_group_set_iommudata(group, &master->cfg, NULL); - return iommu_group_add_device(group, dev); + + return 0; } static int arm_smmu_add_device(struct device *dev) { - if (dev_is_pci(dev)) - return arm_smmu_add_pci_device(to_pci_dev(dev)); + struct iommu_group *group; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); - return arm_smmu_add_platform_device(dev); + return 0; } static void arm_smmu_remove_device(struct device *dev) @@ -1374,6 +1365,32 @@ static void arm_smmu_remove_device(struct device *dev) iommu_group_remove_device(dev); } +static struct iommu_group *arm_smmu_device_group(struct device *dev) +{ + struct iommu_group *group; + int ret; + + if (dev_is_pci(dev)) + group = pci_device_group(dev); + else + group = generic_device_group(dev); + + if (IS_ERR(group)) + return group; + + if (dev_is_pci(dev)) + ret = arm_smmu_init_pci_device(to_pci_dev(dev), group); + else + ret = arm_smmu_init_platform_device(dev, group); + + if (ret) { + iommu_group_put(group); + group = ERR_PTR(ret); + } + + return group; +} + static int arm_smmu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { @@ -1430,6 +1447,7 @@ static struct iommu_ops arm_smmu_ops = { .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, + .device_group = arm_smmu_device_group, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, .pgsize_bitmap = -1UL, /* Restricted during device attach */ diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c new file mode 100644 index 000000000..72d618266 --- /dev/null +++ b/drivers/iommu/dma-iommu.c @@ -0,0 +1,529 @@ +/* + * A fairly generic DMA-API to IOMMU-API glue 
layer. + * + * Copyright (C) 2014-2015 ARM Ltd. + * + * based in part on arch/arm/mm/dma-mapping.c: + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/device.h> +#include <linux/dma-iommu.h> +#include <linux/gfp.h> +#include <linux/huge_mm.h> +#include <linux/iommu.h> +#include <linux/iova.h> +#include <linux/mm.h> +#include <linux/scatterlist.h> +#include <linux/vmalloc.h> + +int iommu_dma_init(void) +{ + return iova_cache_get(); +} + +/** + * iommu_get_dma_cookie - Acquire DMA-API resources for a domain + * @domain: IOMMU domain to prepare for DMA-API usage + * + * IOMMU drivers should normally call this from their domain_alloc + * callback when domain->type == IOMMU_DOMAIN_DMA. + */ +int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad; + + if (domain->iova_cookie) + return -EEXIST; + + iovad = kzalloc(sizeof(*iovad), GFP_KERNEL); + domain->iova_cookie = iovad; + + return iovad ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(iommu_get_dma_cookie); + +/** + * iommu_put_dma_cookie - Release a domain's DMA mapping resources + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * + * IOMMU drivers should normally call this from their domain_free callback. + */ +void iommu_put_dma_cookie(struct iommu_domain *domain) +{ + struct iova_domain *iovad = domain->iova_cookie; + + if (!iovad) + return; + + put_iova_domain(iovad); + kfree(iovad); + domain->iova_cookie = NULL; +} +EXPORT_SYMBOL(iommu_put_dma_cookie); + +/** + * iommu_dma_init_domain - Initialise a DMA mapping domain + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() + * @base: IOVA at which the mappable address space starts + * @size: Size of IOVA space + * + * @base and @size should be exact multiples of IOMMU page granularity to + * avoid rounding surprises. If necessary, we reserve the page at address 0 + * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but + * any change which could make prior IOVAs invalid will fail. + */ +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long order, base_pfn, end_pfn; + + if (!iovad) + return -ENODEV; + + /* Use the smallest supported page size for IOVA granularity */ + order = __ffs(domain->ops->pgsize_bitmap); + base_pfn = max_t(unsigned long, 1, base >> order); + end_pfn = (base + size - 1) >> order; + + /* Check the domain allows at least some access to the device... 
*/ + if (domain->geometry.force_aperture) { + if (base > domain->geometry.aperture_end || + base + size <= domain->geometry.aperture_start) { + pr_warn("specified DMA range outside IOMMU capability\n"); + return -EFAULT; + } + /* ...then finally give it a kicking to make sure it fits */ + base_pfn = max_t(unsigned long, base_pfn, + domain->geometry.aperture_start >> order); + end_pfn = min_t(unsigned long, end_pfn, + domain->geometry.aperture_end >> order); + } + + /* All we can safely do with an existing domain is enlarge it */ + if (iovad->start_pfn) { + if (1UL << order != iovad->granule || + base_pfn != iovad->start_pfn || + end_pfn < iovad->dma_32bit_pfn) { + pr_warn("Incompatible range for DMA domain\n"); + return -EFAULT; + } + iovad->dma_32bit_pfn = end_pfn; + } else { + init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + } + return 0; +} +EXPORT_SYMBOL(iommu_dma_init_domain); + +/** + * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags + * @dir: Direction of DMA transfer + * @coherent: Is the DMA master cache-coherent? + * + * Return: corresponding IOMMU API page protection flags + */ +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) +{ + int prot = coherent ? IOMMU_CACHE : 0; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return 0; + } +} + +static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, + dma_addr_t dma_limit) +{ + unsigned long shift = iova_shift(iovad); + unsigned long length = iova_align(iovad, size) >> shift; + + /* + * Enforce size-alignment to be safe - there could perhaps be an + * attribute to control this per-device, or at least per-domain... + */ + return alloc_iova(iovad, length, dma_limit >> shift, true); +} + +/* The IOVA allocator knows what we mapped, so just unmap whatever that was */ +static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr) +{ + struct iova_domain *iovad = domain->iova_cookie; + unsigned long shift = iova_shift(iovad); + unsigned long pfn = dma_addr >> shift; + struct iova *iova = find_iova(iovad, pfn); + size_t size; + + if (WARN_ON(!iova)) + return; + + size = iova_size(iova) << shift; + size -= iommu_unmap(domain, pfn << shift, size); + /* ...and if we can't, then something is horribly, horribly wrong */ + WARN_ON(size > 0); + __free_iova(iovad, iova); +} + +static void __iommu_dma_free_pages(struct page **pages, int count) +{ + while (count--) + __free_page(pages[count]); + kvfree(pages); +} + +static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) +{ + struct page **pages; + unsigned int i = 0, array_size = count * sizeof(*pages); + unsigned int order = MAX_ORDER; + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, GFP_KERNEL); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + /* IOMMU can map any pages, so himem can also be used here */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + struct page *page = NULL; + int j; + + /* + * Higher-order allocations are a convenience rather + * than a necessity, hence using __GFP_NORETRY until + * falling back to single-page allocations. 
+ */ + for (order = min_t(unsigned int, order, __fls(count)); + order > 0; order--) { + page = alloc_pages(gfp | __GFP_NORETRY, order); + if (!page) + continue; + if (PageCompound(page)) { + if (!split_huge_page(page)) + break; + __free_pages(page, order); + } else { + split_page(page, order); + break; + } + } + if (!page) + page = alloc_page(gfp); + if (!page) { + __iommu_dma_free_pages(pages, i); + return NULL; + } + j = 1 << order; + count -= j; + while (j--) + pages[i++] = page++; + } + return pages; +} + +/** + * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc() + * @dev: Device which owns this buffer + * @pages: Array of buffer pages as returned by iommu_dma_alloc() + * @size: Size of buffer in bytes + * @handle: DMA address of buffer + * + * Frees both the pages associated with the buffer, and the array + * describing them + */ +void iommu_dma_free(struct device *dev, struct page **pages, size_t size, + dma_addr_t *handle) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle); + __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); + *handle = DMA_ERROR_CODE; +} + +/** + * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space + * @dev: Device to allocate memory for. Must be a real device + * attached to an iommu_dma_domain + * @size: Size of buffer in bytes + * @gfp: Allocation flags + * @prot: IOMMU mapping flags + * @handle: Out argument for allocated DMA handle + * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the + * given VA/PA are visible to the given non-coherent device. + * + * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, + * but an IOMMU which supports smaller pages might not map the whole thing. + * + * Return: Array of struct page pointers describing the buffer, + * or NULL on failure. + */ +struct page **iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, int prot, dma_addr_t *handle, + void (*flush_page)(struct device *, const void *, phys_addr_t)) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct page **pages; + struct sg_table sgt; + dma_addr_t dma_addr; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + *handle = DMA_ERROR_CODE; + + pages = __iommu_dma_alloc_pages(count, gfp); + if (!pages) + return NULL; + + iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); + if (!iova) + goto out_free_pages; + + size = iova_align(iovad, size); + if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL)) + goto out_free_iova; + + if (!(prot & IOMMU_CACHE)) { + struct sg_mapping_iter miter; + /* + * The CPU-centric flushing implied by SG_MITER_TO_SG isn't + * sufficient here, so skip it by using the "wrong" direction. 
+ */ + sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG); + while (sg_miter_next(&miter)) + flush_page(dev, miter.addr, page_to_phys(miter.page)); + sg_miter_stop(&miter); + } + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot) + < size) + goto out_free_sg; + + *handle = dma_addr; + sg_free_table(&sgt); + return pages; + +out_free_sg: + sg_free_table(&sgt); +out_free_iova: + __free_iova(iovad, iova); +out_free_pages: + __iommu_dma_free_pages(pages, count); + return NULL; +} + +/** + * iommu_dma_mmap - Map a buffer into provided user VMA + * @pages: Array representing buffer from iommu_dma_alloc() + * @size: Size of buffer in bytes + * @vma: VMA describing requested userspace mapping + * + * Maps the pages of the buffer in @pages into @vma. The caller is responsible + * for verifying the correct size and protection of @vma beforehand. + */ + +int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) +{ + unsigned long uaddr = vma->vm_start; + unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int ret = -ENXIO; + + for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) { + ret = vm_insert_page(vma, uaddr, pages[i]); + if (ret) + break; + uaddr += PAGE_SIZE; + } + return ret; +} + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int prot) +{ + dma_addr_t dma_addr; + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + phys_addr_t phys = page_to_phys(page) + offset; + size_t iova_off = iova_offset(iovad, phys); + size_t len = iova_align(iovad, size + iova_off); + struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); + + if (!iova) + return DMA_ERROR_CODE; + + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) { + __free_iova(iovad, iova); + return DMA_ERROR_CODE; + } + return dma_addr + iova_off; +} + +void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); +} + +/* + * Prepare a successfully-mapped scatterlist to give back to the caller. + * Handling IOVA concatenation can come later, if needed + */ +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + dma_addr_t dma_addr) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + /* Un-swizzling the fields here, hence the naming mismatch */ + unsigned int s_offset = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_dma_len = s->length; + + s->offset = s_offset; + s->length = s_length; + sg_dma_address(s) = dma_addr + s_offset; + dma_addr += s_dma_len; + } + return i; +} + +/* + * If mapping failed, then just restore the original list, + * but making sure the DMA fields are invalidated. + */ +static void __invalidate_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_address(s) != DMA_ERROR_CODE) + s->offset = sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_len(s) = 0; + } +} + +/* + * The DMA API client is passing in a scatterlist which could describe + * any old buffer layout, but the IOMMU API requires everything to be + * aligned to IOMMU pages. 
Hence the need for this complicated bit of + * impedance-matching, to be able to hand off a suitably-aligned list, + * but still preserve the original offsets and sizes for the caller. + */ +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int prot) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iova_domain *iovad = domain->iova_cookie; + struct iova *iova; + struct scatterlist *s, *prev = NULL; + dma_addr_t dma_addr; + size_t iova_len = 0; + int i; + + /* + * Work out how much IOVA space we need, and align the segments to + * IOVA granules for the IOMMU driver to handle. With some clever + * trickery we can modify the list in-place, but reversibly, by + * hiding the original data in the as-yet-unused DMA fields. + */ + for_each_sg(sg, s, nents, i) { + size_t s_offset = iova_offset(iovad, s->offset); + size_t s_length = s->length; + + sg_dma_address(s) = s_offset; + sg_dma_len(s) = s_length; + s->offset -= s_offset; + s_length = iova_align(iovad, s_length + s_offset); + s->length = s_length; + + /* + * The simple way to avoid the rare case of a segment + * crossing the boundary mask is to pad the previous one + * to end at a naturally-aligned IOVA for this one's size, + * at the cost of potentially over-allocating a little. + */ + if (prev) { + size_t pad_len = roundup_pow_of_two(s_length); + + pad_len = (pad_len - iova_len) & (pad_len - 1); + prev->length += pad_len; + iova_len += pad_len; + } + + iova_len += s_length; + prev = s; + } + + iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); + if (!iova) + goto out_restore_sg; + + /* + * We'll leave any physical concatenation to the IOMMU driver's + * implementation - it knows better than we do. + */ + dma_addr = iova_dma_addr(iovad, iova); + if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len) + goto out_free_iova; + + return __finalise_sg(dev, sg, nents, dma_addr); + +out_free_iova: + __free_iova(iovad, iova); +out_restore_sg: + __invalidate_sg(sg, nents); + return 0; +} + +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + /* + * The scatterlist segments are mapped into a single + * contiguous IOVA allocation, so this is incredibly easy. + */ + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg)); +} + +int iommu_dma_supported(struct device *dev, u64 mask) +{ + /* + * 'Special' IOMMUs which don't have the same addressing capability + * as the CPU will have to wait until we have some way to query that + * before they'll be able to use this framework. 
+ */ + return 1; +} + +int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DMA_ERROR_CODE; +} diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8757f8dfc..80e3c1760 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1086,6 +1086,11 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_destroy(iommu->iommu_dev); if (iommu->irq) { + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + } free_irq(iommu->irq, iommu); dmar_free_hwirq(iommu->irq); iommu->irq = 0; @@ -1493,53 +1498,68 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) } } + +static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq) +{ + if (iommu->irq == irq) + return DMAR_FECTL_REG; + else if (iommu->pr_irq == irq) + return DMAR_PECTL_REG; + else + BUG(); +} + void dmar_msi_unmask(struct irq_data *data) { struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); unsigned long flag; /* unmask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(0, iommu->reg + DMAR_FECTL_REG); + writel(0, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_mask(struct irq_data *data) { - unsigned long flag; struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); + unsigned long flag; /* mask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); + writel(DMA_FECTL_IM, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_write(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(msg->data, iommu->reg + DMAR_FEDATA_REG); - writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); - writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); + writel(msg->data, iommu->reg + reg + 4); + writel(msg->address_lo, iommu->reg + reg + 8); + writel(msg->address_hi, iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_read(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - msg->data = readl(iommu->reg + DMAR_FEDATA_REG); - msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); - msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); + msg->data = readl(iommu->reg + reg + 4); + msg->address_lo = readl(iommu->reg + reg + 8); + msg->address_hi = readl(iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 2570f2a25..a34355fca 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -20,11 +20,11 @@ #include "fsl_pamu.h" +#include <linux/fsl/guts.h> #include <linux/interrupt.h> #include <linux/genalloc.h> #include <asm/mpc85xx.h> -#include <asm/fsl_guts.h> /* define indexes for each operation mapping scenario */ #define OMI_QMAN 0x00 diff --git a/drivers/iommu/fsl_pamu_domain.c 
b/drivers/iommu/fsl_pamu_domain.c index 1d452930c..da0e1e30e 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -923,7 +923,7 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); /* We can partition PCIe devices so assign device group to the device */ if (pci_endpt_partioning) { - group = iommu_group_get_for_dev(&pdev->dev); + group = pci_device_group(&pdev->dev); /* * PCIe controller is not a paritionable entity @@ -956,44 +956,34 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) return group; } -static int fsl_pamu_add_device(struct device *dev) +static struct iommu_group *fsl_pamu_device_group(struct device *dev) { struct iommu_group *group = ERR_PTR(-ENODEV); - struct pci_dev *pdev; - const u32 *prop; - int ret = 0, len; + int len; /* * For platform devices we allocate a separate group for * each of the devices. */ - if (dev_is_pci(dev)) { - pdev = to_pci_dev(dev); - /* Don't create device groups for virtual PCI bridges */ - if (pdev->subordinate) - return 0; + if (dev_is_pci(dev)) + group = get_pci_device_group(to_pci_dev(dev)); + else if (of_get_property(dev->of_node, "fsl,liodn", &len)) + group = get_device_iommu_group(dev); - group = get_pci_device_group(pdev); + return group; +} - } else { - prop = of_get_property(dev->of_node, "fsl,liodn", &len); - if (prop) - group = get_device_iommu_group(dev); - } +static int fsl_pamu_add_device(struct device *dev) +{ + struct iommu_group *group; + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); - /* - * Check if device has already been added to an iommu group. - * Group could have already been created for a PCI device in - * the iommu_group_get_for_dev path. - */ - if (!dev->iommu_group) - ret = iommu_group_add_device(group, dev); - iommu_group_put(group); - return ret; + + return 0; } static void fsl_pamu_remove_device(struct device *dev) @@ -1072,6 +1062,7 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_get_attr = fsl_pamu_get_domain_attr, .add_device = fsl_pamu_add_device, .remove_device = fsl_pamu_remove_device, + .device_group = fsl_pamu_device_group, }; int __init pamu_domain_init(void) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d65cf4239..ac7387686 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -34,6 +34,7 @@ #include <linux/mempool.h> #include <linux/memory.h> #include <linux/timer.h> +#include <linux/io.h> #include <linux/iova.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> @@ -418,10 +419,13 @@ struct device_domain_info { struct list_head global; /* link to global list */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ - struct { - u8 enabled:1; - u8 qdep; - } ats; /* ATS state */ + u8 pasid_supported:3; + u8 pasid_enabled:1; + u8 pri_supported:1; + u8 pri_enabled:1; + u8 ats_supported:1; + u8 ats_enabled:1; + u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ @@ -497,13 +501,37 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int intel_iommu_ecs = 1; +static int intel_iommu_pasid28; +static int iommu_identity_mapping; + +#define IDENTMAP_ALL 1 +#define IDENTMAP_GFX 2 +#define IDENTMAP_AZALIA 4 -/* We only actually use ECS when PASID support (on the new bit 40) - * is also advertised. 
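The dmar_msi_*() rewrite in dmar.c above leans on the fact that, in the VT-d register file, the data, address and upper-address registers of both the fault-event and the page-request-event blocks sit at fixed offsets of 4, 8 and 12 bytes from their respective control registers. A minimal compile-time sketch of that assumption; dmar_msi_layout_check() is an invented name, and the DMAR_PE* offsets come from the intel-iommu.h update that accompanies this series rather than from this diff:

#include <linux/bug.h>
#include <linux/intel-iommu.h>

/* Fault event block:  FECTL 0x38, FEDATA 0x3c, FEADDR 0x40, FEUADDR 0x44 */
/* Page request block: PECTL 0xdc, PEDATA 0xe0, PEADDR 0xe4, PEUADDR 0xe8 */
static inline void dmar_msi_layout_check(void)
{
	BUILD_BUG_ON(DMAR_FEDATA_REG  != DMAR_FECTL_REG + 4);
	BUILD_BUG_ON(DMAR_FEADDR_REG  != DMAR_FECTL_REG + 8);
	BUILD_BUG_ON(DMAR_FEUADDR_REG != DMAR_FECTL_REG + 12);
	BUILD_BUG_ON(DMAR_PEDATA_REG  != DMAR_PECTL_REG + 4);
	BUILD_BUG_ON(DMAR_PEADDR_REG  != DMAR_PECTL_REG + 8);
	BUILD_BUG_ON(DMAR_PEUADDR_REG != DMAR_PECTL_REG + 12);
}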
Some early implementations — the ones with - * PASID support on bit 28 — have issues even when we *only* use - * extended root/context tables. */ +/* Broadwell and Skylake have broken ECS support — normal so-called "second + * level" translation of DMA requests-without-PASID doesn't actually happen + * unless you also set the NESTE bit in an extended context-entry. Which of + * course means that SVM doesn't work because it's trying to do nested + * translation of the physical addresses it finds in the process page tables, + * through the IOVA->phys mapping found in the "second level" page tables. + * + * The VT-d specification was retroactively changed to change the definition + * of the capability bits and pretend that Broadwell/Skylake never happened... + * but unfortunately the wrong bit was changed. It's ECS which is broken, but + * for some reason it was the PASID capability bit which was redefined (from + * bit 28 on BDW/SKL to bit 40 in future). + * + * So our test for ECS needs to eschew those implementations which set the old + * PASID capabiity bit 28, since those are the ones on which ECS is broken. + * Unless we are working around the 'pasid28' limitations, that is, by putting + * the device into passthrough mode for normal DMA and thus masking the bug. + */ #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - ecap_pasid(iommu->ecap)) + (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) +/* PASID support is thus enabled if ECS is enabled and *either* of the old + * or new capability bits are set. */ +#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ + (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -566,6 +594,11 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable extended context table support\n"); intel_iommu_ecs = 0; + } else if (!strncmp(str, "pasid28", 7)) { + printk(KERN_INFO + "Intel-IOMMU: enable pre-production PASID support\n"); + intel_iommu_pasid28 = 1; + iommu_identity_mapping |= IDENTMAP_GFX; } str += strcspn(str, ","); @@ -1407,37 +1440,22 @@ static struct device_domain_info * iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, u8 bus, u8 devfn) { - bool found = false; struct device_domain_info *info; - struct pci_dev *pdev; assert_spin_locked(&device_domain_lock); - if (!ecap_dev_iotlb_support(iommu->ecap)) - return NULL; - if (!iommu->qi) return NULL; list_for_each_entry(info, &domain->devices, link) if (info->iommu == iommu && info->bus == bus && info->devfn == devfn) { - found = true; + if (info->ats_supported && info->dev) + return info; break; } - if (!found || !info->dev || !dev_is_pci(info->dev)) - return NULL; - - pdev = to_pci_dev(info->dev); - - if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS)) - return NULL; - - if (!dmar_find_matched_atsr_unit(pdev)) - return NULL; - - return info; + return NULL; } static void iommu_enable_dev_iotlb(struct device_domain_info *info) @@ -1448,20 +1466,48 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - if (pci_enable_ats(pdev, VTD_PAGE_SHIFT)) - return; - info->ats.enabled = 1; - info->ats.qdep = pci_ats_queue_depth(pdev); +#ifdef CONFIG_INTEL_IOMMU_SVM + /* The PCIe spec, in its wisdom, declares that the behaviour of + the device if you enable PASID support after ATS support is + undefined. 
So always enable PASID support on devices which + have it, even if we can't yet know if we're ever going to + use it. */ + if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + info->pasid_enabled = 1; + + if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) + info->pri_enabled = 1; +#endif + if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { + info->ats_enabled = 1; + info->ats_qdep = pci_ats_queue_depth(pdev); + } } static void iommu_disable_dev_iotlb(struct device_domain_info *info) { - if (!info->ats.enabled) + struct pci_dev *pdev; + + if (dev_is_pci(info->dev)) return; - pci_disable_ats(to_pci_dev(info->dev)); - info->ats.enabled = 0; + pdev = to_pci_dev(info->dev); + + if (info->ats_enabled) { + pci_disable_ats(pdev); + info->ats_enabled = 0; + } +#ifdef CONFIG_INTEL_IOMMU_SVM + if (info->pri_enabled) { + pci_disable_pri(pdev); + info->pri_enabled = 0; + } + if (info->pasid_enabled) { + pci_disable_pasid(pdev); + info->pasid_enabled = 0; + } +#endif } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1473,11 +1519,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) { - if (!info->ats.enabled) + if (!info->ats_enabled) continue; sid = info->bus << 8 | info->devfn; - qdep = info->ats.qdep; + qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1667,6 +1713,14 @@ static void free_dmar_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) { + if (ecap_prs(iommu->ecap)) + intel_svm_finish_prq(iommu); + intel_svm_free_pasid_tables(iommu); + } +#endif } static struct dmar_domain *alloc_domain(int flags) @@ -1934,8 +1988,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); - translation = info ? 
CONTEXT_TT_DEV_IOTLB : - CONTEXT_TT_MULTI_LEVEL; + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; context_set_address_root(context, virt_to_phys(pgd)); context_set_address_width(context, iommu->agaw); @@ -2103,7 +2159,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg_res = aligned_nrpages(sg->offset, sg->length); sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; - pteval = (sg_phys(sg) & PAGE_MASK) | prot; + pteval = page_to_phys(sg_page(sg)) | prot; phys_pfn = pteval >> VTD_PAGE_SHIFT; } @@ -2273,12 +2329,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->bus = bus; info->devfn = devfn; - info->ats.enabled = 0; - info->ats.qdep = 0; + info->ats_supported = info->pasid_supported = info->pri_supported = 0; + info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; + info->ats_qdep = 0; info->dev = dev; info->domain = domain; info->iommu = iommu; + if (dev && dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(info->dev); + + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && + dmar_find_matched_atsr_unit(pdev)) + info->ats_supported = 1; + + if (ecs_enabled(iommu)) { + if (pasid_enabled(iommu)) { + int features = pci_pasid_features(pdev); + if (features >= 0) + info->pasid_supported = features | 1; + } + + if (info->ats_supported && ecap_prs(iommu->ecap) && + pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI)) + info->pri_supported = 1; + } + } + spin_lock_irqsave(&device_domain_lock, flags); if (dev) found = find_domain(dev); @@ -2404,11 +2482,6 @@ found_domain: return domain; } -static int iommu_identity_mapping; -#define IDENTMAP_ALL 1 -#define IDENTMAP_GFX 2 -#define IDENTMAP_AZALIA 4 - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) @@ -2434,17 +2507,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int iommu_prepare_identity_map(struct device *dev, - unsigned long long start, - unsigned long long end) +static int domain_prepare_identity_map(struct device *dev, + struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) { - struct dmar_domain *domain; - int ret; - - domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - if (!domain) - return -ENOMEM; - /* For _hardware_ passthrough, don't bother. 
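As a hedged sketch of the ordering constraint spelled out in iommu_enable_dev_iotlb() above (PASID and PRI must be switched on before ATS, and off after it, because the PCIe spec leaves enabling PASID once ATS is already enabled undefined), the PCI core calls boil down to the following. The svm_caps_on()/svm_caps_off() names are invented, error handling and the info-> bookkeeping are omitted, and 'pasid_features' is whatever pci_pasid_features() returned:

#include <linux/pci.h>
#include <linux/pci-ats.h>

static void svm_caps_on(struct pci_dev *pdev, int pasid_features)
{
	if (pasid_features >= 0)
		pci_enable_pasid(pdev, pasid_features);	/* 1. PASID first */
	if (!pci_reset_pri(pdev))
		pci_enable_pri(pdev, 32);		/* 2. then PRI, with 32 outstanding requests */
	pci_enable_ats(pdev, PAGE_SHIFT);		/* 3. ATS strictly last */
}

static void svm_caps_off(struct pci_dev *pdev)
{
	pci_disable_ats(pdev);				/* reverse order on teardown */
	pci_disable_pri(pdev);
	pci_disable_pasid(pdev);
}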
But for software passthrough, we do it anyway -- it may indicate a memory range which is reserved in E820, so which didn't get set @@ -2464,8 +2531,7 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } if (end >> agaw_to_width(domain->agaw)) { @@ -2475,18 +2541,27 @@ static int iommu_prepare_identity_map(struct device *dev, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); - ret = -EIO; - goto error; + return -EIO; } - ret = iommu_domain_identity_map(domain, start, end); - if (ret) - goto error; + return iommu_domain_identity_map(domain, start, end); +} - return 0; +static int iommu_prepare_identity_map(struct device *dev, + unsigned long long start, + unsigned long long end) +{ + struct dmar_domain *domain; + int ret; + + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + if (!domain) + return -ENOMEM; + + ret = domain_prepare_identity_map(dev, domain, start, end); + if (ret) + domain_exit(domain); - error: - domain_exit(domain); return ret; } @@ -2812,18 +2887,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu) } static int copy_context_table(struct intel_iommu *iommu, - struct root_entry __iomem *old_re, + struct root_entry *old_re, struct context_entry **tbl, int bus, bool ext) { int tbl_idx, pos = 0, idx, devfn, ret = 0, did; - struct context_entry __iomem *old_ce = NULL; struct context_entry *new_ce = NULL, ce; + struct context_entry *old_ce = NULL; struct root_entry re; phys_addr_t old_ce_phys; tbl_idx = ext ? bus * 2 : bus; - memcpy_fromio(&re, old_re, sizeof(re)); + memcpy(&re, old_re, sizeof(re)); for (devfn = 0; devfn < 256; devfn++) { /* First calculate the correct index */ @@ -2858,7 +2933,8 @@ static int copy_context_table(struct intel_iommu *iommu, } ret = -ENOMEM; - old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE); + old_ce = memremap(old_ce_phys, PAGE_SIZE, + MEMREMAP_WB); if (!old_ce) goto out; @@ -2870,7 +2946,7 @@ static int copy_context_table(struct intel_iommu *iommu, } /* Now copy the context entry */ - memcpy_fromio(&ce, old_ce + idx, sizeof(ce)); + memcpy(&ce, old_ce + idx, sizeof(ce)); if (!__context_present(&ce)) continue; @@ -2906,7 +2982,7 @@ static int copy_context_table(struct intel_iommu *iommu, __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE); out_unmap: - iounmap(old_ce); + memunmap(old_ce); out: return ret; @@ -2914,8 +2990,8 @@ out: static int copy_translation_tables(struct intel_iommu *iommu) { - struct root_entry __iomem *old_rt; struct context_entry **ctxt_tbls; + struct root_entry *old_rt; phys_addr_t old_rt_phys; int ctxt_table_entries; unsigned long flags; @@ -2940,7 +3016,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) if (!old_rt_phys) return -EINVAL; - old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE); + old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB); if (!old_rt) return -ENOMEM; @@ -2989,7 +3065,7 @@ static int copy_translation_tables(struct intel_iommu *iommu) ret = 0; out_unmap: - iounmap(old_rt); + memunmap(old_rt); return ret; } @@ -3100,6 +3176,10 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif } if (iommu_pass_through) @@ -3187,6 +3267,13 @@ domains_done: iommu_flush_write_buffer(iommu); +#ifdef 
CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto free_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; @@ -3246,7 +3333,10 @@ static struct iova *intel_alloc_iova(struct device *dev, static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) { + struct dmar_rmrr_unit *rmrr; struct dmar_domain *domain; + struct device *i_dev; + int i, ret; domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) { @@ -3255,6 +3345,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) return NULL; } + /* We have a new domain - setup possible RMRRs for the device */ + rcu_read_lock(); + for_each_rmrr_units(rmrr) { + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, i_dev) { + if (i_dev != dev) + continue; + + ret = domain_prepare_identity_map(dev, domain, + rmrr->base_address, + rmrr->end_address); + if (ret) + dev_err(dev, "Mapping reserved region failed\n"); + } + } + rcu_read_unlock(); + return domain; } @@ -3540,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, flags |= GFP_DMA32; } - if (flags & __GFP_WAIT) { + if (gfpflags_allow_blocking(flags)) { unsigned int count = size >> PAGE_SHIFT; page = dma_alloc_from_contiguous(dev, count, order); @@ -3597,7 +3704,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, for_each_sg(sglist, sg, nelems, i) { BUG_ON(!sg_page(sg)); - sg->dma_address = sg_phys(sg); + sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; sg->dma_length = sg->length; } return nelems; @@ -4115,6 +4222,11 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif + if (dmaru->ignored) { /* * we always have to disable PMRs or DMA may fail on this device @@ -4126,6 +4238,14 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) intel_iommu_init_qi(iommu); iommu_flush_write_buffer(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto disable_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto disable_iommu; @@ -4194,14 +4314,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) dev = pci_physfn(dev); for (bus = dev->bus; bus; bus = bus->parent) { bridge = bus->self; - if (!bridge || !pci_is_pcie(bridge) || + /* If it's an integrated device, allow ATS */ + if (!bridge) + return 1; + /* Connected via non-PCIe: no ATS */ + if (!pci_is_pcie(bridge) || pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; + /* If we found the root port, look it up in the ATSR */ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; } - if (!bridge) - return 0; rcu_read_lock(); list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { @@ -4865,6 +4988,114 @@ static void intel_iommu_remove_device(struct device *dev) iommu_device_unlink(iommu->iommu_dev, dev); } +#ifdef CONFIG_INTEL_IOMMU_SVM +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) +{ + struct device_domain_info *info; + struct context_entry *context; + struct dmar_domain *domain; + unsigned long flags; + u64 ctx_lo; + int ret; + + domain = get_valid_domain_for_dev(sdev->dev); + if (!domain) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + + ret = -EINVAL; + info = sdev->dev->archdata.iommu; + 
if (!info || !info->pasid_supported) + goto out; + + context = iommu_context_addr(iommu, info->bus, info->devfn, 0); + if (WARN_ON(!context)) + goto out; + + ctx_lo = context[0].lo; + + sdev->did = domain->iommu_did[iommu->seq_id]; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + + if (!(ctx_lo & CONTEXT_PASIDE)) { + context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + wmb(); + /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both + * extended to permit requests-with-PASID if the PASIDE bit + * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, + * however, the PASIDE bit is ignored and requests-with-PASID + * are unconditionally blocked. Which makes less sense. + * So convert from CONTEXT_TT_PASS_THROUGH to one of the new + * "guest mode" translation types depending on whether ATS + * is available or not. Annoyingly, we can't use the new + * modes *unless* PASIDE is set. */ + if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { + ctx_lo &= ~CONTEXT_TT_MASK; + if (info->ats_supported) + ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; + else + ctx_lo |= CONTEXT_TT_PT_PASID << 2; + } + ctx_lo |= CONTEXT_PASIDE; + if (iommu->pasid_state_table) + ctx_lo |= CONTEXT_DINVE; + if (info->pri_supported) + ctx_lo |= CONTEXT_PRS; + context[0].lo = ctx_lo; + wmb(); + iommu->flush.flush_context(iommu, sdev->did, sdev->sid, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + + /* Enable PASID support in the device, if it wasn't already */ + if (!info->pasid_enabled) + iommu_enable_dev_iotlb(info); + + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + ret = 0; + + out: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} + +struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + + if (iommu_dummy(dev)) { + dev_warn(dev, + "No IOMMU translation for device; cannot enable SVM\n"); + return NULL; + } + + iommu = device_to_iommu(dev, &bus, &devfn); + if ((!iommu)) { + dev_err(dev, "No IOMMU for device; cannot enable SVM\n"); + return NULL; + } + + if (!iommu->pasid_table) { + dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); + return NULL; + } + + return iommu; +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + static const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -4877,6 +5108,7 @@ static const struct iommu_ops intel_iommu_ops = { .iova_to_phys = intel_iommu_iova_to_phys, .add_device = intel_iommu_add_device, .remove_device = intel_iommu_remove_device, + .device_group = pci_device_group, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c new file mode 100644 index 000000000..50464833d --- /dev/null +++ b/drivers/iommu/intel-svm.c @@ -0,0 +1,622 @@ +/* + * Copyright © 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * Authors: David Woodhouse <dwmw2@infradead.org> + */ + +#include <linux/intel-iommu.h> +#include <linux/mmu_notifier.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/intel-svm.h> +#include <linux/rculist.h> +#include <linux/pci.h> +#include <linux/pci-ats.h> +#include <linux/dmar.h> +#include <linux/interrupt.h> + +static irqreturn_t prq_event_thread(int irq, void *d); + +struct pasid_entry { + u64 val; +}; + +struct pasid_state_entry { + u64 val; +}; + +int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) +{ + struct page *pages; + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate PASID table\n", + iommu->name); + return -ENOMEM; + } + iommu->pasid_table = page_address(pages); + pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); + + if (ecap_dis(iommu->ecap)) { + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (pages) + iommu->pasid_state_table = page_address(pages); + else + pr_warn("IOMMU: %s: Failed to allocate PASID state table\n", + iommu->name); + } + + idr_init(&iommu->pasid_idr); + + return 0; +} + +int intel_svm_free_pasid_tables(struct intel_iommu *iommu) +{ + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + if (iommu->pasid_table) { + free_pages((unsigned long)iommu->pasid_table, order); + iommu->pasid_table = NULL; + } + if (iommu->pasid_state_table) { + free_pages((unsigned long)iommu->pasid_state_table, order); + iommu->pasid_state_table = NULL; + } + idr_destroy(&iommu->pasid_idr); + return 0; +} + +#define PRQ_ORDER 0 + +int intel_svm_enable_prq(struct intel_iommu *iommu) +{ + struct page *pages; + int irq, ret; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate page request queue\n", + iommu->name); + return -ENOMEM; + } + iommu->prq = page_address(pages); + + irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); + if (irq <= 0) { + pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", + iommu->name); + ret = -EINVAL; + err: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + return ret; + } + iommu->pr_irq = irq; + + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); + + ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, + iommu->prq_name, iommu); + if (ret) { + pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", + iommu->name); + dmar_free_hwirq(irq); + goto err; + } + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); + + return 0; +} + +int intel_svm_finish_prq(struct intel_iommu *iommu) +{ + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); + + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return 0; +} + +static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, + unsigned long address, unsigned long pages, int ih, int gl) +{ + struct qi_desc desc; + + if (pages == -1) { + /* For global kernel pages we have to flush them in *all* PASIDs + * 
because that's the only option the hardware gives us. Despite + * the fact that they are actually only accessible through one. */ + if (gl) + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE; + else + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.high = 0; + } else { + int mask = ilog2(__roundup_pow_of_two(pages)); + + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; + desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) | + QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); + } + qi_submit_sync(&desc, svm->iommu); + + if (sdev->dev_iotlb) { + desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; + if (pages == -1) { + desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE; + } else if (pages > 1) { + /* The least significant zero bit indicates the size. So, + * for example, an "address" value of 0x12345f000 will + * flush from 0x123440000 to 0x12347ffff (256KiB). */ + unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT); + unsigned long mask = __rounddown_pow_of_two(address ^ last);; + + desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE; + } else { + desc.high = QI_DEV_EIOTLB_ADDR(address); + } + qi_submit_sync(&desc, svm->iommu); + } +} + +static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, + unsigned long pages, int ih, int gl) +{ + struct intel_svm_dev *sdev; + + /* Try deferred invalidate if available */ + if (svm->iommu->pasid_state_table && + !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl); + rcu_read_unlock(); +} + +static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address, pte_t pte) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1, 0); +} + +static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1, 0); +} + +/* Pages have been freed at this point */ +static void intel_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, start, + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); +} + + +static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid) +{ + struct qi_desc desc; + + desc.high = 0; + desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid); + + qi_submit_sync(&desc, svm->iommu); +} + +static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + svm->iommu->pasid_table[svm->pasid].val = 0; + + /* There's no need to do any flush because we can't get here if there + * are any devices left anyway. 
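To make the size-encoded device-IOTLB address in intel_flush_svm_range_dev() above concrete, here is a worked example; the numbers are invented for illustration and follow the same arithmetic as the code:

/*
 * Flushing four 4KiB pages starting at address 0x123456000:
 *
 *   last    = address + 3 * 4096               = 0x123459000
 *   mask    = rounddown_pow_of_two(address ^ last)
 *           = rounddown_pow_of_two(0xf000)     = 0x8000
 *   encoded = (address & ~mask) | (mask - 1)   = 0x123457fff
 *
 * The lowest clear bit of the encoded address is bit 15, so the device
 * invalidates the aligned 2^16 = 64KiB region 0x123450000..0x12345ffff,
 * which covers the four requested pages (and, as with the example in the
 * comment above, possibly a little more).
 */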
*/ + WARN_ON(!list_empty(&svm->devs)); +} + +static const struct mmu_notifier_ops intel_mmuops = { + .release = intel_mm_release, + .change_pte = intel_change_pte, + .invalidate_page = intel_invalidate_page, + .invalidate_range = intel_invalidate_range, +}; + +static DEFINE_MUTEX(pasid_mutex); + +int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm = NULL; + struct mm_struct *mm = NULL; + int pasid_max; + int ret; + + if (WARN_ON(!iommu)) + return -EINVAL; + + if (dev_is_pci(dev)) { + pasid_max = pci_max_pasids(to_pci_dev(dev)); + if (pasid_max < 0) + return -EINVAL; + } else + pasid_max = 1 << 20; + + if ((flags & SVM_FLAG_SUPERVISOR_MODE)) { + if (!ecap_srs(iommu->ecap)) + return -EINVAL; + } else if (pasid) { + mm = get_task_mm(current); + BUG_ON(!mm); + } + + mutex_lock(&pasid_mutex); + if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { + int i; + + idr_for_each_entry(&iommu->pasid_idr, svm, i) { + if (svm->mm != mm || + (svm->flags & SVM_FLAG_PRIVATE_PASID)) + continue; + + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + if (sdev->ops != ops) { + ret = -EBUSY; + goto out; + } + sdev->users++; + goto success; + } + } + + break; + } + } + + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + ret = intel_iommu_enable_pasid(iommu, sdev); + if (ret || !pasid) { + /* If they don't actually want to assign a PASID, this is + * just an enabling check/preparation. */ + kfree(sdev); + goto out; + } + /* Finish the setup now we know we're keeping it */ + sdev->users = 1; + sdev->ops = ops; + init_rcu_head(&sdev->rcu); + + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + kfree(sdev); + goto out; + } + svm->iommu = iommu; + + if (pasid_max > 2 << ecap_pss(iommu->ecap)) + pasid_max = 2 << ecap_pss(iommu->ecap); + + /* Do not use PASID 0 in caching mode (virtualised IOMMU) */ + ret = idr_alloc(&iommu->pasid_idr, svm, + !!cap_caching_mode(iommu->cap), + pasid_max - 1, GFP_KERNEL); + if (ret < 0) { + kfree(svm); + goto out; + } + svm->pasid = ret; + svm->notifier.ops = &intel_mmuops; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); + ret = -ENOMEM; + if (mm) { + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1; + mm = NULL; + } else + iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11); + wmb(); + /* In caching mode, we still have to flush with PASID 0 when + * a PASID table entry becomes present. Not entirely clear + * *why* that would be the case — surely we could just issue + * a flush with the PASID value that we've changed? The PASID + * is the index into the table, after all. It's not like domain + * IDs in the case of the equivalent context-entry change in + * caching mode. And for that matter it's not entirely clear why + * a VMM would be in the business of caching the PASID table + * anyway. Surely that can be left entirely to the guest? 
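A hedged reading of the PASID-table writes just above, derived from the code itself rather than restating the spec:

/*
 * iommu->pasid_table[pasid].val as used here:
 *   bit 0      - entry valid/present
 *   bits 12+   - physical address of the first-level page table:
 *                __pa(mm->pgd) for ordinary user SVM,
 *                __pa(init_mm.pgd) for SVM_FLAG_SUPERVISOR_MODE
 *   bit 11     - set only in the supervisor-mode case, i.e. the bit that
 *                permits requests-with-PASID to reach kernel addresses
 */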
*/ + if (cap_caching_mode(iommu->cap)) + intel_flush_pasid_dev(svm, sdev, 0); + } + list_add_rcu(&sdev->list, &svm->devs); + + success: + *pasid = svm->pasid; + ret = 0; + out: + mutex_unlock(&pasid_mutex); + if (mm) + mmput(mm); + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_bind_mm); + +int intel_svm_unbind_mm(struct device *dev, int pasid) +{ + struct intel_svm_dev *sdev; + struct intel_iommu *iommu; + struct intel_svm *svm; + int ret = -EINVAL; + + mutex_lock(&pasid_mutex); + iommu = intel_svm_device_to_iommu(dev); + if (!iommu || !iommu->pasid_table) + goto out; + + svm = idr_find(&iommu->pasid_idr, pasid); + if (!svm) + goto out; + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + ret = 0; + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + /* Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. So it's + * hard to be as defensive as we might like. */ + intel_flush_pasid_dev(svm, sdev, svm->pasid); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + mmu_notifier_unregister(&svm->notifier, svm->mm); + + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + if (svm->mm) + mmput(svm->mm); + /* We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. + * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... */ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); + } + } + break; + } + } + out: + mutex_unlock(&pasid_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); + +/* Page request queue descriptor */ +struct page_req_dsc { + u64 srr:1; + u64 bof:1; + u64 pasid_present:1; + u64 lpig:1; + u64 pasid:20; + u64 bus:8; + u64 private:23; + u64 prg_index:9; + u64 rd_req:1; + u64 wr_req:1; + u64 exe_req:1; + u64 priv_req:1; + u64 devfn:8; + u64 addr:52; +}; + +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) + +static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req) +{ + unsigned long requested = 0; + + if (req->exe_req) + requested |= VM_EXEC; + + if (req->rd_req) + requested |= VM_READ; + + if (req->wr_req) + requested |= VM_WRITE; + + return (requested & ~vma->vm_flags) != 0; +} + +static irqreturn_t prq_event_thread(int irq, void *d) +{ + struct intel_iommu *iommu = d; + struct intel_svm *svm = NULL; + int head, tail, handled = 0; + + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct intel_svm_dev *sdev; + struct vm_area_struct *vma; + struct page_req_dsc *req; + struct qi_desc resp; + int ret, result; + u64 address; + + handled = 1; + + req = &iommu->prq[head / sizeof(*req)]; + + result = QI_RESP_FAILURE; + address = (u64)req->addr << VTD_PAGE_SHIFT; + if (!req->pasid_present) { + pr_err("%s: Page request without PASID: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto bad_req; + } + + if (!svm || svm->pasid != req->pasid) { + rcu_read_lock(); + svm = idr_find(&iommu->pasid_idr, req->pasid); + /* It *can't* go away, because the driver is not permitted + * to unbind the mm while any page faults are outstanding. 
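intel_svm_bind_mm() and intel_svm_unbind_mm() above are the exported entry points a client driver would call; a minimal usage sketch follows, where the my_driver_*() functions and the surrounding glue are invented for illustration:

#include <linux/device.h>
#include <linux/intel-svm.h>

static int my_driver_start_svm(struct device *dev, int *out_pasid)
{
	/* flags 0: ordinary user SVM, shareable PASID, no fault callback ops */
	int ret = intel_svm_bind_mm(dev, out_pasid, 0, NULL);

	if (ret)
		return ret;
	/* ...tell the device to tag its DMA with *out_pasid... */
	return 0;
}

static void my_driver_stop_svm(struct device *dev, int pasid)
{
	/* no page faults may still be outstanding for this PASID here */
	intel_svm_unbind_mm(dev, pasid);
}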
+ * So we only need RCU to protect the internal idr code. */ + rcu_read_unlock(); + + if (!svm) { + pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", + iommu->name, req->pasid, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto no_pasid; + } + } + + result = QI_RESP_INVALID; + /* Since we're using init_mm.pgd directly, we should never take + * any faults on kernel addresses. */ + if (!svm->mm) + goto bad_req; + down_read(&svm->mm->mmap_sem); + vma = find_extend_vma(svm->mm, address); + if (!vma || address < vma->vm_start) + goto invalid; + + if (access_error(vma, req)) + goto invalid; + + ret = handle_mm_fault(svm->mm, vma, address, + req->wr_req ? FAULT_FLAG_WRITE : 0); + if (ret & VM_FAULT_ERROR) + goto invalid; + + result = QI_RESP_SUCCESS; + invalid: + up_read(&svm->mm->mmap_sem); + bad_req: + /* Accounting for major/minor faults? */ + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) { + if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) + break; + } + /* Other devices can go away, but the drivers are not permitted + * to unbind while any page faults might be in flight. So it's + * OK to drop the 'lock' here now we have it. */ + rcu_read_unlock(); + + if (WARN_ON(&sdev->list == &svm->devs)) + sdev = NULL; + + if (sdev && sdev->ops && sdev->ops->fault_cb) { + int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | + (req->exe_req << 1) | (req->priv_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + } + /* We get here in the error case where the PASID lookup failed, + and these can be NULL. Do not use them below this point! */ + sdev = NULL; + svm = NULL; + no_pasid: + if (req->lpig) { + /* Page Group Response */ + resp.low = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_RESP_TYPE; + resp.high = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); + + qi_submit_sync(&resp, iommu); + } else if (req->srr) { + /* Page Stream Response */ + resp.low = QI_PSTRM_IDX(req->prg_index) | + QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | + QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; + resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | + QI_PSTRM_RESP_CODE(result); + + qi_submit_sync(&resp, iommu); + } + + head = (head + sizeof(*req)) & PRQ_RING_MASK; + } + + dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + + return IRQ_RETVAL(handled); +} diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 9ec4e0d94..1fae18816 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -169,8 +169,26 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, index = irq_iommu->irte_index + irq_iommu->sub_handle; irte = &iommu->ir_table->base[index]; - set_64bit(&irte->low, irte_modified->low); - set_64bit(&irte->high, irte_modified->high); +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) + if ((irte->pst == 1) || (irte_modified->pst == 1)) { + bool ret; + + ret = cmpxchg_double(&irte->low, &irte->high, + irte->low, irte->high, + irte_modified->low, irte_modified->high); + /* + * We use cmpxchg16 to atomically update the 128-bit IRTE, + * and it cannot be updated by the hardware or other processors + * behind us, so the return value of cmpxchg16 should be the + * same as the old value. 
+ */ + WARN_ON(!ret); + } else +#endif + { + set_64bit(&irte->low, irte_modified->low); + set_64bit(&irte->high, irte_modified->high); + } __iommu_flush_cache(iommu, irte, sizeof(*irte)); rc = qi_flush_iec(iommu, index, 0); @@ -384,7 +402,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) static int iommu_load_old_irte(struct intel_iommu *iommu) { - struct irte __iomem *old_ir_table; + struct irte *old_ir_table; phys_addr_t irt_phys; unsigned int i; size_t size; @@ -408,12 +426,12 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte); /* Map the old IR table */ - old_ir_table = ioremap_cache(irt_phys, size); + old_ir_table = memremap(irt_phys, size, MEMREMAP_WB); if (!old_ir_table) return -ENOMEM; /* Copy data over */ - memcpy_fromio(iommu->ir_table->base, old_ir_table, size); + memcpy(iommu->ir_table->base, old_ir_table, size); __iommu_flush_cache(iommu, iommu->ir_table->base, size); @@ -426,7 +444,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) bitmap_set(iommu->ir_table->bitmap, i, 1); } - iounmap(old_ir_table); + memunmap(old_ir_table); return 0; } @@ -672,7 +690,7 @@ static int __init intel_prepare_irq_remapping(void) if (!dmar_ir_support()) return -ENODEV; - if (parse_ioapics_under_ir() != 1) { + if (parse_ioapics_under_ir()) { pr_info("Not enabling interrupt remapping\n"); goto error; } @@ -727,7 +745,16 @@ static inline void set_irq_posting_cap(void) struct intel_iommu *iommu; if (!disable_irq_post) { - intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; + /* + * If IRTE is in posted format, the 'pda' field goes across the + * 64-bit boundary, we need use cmpxchg16b to atomically update + * it. We only expose posted-interrupt when X86_FEATURE_CX16 + * is supported. Actually, hardware platforms supporting PI + * should have X86_FEATURE_CX16 support, this has been confirmed + * with Intel hardware guys. + */ + if ( cpu_has_cx16 ) + intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP; for_each_iommu(iommu, drhd) if (!cap_pi_support(iommu->cap)) { @@ -907,16 +934,21 @@ static int __init parse_ioapics_under_ir(void) bool ir_supported = false; int ioapic_idx; - for_each_iommu(iommu, drhd) - if (ecap_ir_support(iommu->ecap)) { - if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu)) - return -1; + for_each_iommu(iommu, drhd) { + int ret; - ir_supported = true; - } + if (!ecap_ir_support(iommu->ecap)) + continue; + + ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu); + if (ret) + return ret; + + ir_supported = true; + } if (!ir_supported) - return 0; + return -ENODEV; for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { int ioapic_id = mpc_ioapic_id(ioapic_idx); @@ -928,7 +960,7 @@ static int __init parse_ioapics_under_ir(void) } } - return 1; + return 0; } static int __init ir_dev_scope_init(void) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 049df495c..0e3b0092e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -728,16 +728,35 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) } /* + * Generic device_group call-back function. It just allocates one + * iommu-group per device. + */ +struct iommu_group *generic_device_group(struct device *dev) +{ + struct iommu_group *group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return NULL; + + return group; +} + +/* * Use standard PCI bus topology, isolation features, and DMA alias quirks * to find or create an IOMMU group for a device. 
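Both memremap() conversions above (copy_context_table()/copy_translation_tables() in intel-iommu.c and iommu_load_old_irte() here) follow the same pattern; a trimmed sketch, with copy_old_table() an invented name and error handling reduced to the bare minimum:

#include <linux/io.h>
#include <linux/string.h>

/*
 * The old kernel's tables live in ordinary RAM, so they are mapped
 * write-back with memremap() and copied with a plain memcpy(), instead of
 * being treated as I/O memory via ioremap_cache()/memcpy_fromio().
 */
static int copy_old_table(phys_addr_t phys, void *dst, size_t size)
{
	void *old = memremap(phys, size, MEMREMAP_WB);

	if (!old)
		return -ENOMEM;
	memcpy(dst, old, size);
	memunmap(old);
	return 0;
}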
*/ -static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +struct iommu_group *pci_device_group(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct group_for_pci_data data; struct pci_bus *bus; struct iommu_group *group = NULL; u64 devfns[4] = { 0 }; + if (WARN_ON(!dev_is_pci(dev))) + return ERR_PTR(-EINVAL); + /* * Find the upstream DMA alias for the device. A device must not * be aliased due to topology in order to have its own IOMMU group. @@ -791,14 +810,6 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) if (IS_ERR(group)) return NULL; - /* - * Try to allocate a default domain - needs support from the - * IOMMU driver. - */ - group->default_domain = __iommu_domain_alloc(pdev->dev.bus, - IOMMU_DOMAIN_DMA); - group->domain = group->default_domain; - return group; } @@ -814,6 +825,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) */ struct iommu_group *iommu_group_get_for_dev(struct device *dev) { + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; int ret; @@ -821,14 +833,24 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) if (group) return group; - if (!dev_is_pci(dev)) - return ERR_PTR(-EINVAL); + group = ERR_PTR(-EINVAL); - group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + if (ops && ops->device_group) + group = ops->device_group(dev); if (IS_ERR(group)) return group; + /* + * Try to allocate a default domain - needs support from the + * IOMMU driver. + */ + if (!group->default_domain) { + group->default_domain = __iommu_domain_alloc(dev->bus, + IOMMU_DOMAIN_DMA); + group->domain = group->default_domain; + } + ret = iommu_group_add_device(group, dev); if (ret) { iommu_group_put(group); @@ -1408,7 +1430,7 @@ size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); for_each_sg(sg, s, nents, i) { - phys_addr_t phys = sg_phys(s); + phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; /* * We are mapping on IOMMU page boundaries, so offset within diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 8cf605fa9..dfb868e2d 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -295,7 +295,7 @@ static struct iommu_gather_ops ipmmu_gather_ops = { static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { - phys_addr_t ttbr; + u64 ttbr; /* * Allocate the page table operations. 
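With the new ->device_group callback (generic_device_group() and pci_device_group() above) and the default-domain allocation moved into iommu_group_get_for_dev(), a driver's add_device path collapses to the shape the fsl_pamu conversion earlier in this diff shows. A hedged sketch, with the my_* names invented for illustration:

#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/pci.h>

static struct iommu_group *my_device_group(struct device *dev)
{
	if (dev_is_pci(dev))
		return pci_device_group(dev);	/* PCI topology and DMA-alias aware */
	return generic_device_group(dev);	/* one group per device */
}

static int my_add_device(struct device *dev)
{
	/* calls ->device_group and adds the device to the returned group */
	struct iommu_group *group = iommu_group_get_for_dev(dev);

	if (IS_ERR(group))
		return PTR_ERR(group);
	iommu_group_put(group);
	return 0;
}

static const struct iommu_ops my_iommu_ops = {
	/* ...map/unmap and the other callbacks elided... */
	.add_device	= my_add_device,
	.device_group	= my_device_group,
};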
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 913455a5f..8adaaeae3 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -22,7 +22,7 @@ int irq_remap_broken; int disable_sourceid_checking; int no_x2apic_optout; -int disable_irq_post = 1; +int disable_irq_post = 0; static int disable_irq_remap; static struct irq_remap_ops *remap_ops; @@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str) return -EINVAL; while (*str) { - if (!strncmp(str, "on", 2)) + if (!strncmp(str, "on", 2)) { disable_irq_remap = 0; - else if (!strncmp(str, "off", 3)) + disable_irq_post = 0; + } else if (!strncmp(str, "off", 3)) { disable_irq_remap = 1; - else if (!strncmp(str, "nosid", 5)) + disable_irq_post = 1; + } else if (!strncmp(str, "nosid", 5)) disable_sourceid_checking = 1; else if (!strncmp(str, "no_x2apic_optout", 16)) no_x2apic_optout = 1; + else if (!strncmp(str, "nopost", 6)) + disable_irq_post = 1; str += strcspn(str, ","); while (*str == ',') diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 36d0033c2..3dc5b65f3 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -26,6 +26,8 @@ #include <linux/of_iommu.h> #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> #include <asm/cacheflush.h> @@ -112,6 +114,18 @@ void omap_iommu_restore_ctx(struct device *dev) } EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); +static void dra7_cfg_dspsys_mmu(struct omap_iommu *obj, bool enable) +{ + u32 val, mask; + + if (!obj->syscfg) + return; + + mask = (1 << (obj->id * DSP_SYS_MMU_CONFIG_EN_SHIFT)); + val = enable ? mask : 0; + regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, mask, val); +} + static void __iommu_set_twl(struct omap_iommu *obj, bool on) { u32 l = iommu_read_reg(obj, MMU_CNTL); @@ -147,6 +161,8 @@ static int omap2_iommu_enable(struct omap_iommu *obj) iommu_write_reg(obj, pa, MMU_TTB); + dra7_cfg_dspsys_mmu(obj, true); + if (obj->has_bus_err_back) iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); @@ -161,6 +177,7 @@ static void omap2_iommu_disable(struct omap_iommu *obj) l &= ~MMU_CNTL_MASK; iommu_write_reg(obj, l, MMU_CNTL); + dra7_cfg_dspsys_mmu(obj, false); dev_dbg(obj->dev, "%s is shutting down\n", obj->name); } @@ -864,6 +881,42 @@ static void omap_iommu_detach(struct omap_iommu *obj) dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); } +static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, + struct omap_iommu *obj) +{ + struct device_node *np = pdev->dev.of_node; + int ret; + + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) + return 0; + + if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) { + dev_err(&pdev->dev, "ti,syscon-mmuconfig property is missing\n"); + return -EINVAL; + } + + obj->syscfg = + syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig"); + if (IS_ERR(obj->syscfg)) { + /* can fail with -EPROBE_DEFER */ + ret = PTR_ERR(obj->syscfg); + return ret; + } + + if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1, + &obj->id)) { + dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n"); + return -EINVAL; + } + + if (obj->id != 0 && obj->id != 1) { + dev_err(&pdev->dev, "invalid IOMMU instance id\n"); + return -EINVAL; + } + + return 0; +} + /* * OMAP Device MMU(IOMMU) detection */ @@ -907,6 +960,10 @@ static int omap_iommu_probe(struct platform_device *pdev) if (IS_ERR(obj->regbase)) return PTR_ERR(obj->regbase); + err = 
omap_iommu_dra7_get_dsp_system_cfg(pdev, obj); + if (err) + return err; + irq = platform_get_irq(pdev, 0); if (irq < 0) return -ENODEV; @@ -943,6 +1000,7 @@ static const struct of_device_id omap_iommu_of_match[] = { { .compatible = "ti,omap2-iommu" }, { .compatible = "ti,omap4-iommu" }, { .compatible = "ti,dra7-iommu" }, + { .compatible = "ti,dra7-dsp-iommu" }, {}, }; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index a656df2f9..59628e501 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -30,6 +30,7 @@ struct iotlb_entry { struct omap_iommu { const char *name; void __iomem *regbase; + struct regmap *syscfg; struct device *dev; struct iommu_domain *domain; struct dentry *debug_dir; @@ -48,6 +49,7 @@ struct omap_iommu { void *ctx; /* iommu context: registres saved area */ int has_bus_err_back; + u32 id; }; struct cr_regs { @@ -159,6 +161,13 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) ((pgsz) == MMU_CAM_PGSZ_4K) ? 0xfffff000 : 0) /* + * DSP_SYSTEM registers and bit definitions (applicable only for DRA7xx DSP) + */ +#define DSP_SYS_REVISION 0x00 +#define DSP_SYS_MMU_CONFIG 0x18 +#define DSP_SYS_MMU_CONFIG_EN_SHIFT 4 + +/* * utilities for super page(16MB, 1MB, 64KB and 4KB) */ diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c new file mode 100644 index 000000000..471ee36b9 --- /dev/null +++ b/drivers/iommu/s390-iommu.c @@ -0,0 +1,356 @@ +/* + * IOMMU API for s390 PCI devices + * + * Copyright IBM Corp. 2015 + * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> + */ + +#include <linux/pci.h> +#include <linux/iommu.h> +#include <linux/iommu-helper.h> +#include <linux/pci.h> +#include <linux/sizes.h> +#include <asm/pci_dma.h> + +/* + * Physically contiguous memory regions can be mapped with 4 KiB alignment, + * we allow all page sizes that are an order of 4KiB (no special large page + * support so far). 
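For the DRA7 DSP MMU hookup above, the bit flipped by dra7_cfg_dspsys_mmu() is derived entirely from the instance id read out of the second cell of ti,syscon-mmuconfig; a short worked note using only the defines shown in omap-iommu.h:

/*
 * DSP_SYS_MMU_CONFIG sits at offset 0x18 in the DSP_SYSTEM syscon region and
 * DSP_SYS_MMU_CONFIG_EN_SHIFT is 4, so:
 *   obj->id == 0  ->  mask = 1 << 0 = 0x01
 *   obj->id == 1  ->  mask = 1 << 4 = 0x10
 * regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, mask, enable ? mask : 0)
 * then touches only that one enable bit, which is also why the probe path
 * rejects any instance id other than 0 or 1.
 */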
+ */ +#define S390_IOMMU_PGSIZES (~0xFFFUL) + +struct s390_domain { + struct iommu_domain domain; + struct list_head devices; + unsigned long *dma_table; + spinlock_t dma_table_lock; + spinlock_t list_lock; +}; + +struct s390_domain_device { + struct list_head list; + struct zpci_dev *zdev; +}; + +static struct s390_domain *to_s390_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct s390_domain, domain); +} + +static bool s390_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + case IOMMU_CAP_INTR_REMAP: + return true; + default: + return false; + } +} + +struct iommu_domain *s390_domain_alloc(unsigned domain_type) +{ + struct s390_domain *s390_domain; + + if (domain_type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); + if (!s390_domain) + return NULL; + + s390_domain->dma_table = dma_alloc_cpu_table(); + if (!s390_domain->dma_table) { + kfree(s390_domain); + return NULL; + } + + spin_lock_init(&s390_domain->dma_table_lock); + spin_lock_init(&s390_domain->list_lock); + INIT_LIST_HEAD(&s390_domain->devices); + + return &s390_domain->domain; +} + +void s390_domain_free(struct iommu_domain *domain) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + + dma_cleanup_tables(s390_domain->dma_table); + kfree(s390_domain); +} + +static int s390_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct s390_domain_device *domain_device; + unsigned long flags; + int rc; + + if (!zdev) + return -ENODEV; + + domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL); + if (!domain_device) + return -ENOMEM; + + if (zdev->dma_table) + zpci_dma_exit_device(zdev); + + zdev->dma_table = s390_domain->dma_table; + rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, + zdev->start_dma + zdev->iommu_size - 1, + (u64) zdev->dma_table); + if (rc) + goto out_restore; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + /* First device defines the DMA range limits */ + if (list_empty(&s390_domain->devices)) { + domain->geometry.aperture_start = zdev->start_dma; + domain->geometry.aperture_end = zdev->end_dma; + domain->geometry.force_aperture = true; + /* Allow only devices with identical DMA range limits */ + } else if (domain->geometry.aperture_start != zdev->start_dma || + domain->geometry.aperture_end != zdev->end_dma) { + rc = -EINVAL; + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + goto out_restore; + } + domain_device->zdev = zdev; + zdev->s390_domain = s390_domain; + list_add(&domain_device->list, &s390_domain->devices); + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + return 0; + +out_restore: + zpci_dma_init_device(zdev); + kfree(domain_device); + + return rc; +} + +static void s390_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct s390_domain_device *domain_device, *tmp; + unsigned long flags; + int found = 0; + + if (!zdev) + return; + + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices, + list) { + if (domain_device->zdev == zdev) { + list_del(&domain_device->list); + kfree(domain_device); + found = 1; + break; + } + } + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + + 
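Two properties of the s390 code above and below are worth spelling out; the annotation is derived from the code, not from additional documentation:

/*
 * - S390_IOMMU_PGSIZES == ~0xFFFUL sets every bit from 2^12 upward, so the
 *   IOMMU core may pass ->map()/->unmap() any power-of-two size that is a
 *   multiple of 4KiB; s390_iommu_update_trans() below simply walks the range
 *   in 4KiB steps.
 * - The first device attached to a domain fixes the aperture to its
 *   zdev->start_dma..zdev->end_dma range; any further device must report the
 *   identical range or the attach fails with -EINVAL, since every device in
 *   the domain shares the one dma_table.
 */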
if (found) { + zdev->s390_domain = NULL; + zpci_unregister_ioat(zdev, 0); + zpci_dma_init_device(zdev); + } +} + +static int s390_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + int rc; + + group = iommu_group_get(dev); + if (!group) { + group = iommu_group_alloc(); + if (IS_ERR(group)) + return PTR_ERR(group); + } + + rc = iommu_group_add_device(group, dev); + iommu_group_put(group); + + return rc; +} + +static void s390_iommu_remove_device(struct device *dev) +{ + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + struct iommu_domain *domain; + + /* + * This is a workaround for a scenario where the IOMMU API common code + * "forgets" to call the detach_dev callback: After binding a device + * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers + * the attach_dev), removing the device via + * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev, + * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE + * notifier. + * + * So let's call detach_dev from here if it hasn't been called before. + */ + if (zdev && zdev->s390_domain) { + domain = iommu_get_domain_for_dev(dev); + if (domain) + s390_iommu_detach_device(domain, dev); + } + + iommu_group_remove_device(dev); +} + +static int s390_iommu_update_trans(struct s390_domain *s390_domain, + unsigned long pa, dma_addr_t dma_addr, + size_t size, int flags) +{ + struct s390_domain_device *domain_device; + u8 *page_addr = (u8 *) (pa & PAGE_MASK); + dma_addr_t start_dma_addr = dma_addr; + unsigned long irq_flags, nr_pages, i; + unsigned long *entry; + int rc = 0; + + if (dma_addr < s390_domain->domain.geometry.aperture_start || + dma_addr + size > s390_domain->domain.geometry.aperture_end) + return -EINVAL; + + nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + if (!nr_pages) + return 0; + + spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags); + for (i = 0; i < nr_pages; i++) { + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + if (!entry) { + rc = -ENOMEM; + goto undo_cpu_trans; + } + dma_update_cpu_trans(entry, page_addr, flags); + page_addr += PAGE_SIZE; + dma_addr += PAGE_SIZE; + } + + spin_lock(&s390_domain->list_lock); + list_for_each_entry(domain_device, &s390_domain->devices, list) { + rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32, + start_dma_addr, nr_pages * PAGE_SIZE); + if (rc) + break; + } + spin_unlock(&s390_domain->list_lock); + +undo_cpu_trans: + if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { + flags = ZPCI_PTE_INVALID; + while (i-- > 0) { + page_addr -= PAGE_SIZE; + dma_addr -= PAGE_SIZE; + entry = dma_walk_cpu_trans(s390_domain->dma_table, + dma_addr); + if (!entry) + break; + dma_update_cpu_trans(entry, page_addr, flags); + } + } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags); + + return rc; +} + +static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + int flags = ZPCI_PTE_VALID, rc = 0; + + if (!(prot & IOMMU_READ)) + return -EINVAL; + + if (!(prot & IOMMU_WRITE)) + flags |= ZPCI_TABLE_PROTECTED; + + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, + size, flags); + + return rc; +} + +static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + unsigned long *sto, *pto, *rto, flags; + unsigned int rtx, sx, px; + phys_addr_t phys = 0; + + if (iova < 
domain->geometry.aperture_start || + iova > domain->geometry.aperture_end) + return 0; + + rtx = calc_rtx(iova); + sx = calc_sx(iova); + px = calc_px(iova); + rto = s390_domain->dma_table; + + spin_lock_irqsave(&s390_domain->dma_table_lock, flags); + if (rto && reg_entry_isvalid(rto[rtx])) { + sto = get_rt_sto(rto[rtx]); + if (sto && reg_entry_isvalid(sto[sx])) { + pto = get_st_pto(sto[sx]); + if (pto && pt_entry_isvalid(pto[px])) + phys = pto[px] & ZPCI_PTE_ADDR_MASK; + } + } + spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags); + + return phys; +} + +static size_t s390_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct s390_domain *s390_domain = to_s390_domain(domain); + int flags = ZPCI_PTE_INVALID; + phys_addr_t paddr; + int rc; + + paddr = s390_iommu_iova_to_phys(domain, iova); + if (!paddr) + return 0; + + rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova, + size, flags); + if (rc) + return 0; + + return size; +} + +static struct iommu_ops s390_iommu_ops = { + .capable = s390_iommu_capable, + .domain_alloc = s390_domain_alloc, + .domain_free = s390_domain_free, + .attach_dev = s390_iommu_attach_device, + .detach_dev = s390_iommu_detach_device, + .map = s390_iommu_map, + .unmap = s390_iommu_unmap, + .iova_to_phys = s390_iommu_iova_to_phys, + .add_device = s390_iommu_add_device, + .remove_device = s390_iommu_remove_device, + .pgsize_bitmap = S390_IOMMU_PGSIZES, +}; + +static int __init s390_iommu_init(void) +{ + return bus_set_iommu(&pci_bus_type, &s390_iommu_ops); +} +subsys_initcall(s390_iommu_init); |
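Since s390_iommu_ops is registered for the whole PCI bus at subsys_initcall time, consumers drive it purely through the generic IOMMU API. A hedged usage sketch to close: map_one_page() is an invented name, error paths are trimmed, and the iova must lie inside the aperture established by the first attached device:

#include <linux/iommu.h>
#include <linux/pci.h>

static int map_one_page(struct pci_dev *pdev, dma_addr_t iova, phys_addr_t pa)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc(&pci_bus_type);	/* -> s390_domain_alloc() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, &pdev->dev);	/* -> s390_iommu_attach_device() */
	if (ret)
		goto out_free;

	ret = iommu_map(dom, iova, pa, PAGE_SIZE,	/* -> s390_iommu_map() */
			IOMMU_READ | IOMMU_WRITE);
	if (ret) {
		iommu_detach_device(dom, &pdev->dev);
		goto out_free;
	}

	/* the caller keeps 'dom' around for later iommu_unmap()/detach/free */
	return 0;

out_free:
	iommu_domain_free(dom);
	return ret;
}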