summaryrefslogtreecommitdiff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig25
-rw-r--r--drivers/iommu/Makefile3
-rw-r--r--drivers/iommu/amd_iommu.c175
-rw-r--r--drivers/iommu/amd_iommu_init.c122
-rw-r--r--drivers/iommu/amd_iommu_types.h15
-rw-r--r--drivers/iommu/amd_iommu_v2.c20
-rw-r--r--drivers/iommu/arm-smmu-v3.c155
-rw-r--r--drivers/iommu/arm-smmu.c132
-rw-r--r--drivers/iommu/dma-iommu.c529
-rw-r--r--drivers/iommu/dmar.c42
-rw-r--r--drivers/iommu/fsl_pamu.c2
-rw-r--r--drivers/iommu/fsl_pamu_domain.c41
-rw-r--r--drivers/iommu/intel-iommu.c392
-rw-r--r--drivers/iommu/intel-svm.c622
-rw-r--r--drivers/iommu/intel_irq_remapping.c64
-rw-r--r--drivers/iommu/iommu.c48
-rw-r--r--drivers/iommu/ipmmu-vmsa.c2
-rw-r--r--drivers/iommu/irq_remapping.c12
-rw-r--r--drivers/iommu/omap-iommu.c58
-rw-r--r--drivers/iommu/omap-iommu.h9
-rw-r--r--drivers/iommu/s390-iommu.c356
21 files changed, 2329 insertions, 495 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index cbe6a890a..b9094e9da 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -48,6 +48,13 @@ config OF_IOMMU
def_bool y
depends on OF && IOMMU_API
+# IOMMU-agnostic DMA-mapping layer
+config IOMMU_DMA
+ bool
+ depends on NEED_SG_DMA_LENGTH
+ select IOMMU_API
+ select IOMMU_IOVA
+
config FSL_PAMU
bool "Freescale IOMMU support"
depends on PPC32
@@ -134,6 +141,16 @@ config INTEL_IOMMU
and include PCI device scope covered by these DMA
remapping devices.
+config INTEL_IOMMU_SVM
+ bool "Support for Shared Virtual Memory with Intel IOMMU"
+ depends on INTEL_IOMMU && X86
+ select PCI_PASID
+ select MMU_NOTIFIER
+ help
+ Shared Virtual Memory (SVM) provides a facility for devices
+ to access DMA resources through process address space by
+ means of a Process Address Space ID (PASID).
+
config INTEL_IOMMU_DEFAULT_ON
def_bool y
prompt "Enable Intel DMA Remapping Devices by default"
@@ -361,6 +378,7 @@ config ARM_SMMU_V3
depends on ARM64 && PCI
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
+ select GENERIC_MSI_IRQ_DOMAIN
help
Support for implementations of the ARM System MMU architecture
version 3 providing translation support to a PCIe root complex.
@@ -368,4 +386,11 @@ config ARM_SMMU_V3
Say Y here if your system includes an IOMMU device implementing
the ARM SMMUv3 architecture.
+config S390_IOMMU
+ def_bool y if S390 && PCI
+ depends on S390 && PCI
+ select IOMMU_API
+ help
+ Support for the IOMMU API for s390 PCI devices.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c6dcc513d..68faca022 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
obj-$(CONFIG_IOMMU_IOVA) += iova.o
@@ -12,6 +13,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o
+obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
@@ -23,3 +25,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
+obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 532e2a211..8b2be1e77 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -89,8 +89,6 @@ static struct dma_map_ops amd_iommu_dma_ops;
struct iommu_dev_data {
struct list_head list; /* For domain->dev_list */
struct list_head dev_data_list; /* For global dev_data_list */
- struct list_head alias_list; /* Link alias-groups together */
- struct iommu_dev_data *alias_data;/* The alias dev_data */
struct protection_domain *domain; /* Domain the device is bound to */
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
@@ -136,8 +134,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
if (!dev_data)
return NULL;
- INIT_LIST_HEAD(&dev_data->alias_list);
-
dev_data->devid = devid;
spin_lock_irqsave(&dev_data_list_lock, flags);
@@ -147,17 +143,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
return dev_data;
}
-static void free_dev_data(struct iommu_dev_data *dev_data)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dev_data_list_lock, flags);
- list_del(&dev_data->dev_data_list);
- spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
- kfree(dev_data);
-}
-
static struct iommu_dev_data *search_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -311,73 +296,10 @@ out:
iommu_group_put(group);
}
-static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
-{
- *(u16 *)data = alias;
- return 0;
-}
-
-static u16 get_alias(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- u16 devid, ivrs_alias, pci_alias;
-
- devid = get_device_id(dev);
- ivrs_alias = amd_iommu_alias_table[devid];
- pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
-
- if (ivrs_alias == pci_alias)
- return ivrs_alias;
-
- /*
- * DMA alias showdown
- *
- * The IVRS is fairly reliable in telling us about aliases, but it
- * can't know about every screwy device. If we don't have an IVRS
- * reported alias, use the PCI reported alias. In that case we may
- * still need to initialize the rlookup and dev_table entries if the
- * alias is to a non-existent device.
- */
- if (ivrs_alias == devid) {
- if (!amd_iommu_rlookup_table[pci_alias]) {
- amd_iommu_rlookup_table[pci_alias] =
- amd_iommu_rlookup_table[devid];
- memcpy(amd_iommu_dev_table[pci_alias].data,
- amd_iommu_dev_table[devid].data,
- sizeof(amd_iommu_dev_table[pci_alias].data));
- }
-
- return pci_alias;
- }
-
- pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
- "for device %s[%04x:%04x], kernel reported alias "
- "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
- PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
- PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
- PCI_FUNC(pci_alias));
-
- /*
- * If we don't have a PCI DMA alias and the IVRS alias is on the same
- * bus, then the IVRS table may know about a quirk that we don't.
- */
- if (pci_alias == devid &&
- PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
- pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
- pdev->dma_alias_devfn = ivrs_alias & 0xff;
- pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
- PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
- dev_name(dev));
- }
-
- return ivrs_alias;
-}
-
static int iommu_init_device(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct iommu_dev_data *dev_data;
- u16 alias;
if (dev->archdata.iommu)
return 0;
@@ -386,24 +308,6 @@ static int iommu_init_device(struct device *dev)
if (!dev_data)
return -ENOMEM;
- alias = get_alias(dev);
-
- if (alias != dev_data->devid) {
- struct iommu_dev_data *alias_data;
-
- alias_data = find_dev_data(alias);
- if (alias_data == NULL) {
- pr_err("AMD-Vi: Warning: Unhandled device %s\n",
- dev_name(dev));
- free_dev_data(dev_data);
- return -ENOTSUPP;
- }
- dev_data->alias_data = alias_data;
-
- /* Add device to the alias_list */
- list_add(&dev_data->alias_list, &alias_data->alias_list);
- }
-
if (pci_iommuv2_capable(pdev)) {
struct amd_iommu *iommu;
@@ -445,9 +349,6 @@ static void iommu_uninit_device(struct device *dev)
iommu_group_remove_device(dev);
- /* Unlink from alias, it may change if another device is re-plugged */
- dev_data->alias_data = NULL;
-
/* Remove dma-ops */
dev->archdata.dma_ops = NULL;
@@ -633,7 +534,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
while (head != tail) {
iommu_print_event(iommu, iommu->evt_buf + head);
- head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
+ head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
}
writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
@@ -783,7 +684,7 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
u8 *target;
target = iommu->cmd_buf + tail;
- tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+ tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
/* Copy command to buffer */
memcpy(target, cmd, sizeof(*cmd));
@@ -950,15 +851,13 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu,
u32 left, tail, head, next_tail;
unsigned long flags;
- WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
-
again:
spin_lock_irqsave(&iommu->lock, flags);
head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
- next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
- left = (head - next_tail) % iommu->cmd_buf_size;
+ next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+ left = (head - next_tail) % CMD_BUFFER_SIZE;
if (left <= 2) {
struct iommu_cmd sync_cmd;
@@ -1114,11 +1013,15 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
static int device_flush_dte(struct iommu_dev_data *dev_data)
{
struct amd_iommu *iommu;
+ u16 alias;
int ret;
iommu = amd_iommu_rlookup_table[dev_data->devid];
+ alias = amd_iommu_alias_table[dev_data->devid];
ret = iommu_flush_dte(iommu, dev_data->devid);
+ if (!ret && alias != dev_data->devid)
+ ret = iommu_flush_dte(iommu, alias);
if (ret)
return ret;
@@ -1984,27 +1887,33 @@ static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
struct amd_iommu *iommu;
+ u16 alias;
bool ats;
iommu = amd_iommu_rlookup_table[dev_data->devid];
+ alias = amd_iommu_alias_table[dev_data->devid];
ats = dev_data->ats.enabled;
/* Update data structures */
dev_data->domain = domain;
list_add(&dev_data->list, &domain->dev_list);
- set_dte_entry(dev_data->devid, domain, ats);
/* Do reference counting */
domain->dev_iommu[iommu->index] += 1;
domain->dev_cnt += 1;
- /* Flush the DTE entry */
+ /* Update device table */
+ set_dte_entry(dev_data->devid, domain, ats);
+ if (alias != dev_data->devid)
+ set_dte_entry(dev_data->devid, domain, ats);
+
device_flush_dte(dev_data);
}
static void do_detach(struct iommu_dev_data *dev_data)
{
struct amd_iommu *iommu;
+ u16 alias;
/*
* First check if the device is still attached. It might already
@@ -2016,6 +1925,7 @@ static void do_detach(struct iommu_dev_data *dev_data)
return;
iommu = amd_iommu_rlookup_table[dev_data->devid];
+ alias = amd_iommu_alias_table[dev_data->devid];
/* decrease reference counters */
dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -2025,6 +1935,8 @@ static void do_detach(struct iommu_dev_data *dev_data)
dev_data->domain = NULL;
list_del(&dev_data->list);
clear_dte_entry(dev_data->devid);
+ if (alias != dev_data->devid)
+ clear_dte_entry(alias);
/* Flush the DTE entry */
device_flush_dte(dev_data);
@@ -2037,29 +1949,23 @@ static void do_detach(struct iommu_dev_data *dev_data)
static int __attach_device(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
- struct iommu_dev_data *head, *entry;
int ret;
+ /*
+ * Must be called with IRQs disabled. Warn here to detect early
+ * when its not.
+ */
+ WARN_ON(!irqs_disabled());
+
/* lock domain */
spin_lock(&domain->lock);
- head = dev_data;
-
- if (head->alias_data != NULL)
- head = head->alias_data;
-
- /* Now we have the root of the alias group, if any */
-
ret = -EBUSY;
- if (head->domain != NULL)
+ if (dev_data->domain != NULL)
goto out_unlock;
/* Attach alias group root */
- do_attach(head, domain);
-
- /* Attach other devices in the alias group */
- list_for_each_entry(entry, &head->alias_list, alias_list)
- do_attach(entry, domain);
+ do_attach(dev_data, domain);
ret = 0;
@@ -2209,26 +2115,24 @@ static int attach_device(struct device *dev,
*/
static void __detach_device(struct iommu_dev_data *dev_data)
{
- struct iommu_dev_data *head, *entry;
struct protection_domain *domain;
- unsigned long flags;
- BUG_ON(!dev_data->domain);
-
- domain = dev_data->domain;
+ /*
+ * Must be called with IRQs disabled. Warn here to detect early
+ * when its not.
+ */
+ WARN_ON(!irqs_disabled());
- spin_lock_irqsave(&domain->lock, flags);
+ if (WARN_ON(!dev_data->domain))
+ return;
- head = dev_data;
- if (head->alias_data != NULL)
- head = head->alias_data;
+ domain = dev_data->domain;
- list_for_each_entry(entry, &head->alias_list, alias_list)
- do_detach(entry);
+ spin_lock(&domain->lock);
- do_detach(head);
+ do_detach(dev_data);
- spin_unlock_irqrestore(&domain->lock, flags);
+ spin_unlock(&domain->lock);
}
/*
@@ -2764,7 +2668,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
if (!page) {
- if (!(flag & __GFP_WAIT))
+ if (!gfpflags_allow_blocking(flag))
return NULL;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
@@ -3198,6 +3102,7 @@ static const struct iommu_ops amd_iommu_ops = {
.iova_to_phys = amd_iommu_iova_to_phys,
.add_device = amd_iommu_add_device,
.remove_device = amd_iommu_remove_device,
+ .device_group = pci_device_group,
.get_dm_regions = amd_iommu_get_dm_regions,
.put_dm_regions = amd_iommu_put_dm_regions,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 1b066e7d1..013bdfff2 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -138,7 +138,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
to handle */
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
-u32 amd_iommu_unmap_flush; /* if true, flush on every unmap */
+bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
@@ -408,20 +408,6 @@ static inline int ivhd_entry_length(u8 *ivhd)
}
/*
- * This function reads the last device id the IOMMU has to handle from the PCI
- * capability header for this IOMMU
- */
-static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
-{
- u32 cap;
-
- cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
-
- return 0;
-}
-
-/*
* After reading the highest device id from the IOMMU PCI capability header
* this function looks if there is a higher device id defined in the ACPI table
*/
@@ -433,14 +419,13 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
p += sizeof(*h);
end += h->length;
- find_last_devid_on_pci(PCI_BUS_NUM(h->devid),
- PCI_SLOT(h->devid),
- PCI_FUNC(h->devid),
- h->cap_ptr);
-
while (p < end) {
dev = (struct ivhd_entry *)p;
switch (dev->type) {
+ case IVHD_DEV_ALL:
+ /* Use maximum BDF value for DEV_ALL */
+ update_last_devid(0xffff);
+ break;
case IVHD_DEV_SELECT:
case IVHD_DEV_RANGE_END:
case IVHD_DEV_ALIAS:
@@ -513,17 +498,12 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
* write commands to that buffer later and the IOMMU will execute them
* asynchronously
*/
-static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
+static int __init alloc_command_buffer(struct amd_iommu *iommu)
{
- u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(CMD_BUFFER_SIZE));
-
- if (cmd_buf == NULL)
- return NULL;
-
- iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
+ iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(CMD_BUFFER_SIZE));
- return cmd_buf;
+ return iommu->cmd_buf ? 0 : -ENOMEM;
}
/*
@@ -557,27 +537,20 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
&entry, sizeof(entry));
amd_iommu_reset_cmd_buffer(iommu);
- iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
}
static void __init free_command_buffer(struct amd_iommu *iommu)
{
- free_pages((unsigned long)iommu->cmd_buf,
- get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
+ free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
}
/* allocates the memory where the IOMMU will log its events to */
-static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
+static int __init alloc_event_buffer(struct amd_iommu *iommu)
{
- iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(EVT_BUFFER_SIZE));
+ iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(EVT_BUFFER_SIZE));
- if (iommu->evt_buf == NULL)
- return NULL;
-
- iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
- return iommu->evt_buf;
+ return iommu->evt_buf ? 0 : -ENOMEM;
}
static void iommu_enable_event_buffer(struct amd_iommu *iommu)
@@ -604,15 +577,12 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
}
/* allocates the memory where the IOMMU will log its events to */
-static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
+static int __init alloc_ppr_log(struct amd_iommu *iommu)
{
- iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(PPR_LOG_SIZE));
-
- if (iommu->ppr_log == NULL)
- return NULL;
+ iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(PPR_LOG_SIZE));
- return iommu->ppr_log;
+ return iommu->ppr_log ? 0 : -ENOMEM;
}
static void iommu_enable_ppr_log(struct amd_iommu *iommu)
@@ -835,20 +805,10 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
switch (e->type) {
case IVHD_DEV_ALL:
- DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x"
- " last device %02x:%02x.%x flags: %02x\n",
- PCI_BUS_NUM(iommu->first_device),
- PCI_SLOT(iommu->first_device),
- PCI_FUNC(iommu->first_device),
- PCI_BUS_NUM(iommu->last_device),
- PCI_SLOT(iommu->last_device),
- PCI_FUNC(iommu->last_device),
- e->flags);
+ DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags);
- for (dev_i = iommu->first_device;
- dev_i <= iommu->last_device; ++dev_i)
- set_dev_entry_from_acpi(iommu, dev_i,
- e->flags, 0);
+ for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
+ set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
break;
case IVHD_DEV_SELECT:
@@ -1004,17 +964,6 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
return 0;
}
-/* Initializes the device->iommu mapping for the driver */
-static int __init init_iommu_devices(struct amd_iommu *iommu)
-{
- u32 i;
-
- for (i = iommu->first_device; i <= iommu->last_device; ++i)
- set_iommu_for_device(iommu, i);
-
- return 0;
-}
-
static void __init free_iommu_one(struct amd_iommu *iommu)
{
free_command_buffer(iommu);
@@ -1111,12 +1060,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (!iommu->mmio_base)
return -ENOMEM;
- iommu->cmd_buf = alloc_command_buffer(iommu);
- if (!iommu->cmd_buf)
+ if (alloc_command_buffer(iommu))
return -ENOMEM;
- iommu->evt_buf = alloc_event_buffer(iommu);
- if (!iommu->evt_buf)
+ if (alloc_event_buffer(iommu))
return -ENOMEM;
iommu->int_enabled = false;
@@ -1135,8 +1082,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
*/
amd_iommu_rlookup_table[iommu->devid] = NULL;
- init_iommu_devices(iommu);
-
return 0;
}
@@ -1266,11 +1211,6 @@ static int iommu_init_pci(struct amd_iommu *iommu)
pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
&misc);
- iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range),
- MMIO_GET_FD(range));
- iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range),
- MMIO_GET_LD(range));
-
if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
amd_iommu_iotlb_sup = false;
@@ -1308,11 +1248,8 @@ static int iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_v2_present = true;
}
- if (iommu_feature(iommu, FEATURE_PPR)) {
- iommu->ppr_log = alloc_ppr_log(iommu);
- if (!iommu->ppr_log)
- return -ENOMEM;
- }
+ if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
+ return -ENOMEM;
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
amd_iommu_np_cache = true;
@@ -1758,11 +1695,8 @@ static void __init free_on_init_error(void)
free_pages((unsigned long)irq_lookup_table,
get_order(rlookup_table_size));
- if (amd_iommu_irq_cache) {
- kmem_cache_destroy(amd_iommu_irq_cache);
- amd_iommu_irq_cache = NULL;
-
- }
+ kmem_cache_destroy(amd_iommu_irq_cache);
+ amd_iommu_irq_cache = NULL;
free_pages((unsigned long)amd_iommu_rlookup_table,
get_order(rlookup_table_size));
@@ -2201,7 +2135,7 @@ int __init amd_iommu_detect(void)
iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
- return 0;
+ return 1;
}
/****************************************************************************
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index c9b64722f..b08cf57bf 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -295,9 +295,9 @@
#define IOMMU_PTE_IR (1ULL << 61)
#define IOMMU_PTE_IW (1ULL << 62)
+#define DTE_FLAG_IOTLB (1ULL << 32)
+#define DTE_FLAG_GV (1ULL << 55)
#define DTE_FLAG_MASK (0x3ffULL << 32)
-#define DTE_FLAG_IOTLB (0x01UL << 32)
-#define DTE_FLAG_GV (0x01ULL << 55)
#define DTE_GLX_SHIFT (56)
#define DTE_GLX_MASK (3)
@@ -517,11 +517,6 @@ struct amd_iommu {
/* pci domain of this IOMMU */
u16 pci_seg;
- /* first device this IOMMU handles. read from PCI */
- u16 first_device;
- /* last device this IOMMU handles. read from PCI */
- u16 last_device;
-
/* start of exclusion range of that IOMMU */
u64 exclusion_start;
/* length of exclusion range of that IOMMU */
@@ -529,11 +524,7 @@ struct amd_iommu {
/* command buffer virtual address */
u8 *cmd_buf;
- /* size of command buffer */
- u32 cmd_buf_size;
- /* size of event buffer */
- u32 evt_buf_size;
/* event buffer virtual address */
u8 *evt_buf;
@@ -675,7 +666,7 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
* If true, the addresses will be flushed on unmap time, not when
* they are reused
*/
-extern u32 amd_iommu_unmap_flush;
+extern bool amd_iommu_unmap_flush;
/* Smallest max PASID supported by any IOMMU in the system */
extern u32 amd_iommu_max_pasid;
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index d21d4edf7..7caf2fa23 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -494,6 +494,22 @@ static void handle_fault_error(struct fault *fault)
}
}
+static bool access_error(struct vm_area_struct *vma, struct fault *fault)
+{
+ unsigned long requested = 0;
+
+ if (fault->flags & PPR_FAULT_EXEC)
+ requested |= VM_EXEC;
+
+ if (fault->flags & PPR_FAULT_READ)
+ requested |= VM_READ;
+
+ if (fault->flags & PPR_FAULT_WRITE)
+ requested |= VM_WRITE;
+
+ return (requested & ~vma->vm_flags) != 0;
+}
+
static void do_fault(struct work_struct *work)
{
struct fault *fault = container_of(work, struct fault, work);
@@ -516,8 +532,8 @@ static void do_fault(struct work_struct *work)
goto out;
}
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) {
- /* handle_mm_fault would BUG_ON() */
+ /* Check if we have the right permissions on the vma */
+ if (access_error(vma, fault)) {
up_read(&mm->mmap_sem);
handle_fault_error(fault);
goto out;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 286e890e7..4e5118a4c 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -26,8 +26,10 @@
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
+#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
@@ -403,6 +405,31 @@ enum pri_resp {
PRI_RESP_SUCC,
};
+enum arm_smmu_msi_index {
+ EVTQ_MSI_INDEX,
+ GERROR_MSI_INDEX,
+ PRIQ_MSI_INDEX,
+ ARM_SMMU_MAX_MSIS,
+};
+
+static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
+ [EVTQ_MSI_INDEX] = {
+ ARM_SMMU_EVTQ_IRQ_CFG0,
+ ARM_SMMU_EVTQ_IRQ_CFG1,
+ ARM_SMMU_EVTQ_IRQ_CFG2,
+ },
+ [GERROR_MSI_INDEX] = {
+ ARM_SMMU_GERROR_IRQ_CFG0,
+ ARM_SMMU_GERROR_IRQ_CFG1,
+ ARM_SMMU_GERROR_IRQ_CFG2,
+ },
+ [PRIQ_MSI_INDEX] = {
+ ARM_SMMU_PRIQ_IRQ_CFG0,
+ ARM_SMMU_PRIQ_IRQ_CFG1,
+ ARM_SMMU_PRIQ_IRQ_CFG2,
+ },
+};
+
struct arm_smmu_cmdq_ent {
/* Common fields */
u8 opcode;
@@ -570,7 +597,6 @@ struct arm_smmu_device {
unsigned int sid_bits;
struct arm_smmu_strtab_cfg strtab_cfg;
- struct list_head list;
};
/* SMMU private data for an IOMMU group */
@@ -605,10 +631,6 @@ struct arm_smmu_domain {
struct iommu_domain domain;
};
-/* Our list of SMMU instances */
-static DEFINE_SPINLOCK(arm_smmu_devices_lock);
-static LIST_HEAD(arm_smmu_devices);
-
struct arm_smmu_option_prop {
u32 opt;
const char *prop;
@@ -1427,7 +1449,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
- u16 asid;
+ int asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
@@ -1439,10 +1461,11 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
&cfg->cdptr_dma, GFP_KERNEL);
if (!cfg->cdptr) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n");
+ ret = -ENOMEM;
goto out_free_asid;
}
- cfg->cd.asid = asid;
+ cfg->cd.asid = (u16)asid;
cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
@@ -1456,7 +1479,7 @@ out_free_asid:
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
- u16 vmid;
+ int vmid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
@@ -1464,7 +1487,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
if (IS_ERR_VALUE(vmid))
return vmid;
- cfg->vmid = vmid;
+ cfg->vmid = (u16)vmid;
cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
return 0;
@@ -1726,7 +1749,8 @@ static void __arm_smmu_release_pci_iommudata(void *data)
static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
{
struct device_node *of_node;
- struct arm_smmu_device *curr, *smmu = NULL;
+ struct platform_device *smmu_pdev;
+ struct arm_smmu_device *smmu = NULL;
struct pci_bus *bus = pdev->bus;
/* Walk up to the root bus */
@@ -1739,14 +1763,10 @@ static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
return NULL;
/* See if we can find an SMMU corresponding to the phandle */
- spin_lock(&arm_smmu_devices_lock);
- list_for_each_entry(curr, &arm_smmu_devices, list) {
- if (curr->dev->of_node == of_node) {
- smmu = curr;
- break;
- }
- }
- spin_unlock(&arm_smmu_devices_lock);
+ smmu_pdev = of_find_device_by_node(of_node);
+ if (smmu_pdev)
+ smmu = platform_get_drvdata(smmu_pdev);
+
of_node_put(of_node);
return smmu;
}
@@ -1902,6 +1922,7 @@ static struct iommu_ops arm_smmu_ops = {
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device,
+ .device_group = pci_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
@@ -2186,6 +2207,72 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
1, ARM_SMMU_POLL_TIMEOUT_US);
}
+static void arm_smmu_free_msis(void *data)
+{
+ struct device *dev = data;
+ platform_msi_domain_free_irqs(dev);
+}
+
+static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ phys_addr_t doorbell;
+ struct device *dev = msi_desc_to_dev(desc);
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
+
+ doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
+ doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
+
+ writeq_relaxed(doorbell, smmu->base + cfg[0]);
+ writel_relaxed(msg->data, smmu->base + cfg[1]);
+ writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+}
+
+static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
+{
+ struct msi_desc *desc;
+ int ret, nvec = ARM_SMMU_MAX_MSIS;
+ struct device *dev = smmu->dev;
+
+ /* Clear the MSI address regs */
+ writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
+ writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+
+ if (smmu->features & ARM_SMMU_FEAT_PRI)
+ writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
+ else
+ nvec--;
+
+ if (!(smmu->features & ARM_SMMU_FEAT_MSI))
+ return;
+
+ /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
+ ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
+ if (ret) {
+ dev_warn(dev, "failed to allocate MSIs\n");
+ return;
+ }
+
+ for_each_msi_entry(desc, dev) {
+ switch (desc->platform.msi_index) {
+ case EVTQ_MSI_INDEX:
+ smmu->evtq.q.irq = desc->irq;
+ break;
+ case GERROR_MSI_INDEX:
+ smmu->gerr_irq = desc->irq;
+ break;
+ case PRIQ_MSI_INDEX:
+ smmu->priq.q.irq = desc->irq;
+ break;
+ default: /* Unknown */
+ continue;
+ }
+ }
+
+ /* Add callback to free MSIs on teardown */
+ devm_add_action(dev, arm_smmu_free_msis, dev);
+}
+
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
int ret, irq;
@@ -2199,11 +2286,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
return ret;
}
- /* Clear the MSI address regs */
- writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
- writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+ arm_smmu_setup_msis(smmu);
- /* Request wired interrupt lines */
+ /* Request interrupt lines */
irq = smmu->evtq.q.irq;
if (irq) {
ret = devm_request_threaded_irq(smmu->dev, irq,
@@ -2232,8 +2317,6 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
}
if (smmu->features & ARM_SMMU_FEAT_PRI) {
- writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
-
irq = smmu->priq.q.irq;
if (irq) {
ret = devm_request_threaded_irq(smmu->dev, irq,
@@ -2612,16 +2695,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
if (ret)
return ret;
+ /* Record our private device structure */
+ platform_set_drvdata(pdev, smmu);
+
/* Reset the device */
ret = arm_smmu_device_reset(smmu);
if (ret)
goto out_free_structures;
- /* Record our private device structure */
- INIT_LIST_HEAD(&smmu->list);
- spin_lock(&arm_smmu_devices_lock);
- list_add(&smmu->list, &arm_smmu_devices);
- spin_unlock(&arm_smmu_devices_lock);
return 0;
out_free_structures:
@@ -2631,21 +2712,7 @@ out_free_structures:
static int arm_smmu_device_remove(struct platform_device *pdev)
{
- struct arm_smmu_device *curr, *smmu = NULL;
- struct device *dev = &pdev->dev;
-
- spin_lock(&arm_smmu_devices_lock);
- list_for_each_entry(curr, &arm_smmu_devices, list) {
- if (curr->dev == dev) {
- smmu = curr;
- list_del(&smmu->list);
- break;
- }
- }
- spin_unlock(&arm_smmu_devices_lock);
-
- if (!smmu)
- return -ENODEV;
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
arm_smmu_device_disable(smmu);
arm_smmu_free_structures(smmu);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 48a39dfa9..47dc7a793 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -70,6 +70,18 @@
((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
? 0x400 : 0))
+#ifdef CONFIG_64BIT
+#define smmu_writeq writeq_relaxed
+#else
+#define smmu_writeq(reg64, addr) \
+ do { \
+ u64 __val = (reg64); \
+ void __iomem *__addr = (addr); \
+ writel_relaxed(__val >> 32, __addr + 4); \
+ writel_relaxed(__val, __addr); \
+ } while (0)
+#endif
+
/* Configuration registers */
#define ARM_SMMU_GR0_sCR0 0x0
#define sCR0_CLIENTPD (1 << 0)
@@ -185,10 +197,8 @@
#define ARM_SMMU_CB_SCTLR 0x0
#define ARM_SMMU_CB_RESUME 0x8
#define ARM_SMMU_CB_TTBCR2 0x10
-#define ARM_SMMU_CB_TTBR0_LO 0x20
-#define ARM_SMMU_CB_TTBR0_HI 0x24
-#define ARM_SMMU_CB_TTBR1_LO 0x28
-#define ARM_SMMU_CB_TTBR1_HI 0x2c
+#define ARM_SMMU_CB_TTBR0 0x20
+#define ARM_SMMU_CB_TTBR1 0x28
#define ARM_SMMU_CB_TTBCR 0x30
#define ARM_SMMU_CB_S1_MAIR0 0x38
#define ARM_SMMU_CB_S1_MAIR1 0x3c
@@ -226,7 +236,7 @@
#define TTBCR2_SEP_SHIFT 15
#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
-#define TTBRn_HI_ASID_SHIFT 16
+#define TTBRn_ASID_SHIFT 48
#define FSR_MULTI (1 << 31)
#define FSR_SS (1 << 30)
@@ -695,12 +705,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg)
{
u32 reg;
+ u64 reg64;
bool stage1;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- void __iomem *cb_base, *gr0_base, *gr1_base;
+ void __iomem *cb_base, *gr1_base;
- gr0_base = ARM_SMMU_GR0(smmu);
gr1_base = ARM_SMMU_GR1(smmu);
stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
@@ -738,22 +748,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
/* TTBRs */
if (stage1) {
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
- reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
- reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
- reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+ reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+
+ reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+ smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+
+ reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+ reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+ smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1);
} else {
- reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
- reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32;
- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+ reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+ smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
}
/* TTBCR */
@@ -1212,17 +1217,15 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
/* ATS1 registers can only be written atomically */
va = iova & ~0xfffUL;
-#ifdef CONFIG_64BIT
if (smmu->version == ARM_SMMU_V2)
- writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+ smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR);
else
-#endif
writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
!(tmp & ATSR_ACTIVE), 5, 50)) {
dev_err(dev,
- "iova to phys timed out on 0x%pad. Falling back to software table walk.\n",
+ "iova to phys timed out on %pad. Falling back to software table walk.\n",
&iova);
return ops->iova_to_phys(ops, iova);
}
@@ -1292,33 +1295,25 @@ static void __arm_smmu_release_pci_iommudata(void *data)
kfree(data);
}
-static int arm_smmu_add_pci_device(struct pci_dev *pdev)
+static int arm_smmu_init_pci_device(struct pci_dev *pdev,
+ struct iommu_group *group)
{
- int i, ret;
- u16 sid;
- struct iommu_group *group;
struct arm_smmu_master_cfg *cfg;
-
- group = iommu_group_get_for_dev(&pdev->dev);
- if (IS_ERR(group))
- return PTR_ERR(group);
+ u16 sid;
+ int i;
cfg = iommu_group_get_iommudata(group);
if (!cfg) {
cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
- if (!cfg) {
- ret = -ENOMEM;
- goto out_put_group;
- }
+ if (!cfg)
+ return -ENOMEM;
iommu_group_set_iommudata(group, cfg,
__arm_smmu_release_pci_iommudata);
}
- if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) {
- ret = -ENOSPC;
- goto out_put_group;
- }
+ if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
+ return -ENOSPC;
/*
* Assume Stream ID == Requester ID for now.
@@ -1334,16 +1329,13 @@ static int arm_smmu_add_pci_device(struct pci_dev *pdev)
cfg->streamids[cfg->num_streamids++] = sid;
return 0;
-out_put_group:
- iommu_group_put(group);
- return ret;
}
-static int arm_smmu_add_platform_device(struct device *dev)
+static int arm_smmu_init_platform_device(struct device *dev,
+ struct iommu_group *group)
{
- struct iommu_group *group;
- struct arm_smmu_master *master;
struct arm_smmu_device *smmu = find_smmu_for_device(dev);
+ struct arm_smmu_master *master;
if (!smmu)
return -ENODEV;
@@ -1352,21 +1344,20 @@ static int arm_smmu_add_platform_device(struct device *dev)
if (!master)
return -ENODEV;
- /* No automatic group creation for platform devices */
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return PTR_ERR(group);
-
iommu_group_set_iommudata(group, &master->cfg, NULL);
- return iommu_group_add_device(group, dev);
+
+ return 0;
}
static int arm_smmu_add_device(struct device *dev)
{
- if (dev_is_pci(dev))
- return arm_smmu_add_pci_device(to_pci_dev(dev));
+ struct iommu_group *group;
+
+ group = iommu_group_get_for_dev(dev);
+ if (IS_ERR(group))
+ return PTR_ERR(group);
- return arm_smmu_add_platform_device(dev);
+ return 0;
}
static void arm_smmu_remove_device(struct device *dev)
@@ -1374,6 +1365,32 @@ static void arm_smmu_remove_device(struct device *dev)
iommu_group_remove_device(dev);
}
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+ struct iommu_group *group;
+ int ret;
+
+ if (dev_is_pci(dev))
+ group = pci_device_group(dev);
+ else
+ group = generic_device_group(dev);
+
+ if (IS_ERR(group))
+ return group;
+
+ if (dev_is_pci(dev))
+ ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
+ else
+ ret = arm_smmu_init_platform_device(dev, group);
+
+ if (ret) {
+ iommu_group_put(group);
+ group = ERR_PTR(ret);
+ }
+
+ return group;
+}
+
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
enum iommu_attr attr, void *data)
{
@@ -1430,6 +1447,7 @@ static struct iommu_ops arm_smmu_ops = {
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device,
+ .device_group = arm_smmu_device_group,
.domain_get_attr = arm_smmu_domain_get_attr,
.domain_set_attr = arm_smmu_domain_set_attr,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
new file mode 100644
index 000000000..72d618266
--- /dev/null
+++ b/drivers/iommu/dma-iommu.c
@@ -0,0 +1,529 @@
+/*
+ * A fairly generic DMA-API to IOMMU-API glue layer.
+ *
+ * Copyright (C) 2014-2015 ARM Ltd.
+ *
+ * based in part on arch/arm/mm/dma-mapping.c:
+ * Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-iommu.h>
+#include <linux/gfp.h>
+#include <linux/huge_mm.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/vmalloc.h>
+
+int iommu_dma_init(void)
+{
+ return iova_cache_get();
+}
+
+/**
+ * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
+ * @domain: IOMMU domain to prepare for DMA-API usage
+ *
+ * IOMMU drivers should normally call this from their domain_alloc
+ * callback when domain->type == IOMMU_DOMAIN_DMA.
+ */
+int iommu_get_dma_cookie(struct iommu_domain *domain)
+{
+ struct iova_domain *iovad;
+
+ if (domain->iova_cookie)
+ return -EEXIST;
+
+ iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
+ domain->iova_cookie = iovad;
+
+ return iovad ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(iommu_get_dma_cookie);
+
+/**
+ * iommu_put_dma_cookie - Release a domain's DMA mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ *
+ * IOMMU drivers should normally call this from their domain_free callback.
+ */
+void iommu_put_dma_cookie(struct iommu_domain *domain)
+{
+ struct iova_domain *iovad = domain->iova_cookie;
+
+ if (!iovad)
+ return;
+
+ put_iova_domain(iovad);
+ kfree(iovad);
+ domain->iova_cookie = NULL;
+}
+EXPORT_SYMBOL(iommu_put_dma_cookie);
+
+/**
+ * iommu_dma_init_domain - Initialise a DMA mapping domain
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ * @base: IOVA at which the mappable address space starts
+ * @size: Size of IOVA space
+ *
+ * @base and @size should be exact multiples of IOMMU page granularity to
+ * avoid rounding surprises. If necessary, we reserve the page at address 0
+ * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
+ * any change which could make prior IOVAs invalid will fail.
+ */
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size)
+{
+ struct iova_domain *iovad = domain->iova_cookie;
+ unsigned long order, base_pfn, end_pfn;
+
+ if (!iovad)
+ return -ENODEV;
+
+ /* Use the smallest supported page size for IOVA granularity */
+ order = __ffs(domain->ops->pgsize_bitmap);
+ base_pfn = max_t(unsigned long, 1, base >> order);
+ end_pfn = (base + size - 1) >> order;
+
+ /* Check the domain allows at least some access to the device... */
+ if (domain->geometry.force_aperture) {
+ if (base > domain->geometry.aperture_end ||
+ base + size <= domain->geometry.aperture_start) {
+ pr_warn("specified DMA range outside IOMMU capability\n");
+ return -EFAULT;
+ }
+ /* ...then finally give it a kicking to make sure it fits */
+ base_pfn = max_t(unsigned long, base_pfn,
+ domain->geometry.aperture_start >> order);
+ end_pfn = min_t(unsigned long, end_pfn,
+ domain->geometry.aperture_end >> order);
+ }
+
+ /* All we can safely do with an existing domain is enlarge it */
+ if (iovad->start_pfn) {
+ if (1UL << order != iovad->granule ||
+ base_pfn != iovad->start_pfn ||
+ end_pfn < iovad->dma_32bit_pfn) {
+ pr_warn("Incompatible range for DMA domain\n");
+ return -EFAULT;
+ }
+ iovad->dma_32bit_pfn = end_pfn;
+ } else {
+ init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(iommu_dma_init_domain);
+
+/**
+ * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
+ * @dir: Direction of DMA transfer
+ * @coherent: Is the DMA master cache-coherent?
+ *
+ * Return: corresponding IOMMU API page protection flags
+ */
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
+{
+ int prot = coherent ? IOMMU_CACHE : 0;
+
+ switch (dir) {
+ case DMA_BIDIRECTIONAL:
+ return prot | IOMMU_READ | IOMMU_WRITE;
+ case DMA_TO_DEVICE:
+ return prot | IOMMU_READ;
+ case DMA_FROM_DEVICE:
+ return prot | IOMMU_WRITE;
+ default:
+ return 0;
+ }
+}
+
+static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+ dma_addr_t dma_limit)
+{
+ unsigned long shift = iova_shift(iovad);
+ unsigned long length = iova_align(iovad, size) >> shift;
+
+ /*
+ * Enforce size-alignment to be safe - there could perhaps be an
+ * attribute to control this per-device, or at least per-domain...
+ */
+ return alloc_iova(iovad, length, dma_limit >> shift, true);
+}
+
+/* The IOVA allocator knows what we mapped, so just unmap whatever that was */
+static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
+{
+ struct iova_domain *iovad = domain->iova_cookie;
+ unsigned long shift = iova_shift(iovad);
+ unsigned long pfn = dma_addr >> shift;
+ struct iova *iova = find_iova(iovad, pfn);
+ size_t size;
+
+ if (WARN_ON(!iova))
+ return;
+
+ size = iova_size(iova) << shift;
+ size -= iommu_unmap(domain, pfn << shift, size);
+ /* ...and if we can't, then something is horribly, horribly wrong */
+ WARN_ON(size > 0);
+ __free_iova(iovad, iova);
+}
+
+static void __iommu_dma_free_pages(struct page **pages, int count)
+{
+ while (count--)
+ __free_page(pages[count]);
+ kvfree(pages);
+}
+
+static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+{
+ struct page **pages;
+ unsigned int i = 0, array_size = count * sizeof(*pages);
+ unsigned int order = MAX_ORDER;
+
+ if (array_size <= PAGE_SIZE)
+ pages = kzalloc(array_size, GFP_KERNEL);
+ else
+ pages = vzalloc(array_size);
+ if (!pages)
+ return NULL;
+
+ /* IOMMU can map any pages, so himem can also be used here */
+ gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+
+ while (count) {
+ struct page *page = NULL;
+ int j;
+
+ /*
+ * Higher-order allocations are a convenience rather
+ * than a necessity, hence using __GFP_NORETRY until
+ * falling back to single-page allocations.
+ */
+ for (order = min_t(unsigned int, order, __fls(count));
+ order > 0; order--) {
+ page = alloc_pages(gfp | __GFP_NORETRY, order);
+ if (!page)
+ continue;
+ if (PageCompound(page)) {
+ if (!split_huge_page(page))
+ break;
+ __free_pages(page, order);
+ } else {
+ split_page(page, order);
+ break;
+ }
+ }
+ if (!page)
+ page = alloc_page(gfp);
+ if (!page) {
+ __iommu_dma_free_pages(pages, i);
+ return NULL;
+ }
+ j = 1 << order;
+ count -= j;
+ while (j--)
+ pages[i++] = page++;
+ }
+ return pages;
+}
+
+/**
+ * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc()
+ * @dev: Device which owns this buffer
+ * @pages: Array of buffer pages as returned by iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @handle: DMA address of buffer
+ *
+ * Frees both the pages associated with the buffer, and the array
+ * describing them
+ */
+void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
+ dma_addr_t *handle)
+{
+ __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle);
+ __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
+ *handle = DMA_ERROR_CODE;
+}
+
+/**
+ * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
+ * @dev: Device to allocate memory for. Must be a real device
+ * attached to an iommu_dma_domain
+ * @size: Size of buffer in bytes
+ * @gfp: Allocation flags
+ * @prot: IOMMU mapping flags
+ * @handle: Out argument for allocated DMA handle
+ * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
+ * given VA/PA are visible to the given non-coherent device.
+ *
+ * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
+ * but an IOMMU which supports smaller pages might not map the whole thing.
+ *
+ * Return: Array of struct page pointers describing the buffer,
+ * or NULL on failure.
+ */
+struct page **iommu_dma_alloc(struct device *dev, size_t size,
+ gfp_t gfp, int prot, dma_addr_t *handle,
+ void (*flush_page)(struct device *, const void *, phys_addr_t))
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iova_domain *iovad = domain->iova_cookie;
+ struct iova *iova;
+ struct page **pages;
+ struct sg_table sgt;
+ dma_addr_t dma_addr;
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ *handle = DMA_ERROR_CODE;
+
+ pages = __iommu_dma_alloc_pages(count, gfp);
+ if (!pages)
+ return NULL;
+
+ iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+ if (!iova)
+ goto out_free_pages;
+
+ size = iova_align(iovad, size);
+ if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
+ goto out_free_iova;
+
+ if (!(prot & IOMMU_CACHE)) {
+ struct sg_mapping_iter miter;
+ /*
+ * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
+ * sufficient here, so skip it by using the "wrong" direction.
+ */
+ sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG);
+ while (sg_miter_next(&miter))
+ flush_page(dev, miter.addr, page_to_phys(miter.page));
+ sg_miter_stop(&miter);
+ }
+
+ dma_addr = iova_dma_addr(iovad, iova);
+ if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot)
+ < size)
+ goto out_free_sg;
+
+ *handle = dma_addr;
+ sg_free_table(&sgt);
+ return pages;
+
+out_free_sg:
+ sg_free_table(&sgt);
+out_free_iova:
+ __free_iova(iovad, iova);
+out_free_pages:
+ __iommu_dma_free_pages(pages, count);
+ return NULL;
+}
+
+/**
+ * iommu_dma_mmap - Map a buffer into provided user VMA
+ * @pages: Array representing buffer from iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @vma: VMA describing requested userspace mapping
+ *
+ * Maps the pages of the buffer in @pages into @vma. The caller is responsible
+ * for verifying the correct size and protection of @vma beforehand.
+ */
+
+int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
+{
+ unsigned long uaddr = vma->vm_start;
+ unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ int ret = -ENXIO;
+
+ for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) {
+ ret = vm_insert_page(vma, uaddr, pages[i]);
+ if (ret)
+ break;
+ uaddr += PAGE_SIZE;
+ }
+ return ret;
+}
+
+dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size, int prot)
+{
+ dma_addr_t dma_addr;
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iova_domain *iovad = domain->iova_cookie;
+ phys_addr_t phys = page_to_phys(page) + offset;
+ size_t iova_off = iova_offset(iovad, phys);
+ size_t len = iova_align(iovad, size + iova_off);
+ struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+
+ if (!iova)
+ return DMA_ERROR_CODE;
+
+ dma_addr = iova_dma_addr(iovad, iova);
+ if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) {
+ __free_iova(iovad, iova);
+ return DMA_ERROR_CODE;
+ }
+ return dma_addr + iova_off;
+}
+
+void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+}
+
+/*
+ * Prepare a successfully-mapped scatterlist to give back to the caller.
+ * Handling IOVA concatenation can come later, if needed
+ */
+static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
+ dma_addr_t dma_addr)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ /* Un-swizzling the fields here, hence the naming mismatch */
+ unsigned int s_offset = sg_dma_address(s);
+ unsigned int s_length = sg_dma_len(s);
+ unsigned int s_dma_len = s->length;
+
+ s->offset = s_offset;
+ s->length = s_length;
+ sg_dma_address(s) = dma_addr + s_offset;
+ dma_addr += s_dma_len;
+ }
+ return i;
+}
+
+/*
+ * If mapping failed, then just restore the original list,
+ * but making sure the DMA fields are invalidated.
+ */
+static void __invalidate_sg(struct scatterlist *sg, int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ if (sg_dma_address(s) != DMA_ERROR_CODE)
+ s->offset = sg_dma_address(s);
+ if (sg_dma_len(s))
+ s->length = sg_dma_len(s);
+ sg_dma_address(s) = DMA_ERROR_CODE;
+ sg_dma_len(s) = 0;
+ }
+}
+
+/*
+ * The DMA API client is passing in a scatterlist which could describe
+ * any old buffer layout, but the IOMMU API requires everything to be
+ * aligned to IOMMU pages. Hence the need for this complicated bit of
+ * impedance-matching, to be able to hand off a suitably-aligned list,
+ * but still preserve the original offsets and sizes for the caller.
+ */
+int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int prot)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iova_domain *iovad = domain->iova_cookie;
+ struct iova *iova;
+ struct scatterlist *s, *prev = NULL;
+ dma_addr_t dma_addr;
+ size_t iova_len = 0;
+ int i;
+
+ /*
+ * Work out how much IOVA space we need, and align the segments to
+ * IOVA granules for the IOMMU driver to handle. With some clever
+ * trickery we can modify the list in-place, but reversibly, by
+ * hiding the original data in the as-yet-unused DMA fields.
+ */
+ for_each_sg(sg, s, nents, i) {
+ size_t s_offset = iova_offset(iovad, s->offset);
+ size_t s_length = s->length;
+
+ sg_dma_address(s) = s_offset;
+ sg_dma_len(s) = s_length;
+ s->offset -= s_offset;
+ s_length = iova_align(iovad, s_length + s_offset);
+ s->length = s_length;
+
+ /*
+ * The simple way to avoid the rare case of a segment
+ * crossing the boundary mask is to pad the previous one
+ * to end at a naturally-aligned IOVA for this one's size,
+ * at the cost of potentially over-allocating a little.
+ */
+ if (prev) {
+ size_t pad_len = roundup_pow_of_two(s_length);
+
+ pad_len = (pad_len - iova_len) & (pad_len - 1);
+ prev->length += pad_len;
+ iova_len += pad_len;
+ }
+
+ iova_len += s_length;
+ prev = s;
+ }
+
+ iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+ if (!iova)
+ goto out_restore_sg;
+
+ /*
+ * We'll leave any physical concatenation to the IOMMU driver's
+ * implementation - it knows better than we do.
+ */
+ dma_addr = iova_dma_addr(iovad, iova);
+ if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len)
+ goto out_free_iova;
+
+ return __finalise_sg(dev, sg, nents, dma_addr);
+
+out_free_iova:
+ __free_iova(iovad, iova);
+out_restore_sg:
+ __invalidate_sg(sg, nents);
+ return 0;
+}
+
+void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ /*
+ * The scatterlist segments are mapped into a single
+ * contiguous IOVA allocation, so this is incredibly easy.
+ */
+ __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg));
+}
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+ /*
+ * 'Special' IOMMUs which don't have the same addressing capability
+ * as the CPU will have to wait until we have some way to query that
+ * before they'll be able to use this framework.
+ */
+ return 1;
+}
+
+int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr == DMA_ERROR_CODE;
+}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 8757f8dfc..80e3c1760 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1086,6 +1086,11 @@ static void free_iommu(struct intel_iommu *iommu)
iommu_device_destroy(iommu->iommu_dev);
if (iommu->irq) {
+ if (iommu->pr_irq) {
+ free_irq(iommu->pr_irq, iommu);
+ dmar_free_hwirq(iommu->pr_irq);
+ iommu->pr_irq = 0;
+ }
free_irq(iommu->irq, iommu);
dmar_free_hwirq(iommu->irq);
iommu->irq = 0;
@@ -1493,53 +1498,68 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
}
}
+
+static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
+{
+ if (iommu->irq == irq)
+ return DMAR_FECTL_REG;
+ else if (iommu->pr_irq == irq)
+ return DMAR_PECTL_REG;
+ else
+ BUG();
+}
+
void dmar_msi_unmask(struct irq_data *data)
{
struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
+ int reg = dmar_msi_reg(iommu, data->irq);
unsigned long flag;
/* unmask it */
raw_spin_lock_irqsave(&iommu->register_lock, flag);
- writel(0, iommu->reg + DMAR_FECTL_REG);
+ writel(0, iommu->reg + reg);
/* Read a reg to force flush the post write */
- readl(iommu->reg + DMAR_FECTL_REG);
+ readl(iommu->reg + reg);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_mask(struct irq_data *data)
{
- unsigned long flag;
struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
+ int reg = dmar_msi_reg(iommu, data->irq);
+ unsigned long flag;
/* mask it */
raw_spin_lock_irqsave(&iommu->register_lock, flag);
- writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
+ writel(DMA_FECTL_IM, iommu->reg + reg);
/* Read a reg to force flush the post write */
- readl(iommu->reg + DMAR_FECTL_REG);
+ readl(iommu->reg + reg);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_write(int irq, struct msi_msg *msg)
{
struct intel_iommu *iommu = irq_get_handler_data(irq);
+ int reg = dmar_msi_reg(iommu, irq);
unsigned long flag;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
- writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
- writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
- writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
+ writel(msg->data, iommu->reg + reg + 4);
+ writel(msg->address_lo, iommu->reg + reg + 8);
+ writel(msg->address_hi, iommu->reg + reg + 12);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
void dmar_msi_read(int irq, struct msi_msg *msg)
{
struct intel_iommu *iommu = irq_get_handler_data(irq);
+ int reg = dmar_msi_reg(iommu, irq);
unsigned long flag;
raw_spin_lock_irqsave(&iommu->register_lock, flag);
- msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
- msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
- msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
+ msg->data = readl(iommu->reg + reg + 4);
+ msg->address_lo = readl(iommu->reg + reg + 8);
+ msg->address_hi = readl(iommu->reg + reg + 12);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index 2570f2a25..a34355fca 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -20,11 +20,11 @@
#include "fsl_pamu.h"
+#include <linux/fsl/guts.h>
#include <linux/interrupt.h>
#include <linux/genalloc.h>
#include <asm/mpc85xx.h>
-#include <asm/fsl_guts.h>
/* define indexes for each operation mapping scenario */
#define OMI_QMAN 0x00
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 1d452930c..da0e1e30e 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -923,7 +923,7 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
/* We can partition PCIe devices so assign device group to the device */
if (pci_endpt_partioning) {
- group = iommu_group_get_for_dev(&pdev->dev);
+ group = pci_device_group(&pdev->dev);
/*
* PCIe controller is not a paritionable entity
@@ -956,44 +956,34 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
return group;
}
-static int fsl_pamu_add_device(struct device *dev)
+static struct iommu_group *fsl_pamu_device_group(struct device *dev)
{
struct iommu_group *group = ERR_PTR(-ENODEV);
- struct pci_dev *pdev;
- const u32 *prop;
- int ret = 0, len;
+ int len;
/*
* For platform devices we allocate a separate group for
* each of the devices.
*/
- if (dev_is_pci(dev)) {
- pdev = to_pci_dev(dev);
- /* Don't create device groups for virtual PCI bridges */
- if (pdev->subordinate)
- return 0;
+ if (dev_is_pci(dev))
+ group = get_pci_device_group(to_pci_dev(dev));
+ else if (of_get_property(dev->of_node, "fsl,liodn", &len))
+ group = get_device_iommu_group(dev);
- group = get_pci_device_group(pdev);
+ return group;
+}
- } else {
- prop = of_get_property(dev->of_node, "fsl,liodn", &len);
- if (prop)
- group = get_device_iommu_group(dev);
- }
+static int fsl_pamu_add_device(struct device *dev)
+{
+ struct iommu_group *group;
+ group = iommu_group_get_for_dev(dev);
if (IS_ERR(group))
return PTR_ERR(group);
- /*
- * Check if device has already been added to an iommu group.
- * Group could have already been created for a PCI device in
- * the iommu_group_get_for_dev path.
- */
- if (!dev->iommu_group)
- ret = iommu_group_add_device(group, dev);
-
iommu_group_put(group);
- return ret;
+
+ return 0;
}
static void fsl_pamu_remove_device(struct device *dev)
@@ -1072,6 +1062,7 @@ static const struct iommu_ops fsl_pamu_ops = {
.domain_get_attr = fsl_pamu_get_domain_attr,
.add_device = fsl_pamu_add_device,
.remove_device = fsl_pamu_remove_device,
+ .device_group = fsl_pamu_device_group,
};
int __init pamu_domain_init(void)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index d65cf4239..ac7387686 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -34,6 +34,7 @@
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/timer.h>
+#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
@@ -418,10 +419,13 @@ struct device_domain_info {
struct list_head global; /* link to global list */
u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
- struct {
- u8 enabled:1;
- u8 qdep;
- } ats; /* ATS state */
+ u8 pasid_supported:3;
+ u8 pasid_enabled:1;
+ u8 pri_supported:1;
+ u8 pri_enabled:1;
+ u8 ats_supported:1;
+ u8 ats_enabled:1;
+ u8 ats_qdep;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
@@ -497,13 +501,37 @@ static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
+static int intel_iommu_pasid28;
+static int iommu_identity_mapping;
+
+#define IDENTMAP_ALL 1
+#define IDENTMAP_GFX 2
+#define IDENTMAP_AZALIA 4
-/* We only actually use ECS when PASID support (on the new bit 40)
- * is also advertised. Some early implementations — the ones with
- * PASID support on bit 28 — have issues even when we *only* use
- * extended root/context tables. */
+/* Broadwell and Skylake have broken ECS support — normal so-called "second
+ * level" translation of DMA requests-without-PASID doesn't actually happen
+ * unless you also set the NESTE bit in an extended context-entry. Which of
+ * course means that SVM doesn't work because it's trying to do nested
+ * translation of the physical addresses it finds in the process page tables,
+ * through the IOVA->phys mapping found in the "second level" page tables.
+ *
+ * The VT-d specification was retroactively changed to change the definition
+ * of the capability bits and pretend that Broadwell/Skylake never happened...
+ * but unfortunately the wrong bit was changed. It's ECS which is broken, but
+ * for some reason it was the PASID capability bit which was redefined (from
+ * bit 28 on BDW/SKL to bit 40 in future).
+ *
+ * So our test for ECS needs to eschew those implementations which set the old
+ * PASID capabiity bit 28, since those are the ones on which ECS is broken.
+ * Unless we are working around the 'pasid28' limitations, that is, by putting
+ * the device into passthrough mode for normal DMA and thus masking the bug.
+ */
#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
- ecap_pasid(iommu->ecap))
+ (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
+/* PASID support is thus enabled if ECS is enabled and *either* of the old
+ * or new capability bits are set. */
+#define pasid_enabled(iommu) (ecs_enabled(iommu) && \
+ (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -566,6 +594,11 @@ static int __init intel_iommu_setup(char *str)
printk(KERN_INFO
"Intel-IOMMU: disable extended context table support\n");
intel_iommu_ecs = 0;
+ } else if (!strncmp(str, "pasid28", 7)) {
+ printk(KERN_INFO
+ "Intel-IOMMU: enable pre-production PASID support\n");
+ intel_iommu_pasid28 = 1;
+ iommu_identity_mapping |= IDENTMAP_GFX;
}
str += strcspn(str, ",");
@@ -1407,37 +1440,22 @@ static struct device_domain_info *
iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
u8 bus, u8 devfn)
{
- bool found = false;
struct device_domain_info *info;
- struct pci_dev *pdev;
assert_spin_locked(&device_domain_lock);
- if (!ecap_dev_iotlb_support(iommu->ecap))
- return NULL;
-
if (!iommu->qi)
return NULL;
list_for_each_entry(info, &domain->devices, link)
if (info->iommu == iommu && info->bus == bus &&
info->devfn == devfn) {
- found = true;
+ if (info->ats_supported && info->dev)
+ return info;
break;
}
- if (!found || !info->dev || !dev_is_pci(info->dev))
- return NULL;
-
- pdev = to_pci_dev(info->dev);
-
- if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
- return NULL;
-
- if (!dmar_find_matched_atsr_unit(pdev))
- return NULL;
-
- return info;
+ return NULL;
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
@@ -1448,20 +1466,48 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
return;
pdev = to_pci_dev(info->dev);
- if (pci_enable_ats(pdev, VTD_PAGE_SHIFT))
- return;
- info->ats.enabled = 1;
- info->ats.qdep = pci_ats_queue_depth(pdev);
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ /* The PCIe spec, in its wisdom, declares that the behaviour of
+ the device if you enable PASID support after ATS support is
+ undefined. So always enable PASID support on devices which
+ have it, even if we can't yet know if we're ever going to
+ use it. */
+ if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
+ info->pasid_enabled = 1;
+
+ if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
+ info->pri_enabled = 1;
+#endif
+ if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
+ info->ats_enabled = 1;
+ info->ats_qdep = pci_ats_queue_depth(pdev);
+ }
}
static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
- if (!info->ats.enabled)
+ struct pci_dev *pdev;
+
+ if (dev_is_pci(info->dev))
return;
- pci_disable_ats(to_pci_dev(info->dev));
- info->ats.enabled = 0;
+ pdev = to_pci_dev(info->dev);
+
+ if (info->ats_enabled) {
+ pci_disable_ats(pdev);
+ info->ats_enabled = 0;
+ }
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ if (info->pri_enabled) {
+ pci_disable_pri(pdev);
+ info->pri_enabled = 0;
+ }
+ if (info->pasid_enabled) {
+ pci_disable_pasid(pdev);
+ info->pasid_enabled = 0;
+ }
+#endif
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -1473,11 +1519,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry(info, &domain->devices, link) {
- if (!info->ats.enabled)
+ if (!info->ats_enabled)
continue;
sid = info->bus << 8 | info->devfn;
- qdep = info->ats.qdep;
+ qdep = info->ats_qdep;
qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -1667,6 +1713,14 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
/* free context mapping */
free_context_table(iommu);
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ if (pasid_enabled(iommu)) {
+ if (ecap_prs(iommu->ecap))
+ intel_svm_finish_prq(iommu);
+ intel_svm_free_pasid_tables(iommu);
+ }
+#endif
}
static struct dmar_domain *alloc_domain(int flags)
@@ -1934,8 +1988,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
}
info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
- translation = info ? CONTEXT_TT_DEV_IOTLB :
- CONTEXT_TT_MULTI_LEVEL;
+ if (info && info->ats_supported)
+ translation = CONTEXT_TT_DEV_IOTLB;
+ else
+ translation = CONTEXT_TT_MULTI_LEVEL;
context_set_address_root(context, virt_to_phys(pgd));
context_set_address_width(context, iommu->agaw);
@@ -2103,7 +2159,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg_res = aligned_nrpages(sg->offset, sg->length);
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
sg->dma_length = sg->length;
- pteval = (sg_phys(sg) & PAGE_MASK) | prot;
+ pteval = page_to_phys(sg_page(sg)) | prot;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -2273,12 +2329,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
info->bus = bus;
info->devfn = devfn;
- info->ats.enabled = 0;
- info->ats.qdep = 0;
+ info->ats_supported = info->pasid_supported = info->pri_supported = 0;
+ info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
+ info->ats_qdep = 0;
info->dev = dev;
info->domain = domain;
info->iommu = iommu;
+ if (dev && dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(info->dev);
+
+ if (ecap_dev_iotlb_support(iommu->ecap) &&
+ pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
+ dmar_find_matched_atsr_unit(pdev))
+ info->ats_supported = 1;
+
+ if (ecs_enabled(iommu)) {
+ if (pasid_enabled(iommu)) {
+ int features = pci_pasid_features(pdev);
+ if (features >= 0)
+ info->pasid_supported = features | 1;
+ }
+
+ if (info->ats_supported && ecap_prs(iommu->ecap) &&
+ pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+ info->pri_supported = 1;
+ }
+ }
+
spin_lock_irqsave(&device_domain_lock, flags);
if (dev)
found = find_domain(dev);
@@ -2404,11 +2482,6 @@ found_domain:
return domain;
}
-static int iommu_identity_mapping;
-#define IDENTMAP_ALL 1
-#define IDENTMAP_GFX 2
-#define IDENTMAP_AZALIA 4
-
static int iommu_domain_identity_map(struct dmar_domain *domain,
unsigned long long start,
unsigned long long end)
@@ -2434,17 +2507,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
DMA_PTE_READ|DMA_PTE_WRITE);
}
-static int iommu_prepare_identity_map(struct device *dev,
- unsigned long long start,
- unsigned long long end)
+static int domain_prepare_identity_map(struct device *dev,
+ struct dmar_domain *domain,
+ unsigned long long start,
+ unsigned long long end)
{
- struct dmar_domain *domain;
- int ret;
-
- domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
- if (!domain)
- return -ENOMEM;
-
/* For _hardware_ passthrough, don't bother. But for software
passthrough, we do it anyway -- it may indicate a memory
range which is reserved in E820, so which didn't get set
@@ -2464,8 +2531,7 @@ static int iommu_prepare_identity_map(struct device *dev,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
- ret = -EIO;
- goto error;
+ return -EIO;
}
if (end >> agaw_to_width(domain->agaw)) {
@@ -2475,18 +2541,27 @@ static int iommu_prepare_identity_map(struct device *dev,
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
- ret = -EIO;
- goto error;
+ return -EIO;
}
- ret = iommu_domain_identity_map(domain, start, end);
- if (ret)
- goto error;
+ return iommu_domain_identity_map(domain, start, end);
+}
- return 0;
+static int iommu_prepare_identity_map(struct device *dev,
+ unsigned long long start,
+ unsigned long long end)
+{
+ struct dmar_domain *domain;
+ int ret;
+
+ domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+ if (!domain)
+ return -ENOMEM;
+
+ ret = domain_prepare_identity_map(dev, domain, start, end);
+ if (ret)
+ domain_exit(domain);
- error:
- domain_exit(domain);
return ret;
}
@@ -2812,18 +2887,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu)
}
static int copy_context_table(struct intel_iommu *iommu,
- struct root_entry __iomem *old_re,
+ struct root_entry *old_re,
struct context_entry **tbl,
int bus, bool ext)
{
int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
- struct context_entry __iomem *old_ce = NULL;
struct context_entry *new_ce = NULL, ce;
+ struct context_entry *old_ce = NULL;
struct root_entry re;
phys_addr_t old_ce_phys;
tbl_idx = ext ? bus * 2 : bus;
- memcpy_fromio(&re, old_re, sizeof(re));
+ memcpy(&re, old_re, sizeof(re));
for (devfn = 0; devfn < 256; devfn++) {
/* First calculate the correct index */
@@ -2858,7 +2933,8 @@ static int copy_context_table(struct intel_iommu *iommu,
}
ret = -ENOMEM;
- old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
+ old_ce = memremap(old_ce_phys, PAGE_SIZE,
+ MEMREMAP_WB);
if (!old_ce)
goto out;
@@ -2870,7 +2946,7 @@ static int copy_context_table(struct intel_iommu *iommu,
}
/* Now copy the context entry */
- memcpy_fromio(&ce, old_ce + idx, sizeof(ce));
+ memcpy(&ce, old_ce + idx, sizeof(ce));
if (!__context_present(&ce))
continue;
@@ -2906,7 +2982,7 @@ static int copy_context_table(struct intel_iommu *iommu,
__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
out_unmap:
- iounmap(old_ce);
+ memunmap(old_ce);
out:
return ret;
@@ -2914,8 +2990,8 @@ out:
static int copy_translation_tables(struct intel_iommu *iommu)
{
- struct root_entry __iomem *old_rt;
struct context_entry **ctxt_tbls;
+ struct root_entry *old_rt;
phys_addr_t old_rt_phys;
int ctxt_table_entries;
unsigned long flags;
@@ -2940,7 +3016,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
if (!old_rt_phys)
return -EINVAL;
- old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
+ old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
if (!old_rt)
return -ENOMEM;
@@ -2989,7 +3065,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
ret = 0;
out_unmap:
- iounmap(old_rt);
+ memunmap(old_rt);
return ret;
}
@@ -3100,6 +3176,10 @@ static int __init init_dmars(void)
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ if (pasid_enabled(iommu))
+ intel_svm_alloc_pasid_tables(iommu);
+#endif
}
if (iommu_pass_through)
@@ -3187,6 +3267,13 @@ domains_done:
iommu_flush_write_buffer(iommu);
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+ ret = intel_svm_enable_prq(iommu);
+ if (ret)
+ goto free_iommu;
+ }
+#endif
ret = dmar_set_interrupt(iommu);
if (ret)
goto free_iommu;
@@ -3246,7 +3333,10 @@ static struct iova *intel_alloc_iova(struct device *dev,
static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
{
+ struct dmar_rmrr_unit *rmrr;
struct dmar_domain *domain;
+ struct device *i_dev;
+ int i, ret;
domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
if (!domain) {
@@ -3255,6 +3345,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
return NULL;
}
+ /* We have a new domain - setup possible RMRRs for the device */
+ rcu_read_lock();
+ for_each_rmrr_units(rmrr) {
+ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
+ i, i_dev) {
+ if (i_dev != dev)
+ continue;
+
+ ret = domain_prepare_identity_map(dev, domain,
+ rmrr->base_address,
+ rmrr->end_address);
+ if (ret)
+ dev_err(dev, "Mapping reserved region failed\n");
+ }
+ }
+ rcu_read_unlock();
+
return domain;
}
@@ -3540,7 +3647,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
flags |= GFP_DMA32;
}
- if (flags & __GFP_WAIT) {
+ if (gfpflags_allow_blocking(flags)) {
unsigned int count = size >> PAGE_SHIFT;
page = dma_alloc_from_contiguous(dev, count, order);
@@ -3597,7 +3704,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
- sg->dma_address = sg_phys(sg);
+ sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
sg->dma_length = sg->length;
}
return nelems;
@@ -4115,6 +4222,11 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (ret)
goto out;
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ if (pasid_enabled(iommu))
+ intel_svm_alloc_pasid_tables(iommu);
+#endif
+
if (dmaru->ignored) {
/*
* we always have to disable PMRs or DMA may fail on this device
@@ -4126,6 +4238,14 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
intel_iommu_init_qi(iommu);
iommu_flush_write_buffer(iommu);
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+ ret = intel_svm_enable_prq(iommu);
+ if (ret)
+ goto disable_iommu;
+ }
+#endif
ret = dmar_set_interrupt(iommu);
if (ret)
goto disable_iommu;
@@ -4194,14 +4314,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev)
dev = pci_physfn(dev);
for (bus = dev->bus; bus; bus = bus->parent) {
bridge = bus->self;
- if (!bridge || !pci_is_pcie(bridge) ||
+ /* If it's an integrated device, allow ATS */
+ if (!bridge)
+ return 1;
+ /* Connected via non-PCIe: no ATS */
+ if (!pci_is_pcie(bridge) ||
pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
return 0;
+ /* If we found the root port, look it up in the ATSR */
if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
break;
}
- if (!bridge)
- return 0;
rcu_read_lock();
list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
@@ -4865,6 +4988,114 @@ static void intel_iommu_remove_device(struct device *dev)
iommu_device_unlink(iommu->iommu_dev, dev);
}
+#ifdef CONFIG_INTEL_IOMMU_SVM
+int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
+{
+ struct device_domain_info *info;
+ struct context_entry *context;
+ struct dmar_domain *domain;
+ unsigned long flags;
+ u64 ctx_lo;
+ int ret;
+
+ domain = get_valid_domain_for_dev(sdev->dev);
+ if (!domain)
+ return -EINVAL;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&iommu->lock);
+
+ ret = -EINVAL;
+ info = sdev->dev->archdata.iommu;
+ if (!info || !info->pasid_supported)
+ goto out;
+
+ context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
+ if (WARN_ON(!context))
+ goto out;
+
+ ctx_lo = context[0].lo;
+
+ sdev->did = domain->iommu_did[iommu->seq_id];
+ sdev->sid = PCI_DEVID(info->bus, info->devfn);
+
+ if (!(ctx_lo & CONTEXT_PASIDE)) {
+ context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
+ context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
+ wmb();
+ /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
+ * extended to permit requests-with-PASID if the PASIDE bit
+ * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
+ * however, the PASIDE bit is ignored and requests-with-PASID
+ * are unconditionally blocked. Which makes less sense.
+ * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
+ * "guest mode" translation types depending on whether ATS
+ * is available or not. Annoyingly, we can't use the new
+ * modes *unless* PASIDE is set. */
+ if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
+ ctx_lo &= ~CONTEXT_TT_MASK;
+ if (info->ats_supported)
+ ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
+ else
+ ctx_lo |= CONTEXT_TT_PT_PASID << 2;
+ }
+ ctx_lo |= CONTEXT_PASIDE;
+ if (iommu->pasid_state_table)
+ ctx_lo |= CONTEXT_DINVE;
+ if (info->pri_supported)
+ ctx_lo |= CONTEXT_PRS;
+ context[0].lo = ctx_lo;
+ wmb();
+ iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ }
+
+ /* Enable PASID support in the device, if it wasn't already */
+ if (!info->pasid_enabled)
+ iommu_enable_dev_iotlb(info);
+
+ if (info->ats_enabled) {
+ sdev->dev_iotlb = 1;
+ sdev->qdep = info->ats_qdep;
+ if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
+ sdev->qdep = 0;
+ }
+ ret = 0;
+
+ out:
+ spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return ret;
+}
+
+struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
+{
+ struct intel_iommu *iommu;
+ u8 bus, devfn;
+
+ if (iommu_dummy(dev)) {
+ dev_warn(dev,
+ "No IOMMU translation for device; cannot enable SVM\n");
+ return NULL;
+ }
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if ((!iommu)) {
+ dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
+ return NULL;
+ }
+
+ if (!iommu->pasid_table) {
+ dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
+ return NULL;
+ }
+
+ return iommu;
+}
+#endif /* CONFIG_INTEL_IOMMU_SVM */
+
static const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
@@ -4877,6 +5108,7 @@ static const struct iommu_ops intel_iommu_ops = {
.iova_to_phys = intel_iommu_iova_to_phys,
.add_device = intel_iommu_add_device,
.remove_device = intel_iommu_remove_device,
+ .device_group = pci_device_group,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
new file mode 100644
index 000000000..50464833d
--- /dev/null
+++ b/drivers/iommu/intel-svm.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright © 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ */
+
+#include <linux/intel-iommu.h>
+#include <linux/mmu_notifier.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/intel-svm.h>
+#include <linux/rculist.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+#include <linux/dmar.h>
+#include <linux/interrupt.h>
+
+static irqreturn_t prq_event_thread(int irq, void *d);
+
+struct pasid_entry {
+ u64 val;
+};
+
+struct pasid_state_entry {
+ u64 val;
+};
+
+int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
+{
+ struct page *pages;
+ int order;
+
+ order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
+ if (order < 0)
+ order = 0;
+
+ pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (!pages) {
+ pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
+ iommu->name);
+ return -ENOMEM;
+ }
+ iommu->pasid_table = page_address(pages);
+ pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
+
+ if (ecap_dis(iommu->ecap)) {
+ pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+ if (pages)
+ iommu->pasid_state_table = page_address(pages);
+ else
+ pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
+ iommu->name);
+ }
+
+ idr_init(&iommu->pasid_idr);
+
+ return 0;
+}
+
+int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
+{
+ int order;
+
+ order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
+ if (order < 0)
+ order = 0;
+
+ if (iommu->pasid_table) {
+ free_pages((unsigned long)iommu->pasid_table, order);
+ iommu->pasid_table = NULL;
+ }
+ if (iommu->pasid_state_table) {
+ free_pages((unsigned long)iommu->pasid_state_table, order);
+ iommu->pasid_state_table = NULL;
+ }
+ idr_destroy(&iommu->pasid_idr);
+ return 0;
+}
+
+#define PRQ_ORDER 0
+
+int intel_svm_enable_prq(struct intel_iommu *iommu)
+{
+ struct page *pages;
+ int irq, ret;
+
+ pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+ if (!pages) {
+ pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
+ iommu->name);
+ return -ENOMEM;
+ }
+ iommu->prq = page_address(pages);
+
+ irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
+ if (irq <= 0) {
+ pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
+ iommu->name);
+ ret = -EINVAL;
+ err:
+ free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+ return ret;
+ }
+ iommu->pr_irq = irq;
+
+ snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
+
+ ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
+ iommu->prq_name, iommu);
+ if (ret) {
+ pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
+ iommu->name);
+ dmar_free_hwirq(irq);
+ goto err;
+ }
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
+
+ return 0;
+}
+
+int intel_svm_finish_prq(struct intel_iommu *iommu)
+{
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
+ dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
+
+ free_irq(iommu->pr_irq, iommu);
+ dmar_free_hwirq(iommu->pr_irq);
+ iommu->pr_irq = 0;
+
+ free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+
+ return 0;
+}
+
+static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
+ unsigned long address, unsigned long pages, int ih, int gl)
+{
+ struct qi_desc desc;
+
+ if (pages == -1) {
+ /* For global kernel pages we have to flush them in *all* PASIDs
+ * because that's the only option the hardware gives us. Despite
+ * the fact that they are actually only accessible through one. */
+ if (gl)
+ desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
+ else
+ desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
+ desc.high = 0;
+ } else {
+ int mask = ilog2(__roundup_pow_of_two(pages));
+
+ desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
+ QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
+ desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
+ QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
+ }
+ qi_submit_sync(&desc, svm->iommu);
+
+ if (sdev->dev_iotlb) {
+ desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
+ QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
+ if (pages == -1) {
+ desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
+ } else if (pages > 1) {
+ /* The least significant zero bit indicates the size. So,
+ * for example, an "address" value of 0x12345f000 will
+ * flush from 0x123440000 to 0x12347ffff (256KiB). */
+ unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
+ unsigned long mask = __rounddown_pow_of_two(address ^ last);;
+
+ desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
+ } else {
+ desc.high = QI_DEV_EIOTLB_ADDR(address);
+ }
+ qi_submit_sync(&desc, svm->iommu);
+ }
+}
+
+static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
+ unsigned long pages, int ih, int gl)
+{
+ struct intel_svm_dev *sdev;
+
+ /* Try deferred invalidate if available */
+ if (svm->iommu->pasid_state_table &&
+ !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
+ return;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdev, &svm->devs, list)
+ intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
+ rcu_read_unlock();
+}
+
+static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
+ unsigned long address, pte_t pte)
+{
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+
+ intel_flush_svm_range(svm, address, 1, 1, 0);
+}
+
+static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+ unsigned long address)
+{
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+
+ intel_flush_svm_range(svm, address, 1, 1, 0);
+}
+
+/* Pages have been freed at this point */
+static void intel_invalidate_range(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+
+ intel_flush_svm_range(svm, start,
+ (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
+}
+
+
+static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
+{
+ struct qi_desc desc;
+
+ desc.high = 0;
+ desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
+
+ qi_submit_sync(&desc, svm->iommu);
+}
+
+static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+ struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+
+ svm->iommu->pasid_table[svm->pasid].val = 0;
+
+ /* There's no need to do any flush because we can't get here if there
+ * are any devices left anyway. */
+ WARN_ON(!list_empty(&svm->devs));
+}
+
+static const struct mmu_notifier_ops intel_mmuops = {
+ .release = intel_mm_release,
+ .change_pte = intel_change_pte,
+ .invalidate_page = intel_invalidate_page,
+ .invalidate_range = intel_invalidate_range,
+};
+
+static DEFINE_MUTEX(pasid_mutex);
+
+int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
+{
+ struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm = NULL;
+ struct mm_struct *mm = NULL;
+ int pasid_max;
+ int ret;
+
+ if (WARN_ON(!iommu))
+ return -EINVAL;
+
+ if (dev_is_pci(dev)) {
+ pasid_max = pci_max_pasids(to_pci_dev(dev));
+ if (pasid_max < 0)
+ return -EINVAL;
+ } else
+ pasid_max = 1 << 20;
+
+ if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
+ if (!ecap_srs(iommu->ecap))
+ return -EINVAL;
+ } else if (pasid) {
+ mm = get_task_mm(current);
+ BUG_ON(!mm);
+ }
+
+ mutex_lock(&pasid_mutex);
+ if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+ int i;
+
+ idr_for_each_entry(&iommu->pasid_idr, svm, i) {
+ if (svm->mm != mm ||
+ (svm->flags & SVM_FLAG_PRIVATE_PASID))
+ continue;
+
+ if (svm->pasid >= pasid_max) {
+ dev_warn(dev,
+ "Limited PASID width. Cannot use existing PASID %d\n",
+ svm->pasid);
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ list_for_each_entry(sdev, &svm->devs, list) {
+ if (dev == sdev->dev) {
+ if (sdev->ops != ops) {
+ ret = -EBUSY;
+ goto out;
+ }
+ sdev->users++;
+ goto success;
+ }
+ }
+
+ break;
+ }
+ }
+
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sdev->dev = dev;
+
+ ret = intel_iommu_enable_pasid(iommu, sdev);
+ if (ret || !pasid) {
+ /* If they don't actually want to assign a PASID, this is
+ * just an enabling check/preparation. */
+ kfree(sdev);
+ goto out;
+ }
+ /* Finish the setup now we know we're keeping it */
+ sdev->users = 1;
+ sdev->ops = ops;
+ init_rcu_head(&sdev->rcu);
+
+ if (!svm) {
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm) {
+ ret = -ENOMEM;
+ kfree(sdev);
+ goto out;
+ }
+ svm->iommu = iommu;
+
+ if (pasid_max > 2 << ecap_pss(iommu->ecap))
+ pasid_max = 2 << ecap_pss(iommu->ecap);
+
+ /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
+ ret = idr_alloc(&iommu->pasid_idr, svm,
+ !!cap_caching_mode(iommu->cap),
+ pasid_max - 1, GFP_KERNEL);
+ if (ret < 0) {
+ kfree(svm);
+ goto out;
+ }
+ svm->pasid = ret;
+ svm->notifier.ops = &intel_mmuops;
+ svm->mm = mm;
+ svm->flags = flags;
+ INIT_LIST_HEAD_RCU(&svm->devs);
+ ret = -ENOMEM;
+ if (mm) {
+ ret = mmu_notifier_register(&svm->notifier, mm);
+ if (ret) {
+ idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+ kfree(svm);
+ kfree(sdev);
+ goto out;
+ }
+ iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
+ mm = NULL;
+ } else
+ iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
+ wmb();
+ /* In caching mode, we still have to flush with PASID 0 when
+ * a PASID table entry becomes present. Not entirely clear
+ * *why* that would be the case — surely we could just issue
+ * a flush with the PASID value that we've changed? The PASID
+ * is the index into the table, after all. It's not like domain
+ * IDs in the case of the equivalent context-entry change in
+ * caching mode. And for that matter it's not entirely clear why
+ * a VMM would be in the business of caching the PASID table
+ * anyway. Surely that can be left entirely to the guest? */
+ if (cap_caching_mode(iommu->cap))
+ intel_flush_pasid_dev(svm, sdev, 0);
+ }
+ list_add_rcu(&sdev->list, &svm->devs);
+
+ success:
+ *pasid = svm->pasid;
+ ret = 0;
+ out:
+ mutex_unlock(&pasid_mutex);
+ if (mm)
+ mmput(mm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
+
+int intel_svm_unbind_mm(struct device *dev, int pasid)
+{
+ struct intel_svm_dev *sdev;
+ struct intel_iommu *iommu;
+ struct intel_svm *svm;
+ int ret = -EINVAL;
+
+ mutex_lock(&pasid_mutex);
+ iommu = intel_svm_device_to_iommu(dev);
+ if (!iommu || !iommu->pasid_table)
+ goto out;
+
+ svm = idr_find(&iommu->pasid_idr, pasid);
+ if (!svm)
+ goto out;
+
+ list_for_each_entry(sdev, &svm->devs, list) {
+ if (dev == sdev->dev) {
+ ret = 0;
+ sdev->users--;
+ if (!sdev->users) {
+ list_del_rcu(&sdev->list);
+ /* Flush the PASID cache and IOTLB for this device.
+ * Note that we do depend on the hardware *not* using
+ * the PASID any more. Just as we depend on other
+ * devices never using PASIDs that they have no right
+ * to use. We have a *shared* PASID table, because it's
+ * large and has to be physically contiguous. So it's
+ * hard to be as defensive as we might like. */
+ intel_flush_pasid_dev(svm, sdev, svm->pasid);
+ intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+ kfree_rcu(sdev, rcu);
+
+ if (list_empty(&svm->devs)) {
+ mmu_notifier_unregister(&svm->notifier, svm->mm);
+
+ idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+ if (svm->mm)
+ mmput(svm->mm);
+ /* We mandate that no page faults may be outstanding
+ * for the PASID when intel_svm_unbind_mm() is called.
+ * If that is not obeyed, subtle errors will happen.
+ * Let's make them less subtle... */
+ memset(svm, 0x6b, sizeof(*svm));
+ kfree(svm);
+ }
+ }
+ break;
+ }
+ }
+ out:
+ mutex_unlock(&pasid_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
+
+/* Page request queue descriptor */
+struct page_req_dsc {
+ u64 srr:1;
+ u64 bof:1;
+ u64 pasid_present:1;
+ u64 lpig:1;
+ u64 pasid:20;
+ u64 bus:8;
+ u64 private:23;
+ u64 prg_index:9;
+ u64 rd_req:1;
+ u64 wr_req:1;
+ u64 exe_req:1;
+ u64 priv_req:1;
+ u64 devfn:8;
+ u64 addr:52;
+};
+
+#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
+
+static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
+{
+ unsigned long requested = 0;
+
+ if (req->exe_req)
+ requested |= VM_EXEC;
+
+ if (req->rd_req)
+ requested |= VM_READ;
+
+ if (req->wr_req)
+ requested |= VM_WRITE;
+
+ return (requested & ~vma->vm_flags) != 0;
+}
+
+static irqreturn_t prq_event_thread(int irq, void *d)
+{
+ struct intel_iommu *iommu = d;
+ struct intel_svm *svm = NULL;
+ int head, tail, handled = 0;
+
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ while (head != tail) {
+ struct intel_svm_dev *sdev;
+ struct vm_area_struct *vma;
+ struct page_req_dsc *req;
+ struct qi_desc resp;
+ int ret, result;
+ u64 address;
+
+ handled = 1;
+
+ req = &iommu->prq[head / sizeof(*req)];
+
+ result = QI_RESP_FAILURE;
+ address = (u64)req->addr << VTD_PAGE_SHIFT;
+ if (!req->pasid_present) {
+ pr_err("%s: Page request without PASID: %08llx %08llx\n",
+ iommu->name, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+ goto bad_req;
+ }
+
+ if (!svm || svm->pasid != req->pasid) {
+ rcu_read_lock();
+ svm = idr_find(&iommu->pasid_idr, req->pasid);
+ /* It *can't* go away, because the driver is not permitted
+ * to unbind the mm while any page faults are outstanding.
+ * So we only need RCU to protect the internal idr code. */
+ rcu_read_unlock();
+
+ if (!svm) {
+ pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
+ iommu->name, req->pasid, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+ goto no_pasid;
+ }
+ }
+
+ result = QI_RESP_INVALID;
+ /* Since we're using init_mm.pgd directly, we should never take
+ * any faults on kernel addresses. */
+ if (!svm->mm)
+ goto bad_req;
+ down_read(&svm->mm->mmap_sem);
+ vma = find_extend_vma(svm->mm, address);
+ if (!vma || address < vma->vm_start)
+ goto invalid;
+
+ if (access_error(vma, req))
+ goto invalid;
+
+ ret = handle_mm_fault(svm->mm, vma, address,
+ req->wr_req ? FAULT_FLAG_WRITE : 0);
+ if (ret & VM_FAULT_ERROR)
+ goto invalid;
+
+ result = QI_RESP_SUCCESS;
+ invalid:
+ up_read(&svm->mm->mmap_sem);
+ bad_req:
+ /* Accounting for major/minor faults? */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdev, &svm->devs, list) {
+ if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
+ break;
+ }
+ /* Other devices can go away, but the drivers are not permitted
+ * to unbind while any page faults might be in flight. So it's
+ * OK to drop the 'lock' here now we have it. */
+ rcu_read_unlock();
+
+ if (WARN_ON(&sdev->list == &svm->devs))
+ sdev = NULL;
+
+ if (sdev && sdev->ops && sdev->ops->fault_cb) {
+ int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
+ (req->exe_req << 1) | (req->priv_req);
+ sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
+ }
+ /* We get here in the error case where the PASID lookup failed,
+ and these can be NULL. Do not use them below this point! */
+ sdev = NULL;
+ svm = NULL;
+ no_pasid:
+ if (req->lpig) {
+ /* Page Group Response */
+ resp.low = QI_PGRP_PASID(req->pasid) |
+ QI_PGRP_DID((req->bus << 8) | req->devfn) |
+ QI_PGRP_PASID_P(req->pasid_present) |
+ QI_PGRP_RESP_TYPE;
+ resp.high = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
+
+ qi_submit_sync(&resp, iommu);
+ } else if (req->srr) {
+ /* Page Stream Response */
+ resp.low = QI_PSTRM_IDX(req->prg_index) |
+ QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
+ QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
+ resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
+ QI_PSTRM_RESP_CODE(result);
+
+ qi_submit_sync(&resp, iommu);
+ }
+
+ head = (head + sizeof(*req)) & PRQ_RING_MASK;
+ }
+
+ dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
+
+ return IRQ_RETVAL(handled);
+}
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 9ec4e0d94..1fae18816 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -169,8 +169,26 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
index = irq_iommu->irte_index + irq_iommu->sub_handle;
irte = &iommu->ir_table->base[index];
- set_64bit(&irte->low, irte_modified->low);
- set_64bit(&irte->high, irte_modified->high);
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
+ if ((irte->pst == 1) || (irte_modified->pst == 1)) {
+ bool ret;
+
+ ret = cmpxchg_double(&irte->low, &irte->high,
+ irte->low, irte->high,
+ irte_modified->low, irte_modified->high);
+ /*
+ * We use cmpxchg16 to atomically update the 128-bit IRTE,
+ * and it cannot be updated by the hardware or other processors
+ * behind us, so the return value of cmpxchg16 should be the
+ * same as the old value.
+ */
+ WARN_ON(!ret);
+ } else
+#endif
+ {
+ set_64bit(&irte->low, irte_modified->low);
+ set_64bit(&irte->high, irte_modified->high);
+ }
__iommu_flush_cache(iommu, irte, sizeof(*irte));
rc = qi_flush_iec(iommu, index, 0);
@@ -384,7 +402,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
static int iommu_load_old_irte(struct intel_iommu *iommu)
{
- struct irte __iomem *old_ir_table;
+ struct irte *old_ir_table;
phys_addr_t irt_phys;
unsigned int i;
size_t size;
@@ -408,12 +426,12 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
size = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte);
/* Map the old IR table */
- old_ir_table = ioremap_cache(irt_phys, size);
+ old_ir_table = memremap(irt_phys, size, MEMREMAP_WB);
if (!old_ir_table)
return -ENOMEM;
/* Copy data over */
- memcpy_fromio(iommu->ir_table->base, old_ir_table, size);
+ memcpy(iommu->ir_table->base, old_ir_table, size);
__iommu_flush_cache(iommu, iommu->ir_table->base, size);
@@ -426,7 +444,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
bitmap_set(iommu->ir_table->bitmap, i, 1);
}
- iounmap(old_ir_table);
+ memunmap(old_ir_table);
return 0;
}
@@ -672,7 +690,7 @@ static int __init intel_prepare_irq_remapping(void)
if (!dmar_ir_support())
return -ENODEV;
- if (parse_ioapics_under_ir() != 1) {
+ if (parse_ioapics_under_ir()) {
pr_info("Not enabling interrupt remapping\n");
goto error;
}
@@ -727,7 +745,16 @@ static inline void set_irq_posting_cap(void)
struct intel_iommu *iommu;
if (!disable_irq_post) {
- intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
+ /*
+ * If IRTE is in posted format, the 'pda' field goes across the
+ * 64-bit boundary, we need use cmpxchg16b to atomically update
+ * it. We only expose posted-interrupt when X86_FEATURE_CX16
+ * is supported. Actually, hardware platforms supporting PI
+ * should have X86_FEATURE_CX16 support, this has been confirmed
+ * with Intel hardware guys.
+ */
+ if ( cpu_has_cx16 )
+ intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
for_each_iommu(iommu, drhd)
if (!cap_pi_support(iommu->cap)) {
@@ -907,16 +934,21 @@ static int __init parse_ioapics_under_ir(void)
bool ir_supported = false;
int ioapic_idx;
- for_each_iommu(iommu, drhd)
- if (ecap_ir_support(iommu->ecap)) {
- if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
- return -1;
+ for_each_iommu(iommu, drhd) {
+ int ret;
- ir_supported = true;
- }
+ if (!ecap_ir_support(iommu->ecap))
+ continue;
+
+ ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu);
+ if (ret)
+ return ret;
+
+ ir_supported = true;
+ }
if (!ir_supported)
- return 0;
+ return -ENODEV;
for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
int ioapic_id = mpc_ioapic_id(ioapic_idx);
@@ -928,7 +960,7 @@ static int __init parse_ioapics_under_ir(void)
}
}
- return 1;
+ return 0;
}
static int __init ir_dev_scope_init(void)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 049df495c..0e3b0092e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -728,16 +728,35 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
}
/*
+ * Generic device_group call-back function. It just allocates one
+ * iommu-group per device.
+ */
+struct iommu_group *generic_device_group(struct device *dev)
+{
+ struct iommu_group *group;
+
+ group = iommu_group_alloc();
+ if (IS_ERR(group))
+ return NULL;
+
+ return group;
+}
+
+/*
* Use standard PCI bus topology, isolation features, and DMA alias quirks
* to find or create an IOMMU group for a device.
*/
-static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
+struct iommu_group *pci_device_group(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct group_for_pci_data data;
struct pci_bus *bus;
struct iommu_group *group = NULL;
u64 devfns[4] = { 0 };
+ if (WARN_ON(!dev_is_pci(dev)))
+ return ERR_PTR(-EINVAL);
+
/*
* Find the upstream DMA alias for the device. A device must not
* be aliased due to topology in order to have its own IOMMU group.
@@ -791,14 +810,6 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
if (IS_ERR(group))
return NULL;
- /*
- * Try to allocate a default domain - needs support from the
- * IOMMU driver.
- */
- group->default_domain = __iommu_domain_alloc(pdev->dev.bus,
- IOMMU_DOMAIN_DMA);
- group->domain = group->default_domain;
-
return group;
}
@@ -814,6 +825,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
*/
struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
+ const struct iommu_ops *ops = dev->bus->iommu_ops;
struct iommu_group *group;
int ret;
@@ -821,14 +833,24 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
if (group)
return group;
- if (!dev_is_pci(dev))
- return ERR_PTR(-EINVAL);
+ group = ERR_PTR(-EINVAL);
- group = iommu_group_get_for_pci_dev(to_pci_dev(dev));
+ if (ops && ops->device_group)
+ group = ops->device_group(dev);
if (IS_ERR(group))
return group;
+ /*
+ * Try to allocate a default domain - needs support from the
+ * IOMMU driver.
+ */
+ if (!group->default_domain) {
+ group->default_domain = __iommu_domain_alloc(dev->bus,
+ IOMMU_DOMAIN_DMA);
+ group->domain = group->default_domain;
+ }
+
ret = iommu_group_add_device(group, dev);
if (ret) {
iommu_group_put(group);
@@ -1408,7 +1430,7 @@ size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
for_each_sg(sg, s, nents, i) {
- phys_addr_t phys = sg_phys(s);
+ phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
/*
* We are mapping on IOMMU page boundaries, so offset within
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 8cf605fa9..dfb868e2d 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -295,7 +295,7 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
{
- phys_addr_t ttbr;
+ u64 ttbr;
/*
* Allocate the page table operations.
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 913455a5f..8adaaeae3 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -22,7 +22,7 @@ int irq_remap_broken;
int disable_sourceid_checking;
int no_x2apic_optout;
-int disable_irq_post = 1;
+int disable_irq_post = 0;
static int disable_irq_remap;
static struct irq_remap_ops *remap_ops;
@@ -58,14 +58,18 @@ static __init int setup_irqremap(char *str)
return -EINVAL;
while (*str) {
- if (!strncmp(str, "on", 2))
+ if (!strncmp(str, "on", 2)) {
disable_irq_remap = 0;
- else if (!strncmp(str, "off", 3))
+ disable_irq_post = 0;
+ } else if (!strncmp(str, "off", 3)) {
disable_irq_remap = 1;
- else if (!strncmp(str, "nosid", 5))
+ disable_irq_post = 1;
+ } else if (!strncmp(str, "nosid", 5))
disable_sourceid_checking = 1;
else if (!strncmp(str, "no_x2apic_optout", 16))
no_x2apic_optout = 1;
+ else if (!strncmp(str, "nopost", 6))
+ disable_irq_post = 1;
str += strcspn(str, ",");
while (*str == ',')
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 36d0033c2..3dc5b65f3 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -26,6 +26,8 @@
#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
#include <asm/cacheflush.h>
@@ -112,6 +114,18 @@ void omap_iommu_restore_ctx(struct device *dev)
}
EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
+static void dra7_cfg_dspsys_mmu(struct omap_iommu *obj, bool enable)
+{
+ u32 val, mask;
+
+ if (!obj->syscfg)
+ return;
+
+ mask = (1 << (obj->id * DSP_SYS_MMU_CONFIG_EN_SHIFT));
+ val = enable ? mask : 0;
+ regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, mask, val);
+}
+
static void __iommu_set_twl(struct omap_iommu *obj, bool on)
{
u32 l = iommu_read_reg(obj, MMU_CNTL);
@@ -147,6 +161,8 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
iommu_write_reg(obj, pa, MMU_TTB);
+ dra7_cfg_dspsys_mmu(obj, true);
+
if (obj->has_bus_err_back)
iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG);
@@ -161,6 +177,7 @@ static void omap2_iommu_disable(struct omap_iommu *obj)
l &= ~MMU_CNTL_MASK;
iommu_write_reg(obj, l, MMU_CNTL);
+ dra7_cfg_dspsys_mmu(obj, false);
dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
}
@@ -864,6 +881,42 @@ static void omap_iommu_detach(struct omap_iommu *obj)
dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
}
+static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
+ struct omap_iommu *obj)
+{
+ struct device_node *np = pdev->dev.of_node;
+ int ret;
+
+ if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu"))
+ return 0;
+
+ if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) {
+ dev_err(&pdev->dev, "ti,syscon-mmuconfig property is missing\n");
+ return -EINVAL;
+ }
+
+ obj->syscfg =
+ syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig");
+ if (IS_ERR(obj->syscfg)) {
+ /* can fail with -EPROBE_DEFER */
+ ret = PTR_ERR(obj->syscfg);
+ return ret;
+ }
+
+ if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1,
+ &obj->id)) {
+ dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n");
+ return -EINVAL;
+ }
+
+ if (obj->id != 0 && obj->id != 1) {
+ dev_err(&pdev->dev, "invalid IOMMU instance id\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
* OMAP Device MMU(IOMMU) detection
*/
@@ -907,6 +960,10 @@ static int omap_iommu_probe(struct platform_device *pdev)
if (IS_ERR(obj->regbase))
return PTR_ERR(obj->regbase);
+ err = omap_iommu_dra7_get_dsp_system_cfg(pdev, obj);
+ if (err)
+ return err;
+
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return -ENODEV;
@@ -943,6 +1000,7 @@ static const struct of_device_id omap_iommu_of_match[] = {
{ .compatible = "ti,omap2-iommu" },
{ .compatible = "ti,omap4-iommu" },
{ .compatible = "ti,dra7-iommu" },
+ { .compatible = "ti,dra7-dsp-iommu" },
{},
};
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index a656df2f9..59628e501 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -30,6 +30,7 @@ struct iotlb_entry {
struct omap_iommu {
const char *name;
void __iomem *regbase;
+ struct regmap *syscfg;
struct device *dev;
struct iommu_domain *domain;
struct dentry *debug_dir;
@@ -48,6 +49,7 @@ struct omap_iommu {
void *ctx; /* iommu context: registres saved area */
int has_bus_err_back;
+ u32 id;
};
struct cr_regs {
@@ -159,6 +161,13 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
((pgsz) == MMU_CAM_PGSZ_4K) ? 0xfffff000 : 0)
/*
+ * DSP_SYSTEM registers and bit definitions (applicable only for DRA7xx DSP)
+ */
+#define DSP_SYS_REVISION 0x00
+#define DSP_SYS_MMU_CONFIG 0x18
+#define DSP_SYS_MMU_CONFIG_EN_SHIFT 4
+
+/*
* utilities for super page(16MB, 1MB, 64KB and 4KB)
*/
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
new file mode 100644
index 000000000..471ee36b9
--- /dev/null
+++ b/drivers/iommu/s390-iommu.c
@@ -0,0 +1,356 @@
+/*
+ * IOMMU API for s390 PCI devices
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/iommu-helper.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <asm/pci_dma.h>
+
+/*
+ * Physically contiguous memory regions can be mapped with 4 KiB alignment,
+ * we allow all page sizes that are an order of 4KiB (no special large page
+ * support so far).
+ */
+#define S390_IOMMU_PGSIZES (~0xFFFUL)
+
+struct s390_domain {
+ struct iommu_domain domain;
+ struct list_head devices;
+ unsigned long *dma_table;
+ spinlock_t dma_table_lock;
+ spinlock_t list_lock;
+};
+
+struct s390_domain_device {
+ struct list_head list;
+ struct zpci_dev *zdev;
+};
+
+static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct s390_domain, domain);
+}
+
+static bool s390_iommu_capable(enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ case IOMMU_CAP_INTR_REMAP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+struct iommu_domain *s390_domain_alloc(unsigned domain_type)
+{
+ struct s390_domain *s390_domain;
+
+ if (domain_type != IOMMU_DOMAIN_UNMANAGED)
+ return NULL;
+
+ s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
+ if (!s390_domain)
+ return NULL;
+
+ s390_domain->dma_table = dma_alloc_cpu_table();
+ if (!s390_domain->dma_table) {
+ kfree(s390_domain);
+ return NULL;
+ }
+
+ spin_lock_init(&s390_domain->dma_table_lock);
+ spin_lock_init(&s390_domain->list_lock);
+ INIT_LIST_HEAD(&s390_domain->devices);
+
+ return &s390_domain->domain;
+}
+
+void s390_domain_free(struct iommu_domain *domain)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+
+ dma_cleanup_tables(s390_domain->dma_table);
+ kfree(s390_domain);
+}
+
+static int s390_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct s390_domain_device *domain_device;
+ unsigned long flags;
+ int rc;
+
+ if (!zdev)
+ return -ENODEV;
+
+ domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
+ if (!domain_device)
+ return -ENOMEM;
+
+ if (zdev->dma_table)
+ zpci_dma_exit_device(zdev);
+
+ zdev->dma_table = s390_domain->dma_table;
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+ zdev->start_dma + zdev->iommu_size - 1,
+ (u64) zdev->dma_table);
+ if (rc)
+ goto out_restore;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ /* First device defines the DMA range limits */
+ if (list_empty(&s390_domain->devices)) {
+ domain->geometry.aperture_start = zdev->start_dma;
+ domain->geometry.aperture_end = zdev->end_dma;
+ domain->geometry.force_aperture = true;
+ /* Allow only devices with identical DMA range limits */
+ } else if (domain->geometry.aperture_start != zdev->start_dma ||
+ domain->geometry.aperture_end != zdev->end_dma) {
+ rc = -EINVAL;
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ goto out_restore;
+ }
+ domain_device->zdev = zdev;
+ zdev->s390_domain = s390_domain;
+ list_add(&domain_device->list, &s390_domain->devices);
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+ return 0;
+
+out_restore:
+ zpci_dma_init_device(zdev);
+ kfree(domain_device);
+
+ return rc;
+}
+
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct s390_domain_device *domain_device, *tmp;
+ unsigned long flags;
+ int found = 0;
+
+ if (!zdev)
+ return;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
+ list) {
+ if (domain_device->zdev == zdev) {
+ list_del(&domain_device->list);
+ kfree(domain_device);
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+ if (found) {
+ zdev->s390_domain = NULL;
+ zpci_unregister_ioat(zdev, 0);
+ zpci_dma_init_device(zdev);
+ }
+}
+
+static int s390_iommu_add_device(struct device *dev)
+{
+ struct iommu_group *group;
+ int rc;
+
+ group = iommu_group_get(dev);
+ if (!group) {
+ group = iommu_group_alloc();
+ if (IS_ERR(group))
+ return PTR_ERR(group);
+ }
+
+ rc = iommu_group_add_device(group, dev);
+ iommu_group_put(group);
+
+ return rc;
+}
+
+static void s390_iommu_remove_device(struct device *dev)
+{
+ struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+ struct iommu_domain *domain;
+
+ /*
+ * This is a workaround for a scenario where the IOMMU API common code
+ * "forgets" to call the detach_dev callback: After binding a device
+ * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
+ * the attach_dev), removing the device via
+ * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
+ * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
+ * notifier.
+ *
+ * So let's call detach_dev from here if it hasn't been called before.
+ */
+ if (zdev && zdev->s390_domain) {
+ domain = iommu_get_domain_for_dev(dev);
+ if (domain)
+ s390_iommu_detach_device(domain, dev);
+ }
+
+ iommu_group_remove_device(dev);
+}
+
+static int s390_iommu_update_trans(struct s390_domain *s390_domain,
+ unsigned long pa, dma_addr_t dma_addr,
+ size_t size, int flags)
+{
+ struct s390_domain_device *domain_device;
+ u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+ dma_addr_t start_dma_addr = dma_addr;
+ unsigned long irq_flags, nr_pages, i;
+ unsigned long *entry;
+ int rc = 0;
+
+ if (dma_addr < s390_domain->domain.geometry.aperture_start ||
+ dma_addr + size > s390_domain->domain.geometry.aperture_end)
+ return -EINVAL;
+
+ nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ if (!nr_pages)
+ return 0;
+
+ spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
+ for (i = 0; i < nr_pages; i++) {
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto undo_cpu_trans;
+ }
+ dma_update_cpu_trans(entry, page_addr, flags);
+ page_addr += PAGE_SIZE;
+ dma_addr += PAGE_SIZE;
+ }
+
+ spin_lock(&s390_domain->list_lock);
+ list_for_each_entry(domain_device, &s390_domain->devices, list) {
+ rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+ start_dma_addr, nr_pages * PAGE_SIZE);
+ if (rc)
+ break;
+ }
+ spin_unlock(&s390_domain->list_lock);
+
+undo_cpu_trans:
+ if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+ flags = ZPCI_PTE_INVALID;
+ while (i-- > 0) {
+ page_addr -= PAGE_SIZE;
+ dma_addr -= PAGE_SIZE;
+ entry = dma_walk_cpu_trans(s390_domain->dma_table,
+ dma_addr);
+ if (!entry)
+ break;
+ dma_update_cpu_trans(entry, page_addr, flags);
+ }
+ }
+ spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
+
+ return rc;
+}
+
+static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ int flags = ZPCI_PTE_VALID, rc = 0;
+
+ if (!(prot & IOMMU_READ))
+ return -EINVAL;
+
+ if (!(prot & IOMMU_WRITE))
+ flags |= ZPCI_TABLE_PROTECTED;
+
+ rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+ size, flags);
+
+ return rc;
+}
+
+static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ unsigned long *sto, *pto, *rto, flags;
+ unsigned int rtx, sx, px;
+ phys_addr_t phys = 0;
+
+ if (iova < domain->geometry.aperture_start ||
+ iova > domain->geometry.aperture_end)
+ return 0;
+
+ rtx = calc_rtx(iova);
+ sx = calc_sx(iova);
+ px = calc_px(iova);
+ rto = s390_domain->dma_table;
+
+ spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
+ if (rto && reg_entry_isvalid(rto[rtx])) {
+ sto = get_rt_sto(rto[rtx]);
+ if (sto && reg_entry_isvalid(sto[sx])) {
+ pto = get_st_pto(sto[sx]);
+ if (pto && pt_entry_isvalid(pto[px]))
+ phys = pto[px] & ZPCI_PTE_ADDR_MASK;
+ }
+ }
+ spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);
+
+ return phys;
+}
+
+static size_t s390_iommu_unmap(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ int flags = ZPCI_PTE_INVALID;
+ phys_addr_t paddr;
+ int rc;
+
+ paddr = s390_iommu_iova_to_phys(domain, iova);
+ if (!paddr)
+ return 0;
+
+ rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+ size, flags);
+ if (rc)
+ return 0;
+
+ return size;
+}
+
+static struct iommu_ops s390_iommu_ops = {
+ .capable = s390_iommu_capable,
+ .domain_alloc = s390_domain_alloc,
+ .domain_free = s390_domain_free,
+ .attach_dev = s390_iommu_attach_device,
+ .detach_dev = s390_iommu_detach_device,
+ .map = s390_iommu_map,
+ .unmap = s390_iommu_unmap,
+ .iova_to_phys = s390_iommu_iova_to_phys,
+ .add_device = s390_iommu_add_device,
+ .remove_device = s390_iommu_remove_device,
+ .pgsize_bitmap = S390_IOMMU_PGSIZES,
+};
+
+static int __init s390_iommu_init(void)
+{
+ return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
+}
+subsys_initcall(s390_iommu_init);